In [1]:
import pandas as pd
import numpy as np


file_to_load = "purchase_data.csv"
purchase_data_df = pd.read_csv(file_to_load)
purchase_data_df.head(10)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
5,5,Yalae81,22,Male,81,Dreamkiss,3.61
6,6,Itheria73,36,Male,169,"Interrogator, Blood Blade of the Queen",2.18
7,7,Iskjaskst81,20,Male,162,Abyssal Shard,2.67
8,8,Undjask33,22,Male,21,Souleater,1.1
9,9,Chanosian48,35,Other / Non-Disclosed,136,Ghastly Adamantite Protector,3.58


In [2]:
purchase_data_df.shape

(780, 7)

In [3]:
# Calculate the Number of Unique Players
player_demographics = purchase_data_df.loc[:, ["Gender", "SN", "Age"]]
player_demographics = player_demographics.drop_duplicates()
num_players = player_demographics.count()[0]

# Display the total number of players
pd.DataFrame({"Total Players": [num_players]})

Unnamed: 0,Total Players
0,576


In [4]:
unique_items = len(purchase_data_df["Item ID"].unique())
unique_items

183

In [5]:
average_price = purchase_data_df["Price"].mean()
average_price

3.050987179487176

In [6]:
total_purchases = purchase_data_df["Price"].count()
total_purchases

780

In [7]:
revenue = purchase_data_df["Price"].sum()
revenue

2379.77

In [8]:
summary_table = pd.DataFrame({"Number of Unique Items": unique_items, "Average Price": [average_price], "Number of Purchases": [total_purchases], "Total Revenue": [revenue]})
summary_table

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,3.050987,780,2379.77


In [9]:
gender_total = player_demographics["Gender"].value_counts()
gender_total

gender_percentage = gender_total/num_players*100
gender_percentage

Male                     84.027778
Female                   14.062500
Other / Non-Disclosed     1.909722
Name: Gender, dtype: float64

In [10]:
gender_demographics = pd.DataFrame({"Total Count": gender_total,"Percentage of Players": gender_percentage})
gender_demographics

Unnamed: 0,Total Count,Percentage of Players
Male,484,84.027778
Female,81,14.0625
Other / Non-Disclosed,11,1.909722


In [11]:
purchase_count = purchase_data_df["Gender"].value_counts()
purchase_count

Male                     652
Female                   113
Other / Non-Disclosed     15
Name: Gender, dtype: int64

In [12]:
average_purchase_price = purchase_data_df.groupby('Gender')['Price'].mean()
average_purchase_price

Gender
Female                   3.203009
Male                     3.017853
Other / Non-Disclosed    3.346000
Name: Price, dtype: float64

In [13]:
total_purchase_value = purchase_data_df.groupby('Gender')['Price'].sum()
total_purchase_value

Gender
Female                    361.94
Male                     1967.64
Other / Non-Disclosed      50.19
Name: Price, dtype: float64

In [15]:
avg_purchase = total_purchase_value/gender_demographics["Total Count"]
avg_purchase


Female                   4.468395
Male                     4.065372
Other / Non-Disclosed    4.562727
dtype: float64

In [17]:
gender_data = pd.DataFrame({"Purchase Count": purchase_count, 
                            "Average Purchase Price": average_purchase_price, 
                            "Total Purchase Value": total_purchase_value, 
                            "Average Total Purchase Per Person": avg_purchase})
gender_data

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Average Total Purchase Per Person
Female,113,3.203009,361.94,4.468395
Male,652,3.017853,1967.64,4.065372
Other / Non-Disclosed,15,3.346,50.19,4.562727


In [21]:
bins = [0, 9.90, 14.90, 19.90, 24.9, 29.9, 34.90, 39.90, 9999999]

group_labels = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", ">40"]


player_demographics["Age Group"] = pd.cut(player_demographics["Age"], bins, labels=group_labels)
player_demographics.head()

Unnamed: 0,Gender,SN,Age,Age Group
0,Male,Lisim78,20,20-24
1,Male,Lisovynya38,40,>40
2,Male,Ithergue48,24,20-24
3,Male,Chamassasya86,24,20-24
4,Male,Iskosia90,23,20-24


In [22]:
age_totals = player_demographics["Age Group"].value_counts()
age_totals



20-24    258
15-19    107
25-29     77
30-34     52
35-39     31
10-14     22
<10       17
>40       12
Name: Age Group, dtype: int64

In [24]:
age_percents = age_totals / num_players * 100
age_percents

20-24    44.791667
15-19    18.576389
25-29    13.368056
30-34     9.027778
35-39     5.381944
10-14     3.819444
<10       2.951389
>40       2.083333
Name: Age Group, dtype: float64

In [25]:
age_demographics = pd.DataFrame({"Total Count": age_totals, "Percent of Players": age_percents})
age_demographics

Unnamed: 0,Total Count,Percent of Players
20-24,258,44.791667
15-19,107,18.576389
25-29,77,13.368056
30-34,52,9.027778
35-39,31,5.381944
10-14,22,3.819444
<10,17,2.951389
>40,12,2.083333


In [26]:
age_demographics = age_demographics.sort_index()
age_demographics

Unnamed: 0,Total Count,Percent of Players
<10,17,2.951389
10-14,22,3.819444
15-19,107,18.576389
20-24,258,44.791667
25-29,77,13.368056
30-34,52,9.027778
35-39,31,5.381944
>40,12,2.083333
