In [55]:
import pandas as pd

In [56]:
#Load File
file_to_load = "../Resources/purchase_data.csv"

In [57]:
#Read File
purchase_data = pd.read_csv(file_to_load)
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [58]:
#Unique Player Count
unique_sn_count = purchase_data['SN'].nunique()
unique_sn_count

576

In [59]:
#Unique Game Count
unique_item_count = purchase_data['Item Name'].nunique()
#unique_item_count

#Average Purchase Price
average_price = purchase_data['Price'].mean()
#average_price

#Total Purchases
total_purchases = purchase_data['Purchase ID'].count()
#total_purchases

#Total Revenue
total_revenue = purchase_data['Price'].sum()
#total_revenue

In [60]:
#Purchasing Analysis (Total)

pd.DataFrame([{
    
    'Number of Unique Items': unique_item_count,
    'Average Purchase Price': '${:,.2f}'.format(average_price),
    'Total Number of Purchases': total_purchases,
    'Total Revenue': '${:,.2f}'.format(total_revenue)
    
}])

Unnamed: 0,Number of Unique Items,Average Purchase Price,Total Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


In [61]:
#Gender Demographics
player_demographics = purchase_data.loc[:, ["Gender", "SN", "Age"]]
player_demographics = player_demographics.drop_duplicates()
gender_demographics_totals = player_demographics["Gender"].value_counts()

gender_percentage = gender_demographics_totals / unique_sn_count

In [62]:
#Gender Demographics DataFrame
gender_summary = pd.DataFrame({'Total Count': gender_demographics_totals,
                               '% of Players': gender_percentage
                             })
gender_summary

Unnamed: 0,Total Count,% of Players
Male,484,0.840278
Female,81,0.140625
Other / Non-Disclosed,11,0.019097


In [65]:
#Unremove removal of Duplicates
player_demographics = purchase_data.loc[:, ["Gender", "SN", "Age"]]
gender_demographics_totals_full = player_demographics["Gender"].value_counts()
#gender_demographics_totals_full

#Purchase Value Total
purch_value_total = purchase_data.groupby(["Gender"]).sum()["Price"]
#purch_value_total

#Avrage Purchase Price
avg_purch_price = purchase_data.groupby(["Gender"]).mean()["Price"]
avg_purch_price



Gender
Female                   3.203009
Male                     3.017853
Other / Non-Disclosed    3.346000
Name: Price, dtype: float64

In [66]:
#Gender Demographics DataFrame
value_summary = pd.DataFrame({'Total Count': gender_demographics_totals_full,
                              'Average Purchase Price': avg_purch_price,
                              'Total Purchase Value': purch_value_total,
                            })
value_summary

Unnamed: 0,Total Count,Average Purchase Price,Total Purchase Value
Female,113,3.203009,361.94
Male,652,3.017853,1967.64
Other / Non-Disclosed,15,3.346,50.19


In [71]:
#Create Bins
bins = [0, 9, 14, 19, 24, 29, 34, 39, 99]

age_labels = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40-99']

In [78]:
#Slice the Data and put it into Bins
view_group = pd.cut(purchase_data['Age'], bins, labels=age_labels)
view_group

0      20-24
1      40-99
2      20-24
3      20-24
4      20-24
       ...  
775    20-24
776    20-24
777    20-24
778      <10
779    20-24
Name: Age, Length: 780, dtype: category
Categories (8, object): [<10 < 10-14 < 15-19 < 20-24 < 25-29 < 30-34 < 35-39 < 40-99]

In [79]:
# Place the data series into a new column inside of the DataFrame
purchase_data['View Group'] = view_group
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price,View Group
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53,20-24
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56,40-99
2,2,Ithergue48,24,Male,92,Final Critic,4.88,20-24
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27,20-24
4,4,Iskosia90,23,Male,131,Fury,1.44,20-24


In [80]:
#Create groupby object based on Age
age_group = purchase_data.groupby(['View Group'])

#Count Age Groups
age_group.count()['SN']

View Group
<10       23
10-14     28
15-19    136
20-24    365
25-29    101
30-34     73
35-39     41
40-99     13
Name: SN, dtype: int64

In [88]:
#Age Summary Total
age_summary_totals = purchase_data.groupby(["View Group"]).count()["Age"]

percent_of_players = age_summary_totals / unique_sn_count

In [89]:
age_summary = pd.DataFrame({'Total Count': age_summary_totals,
                            'Percentage of Players': percent_of_players
                            })
age_summary

Unnamed: 0_level_0,Total Count,Percentage of Players
View Group,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,23,0.039931
10-14,28,0.048611
15-19,136,0.236111
20-24,365,0.633681
25-29,101,0.175347
30-34,73,0.126736
35-39,41,0.071181
40-99,13,0.022569


In [102]:
avg_age_purch_price = purchase_data.groupby(["View Group"]).mean()["Price"]

age_purch_value = purchase_data.groupby(["View Group"]).sum()["Price"]

avg_tot_per_person = age_purch_value / age_summary_totals

In [103]:
age_analysis = pd.DataFrame({'Purchase Count': age_summary_totals,
                             'Average Purchase Price': avg_age_purch_price,
                             'Total Purchase Value': age_purch_value,
                             'Avg Total Purch Per Person': avg_tot_per_person
                            })
age_analysis

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purch Per Person
View Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,23,3.353478,77.13,3.353478
10-14,28,2.956429,82.78,2.956429
15-19,136,3.035956,412.89,3.035956
20-24,365,3.052219,1114.06,3.052219
25-29,101,2.90099,293.0,2.90099
30-34,73,2.931507,214.0,2.931507
35-39,41,3.601707,147.67,3.601707
40-99,13,2.941538,38.24,2.941538
