In [1]:
import pandas as pd
import json


In [2]:
# Load the purchase log. The path is relative to the notebook's working directory.
filename = 'purchase_data.json'
read_json = pd.read_json(filename)
# pd.read_json already returns a DataFrame, so re-wrapping it in pd.DataFrame()
# adds nothing. Take an explicit copy instead, so raw_data owns its data and
# later in-place edits to raw_data cannot touch the frame that was loaded.
raw_data = read_json.copy()
raw_data.head()


Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


In [3]:
# Number of distinct values in the 'SN' column (presumably one per player).
# Missing values, if any, are not counted: dropna=True is nunique's default.
player_names = raw_data['SN']
total_players = player_names.nunique(dropna=True)
total_players

573

In [4]:
# Number of distinct values in the 'Item Name' column.
# NOTE(review): this counts names, not 'Item ID' values. If two IDs ever share
# a name they are counted once here -- confirm that is the intended definition.
item_names = raw_data['Item Name']
unique_items = item_names.nunique(dropna=True)
unique_items

179

In [5]:
# Total number of purchase records, i.e. rows in raw_data.
# len() counts every row. The previous count().max() reported the largest
# per-column non-null count, which comes up short whenever every column has
# at least one missing value.
total_purchases = len(raw_data)
total_purchases

780

In [6]:
# Rows whose 'Gender' is exactly 'Male'.
male_data = raw_data.loc[raw_data['Gender'] == 'Male']

# Distinct male players and their share of all distinct players.
male_count = male_data['SN'].nunique()
male_percentage = male_count / total_players

# len() counts rows regardless of missing values; count().max() only reports
# the best-populated column and can undercount.
male_purchases = len(male_data)
male_total_purchase_value = male_data['Price'].sum()
average_male_price = male_total_purchase_value / male_purchases

# One summary record for this gender; 'Normalized_Value' is total spend per
# distinct player.
male_dict = {
    'Gender': 'Male',
    'Players': male_count,
    'Percentage': male_percentage,
    'Number_Purchases': male_purchases,
    'Average_Price': average_male_price,
    'Total_Value': male_total_purchase_value,
    'Normalized_Value': male_total_purchase_value / male_count
}

print(f' # men {male_count} \n % men {male_percentage} \n male purchases {male_purchases} \n male purchase value {male_total_purchase_value} \naverage male purchase value {average_male_price}')

 # men 465 
 % men 0.8115183246073299 
 male purchases 633 
 male purchase value 1867.6799999999985 
average male purchase value 2.9505213270142154


In [7]:
# Rows whose 'Gender' is exactly 'Female'.
female_data = raw_data.loc[raw_data['Gender'] == 'Female']

# Distinct female players and their share of all distinct players.
female_count = female_data['SN'].nunique()
female_percentage = female_count / total_players

# len() counts rows regardless of missing values; count().max() only reports
# the best-populated column and can undercount.
female_purchases = len(female_data)
female_total_purchase_value = female_data['Price'].sum()
average_female_price = female_total_purchase_value / female_purchases

# One summary record for this gender; 'Normalized_Value' is total spend per
# distinct player.
female_dict = {
    'Gender': 'Female',
    'Players': female_count,
    'Percentage': female_percentage,
    'Number_Purchases': female_purchases,
    'Average_Price': average_female_price,
    'Total_Value': female_total_purchase_value,
    'Normalized_Value': female_total_purchase_value / female_count
}

print(f' # female {female_count} \n % female {female_percentage} \n female purchases {female_purchases} \n female purchase value {female_total_purchase_value} \n average female purchase value {average_female_price}')

 # female 100 
 % female 0.17452006980802792 
 female purchases 136 
 female purchase value 382.90999999999985 
 average female purchase value 2.815514705882352


In [8]:
# Every row whose 'Gender' is neither 'Male' nor 'Female'. Missing values also
# land here, exactly as they did with the pair of != comparisons.
other_data = raw_data.loc[~raw_data['Gender'].isin(['Male', 'Female'])]

# Distinct players in this group and their share of all distinct players.
other_count = other_data['SN'].nunique()
other_percentage = other_count / total_players

# len() counts rows regardless of missing values; count().max() only reports
# the best-populated column and can undercount.
other_purchases = len(other_data)
other_total_purchase_value = other_data['Price'].sum()
average_other_price = other_total_purchase_value / other_purchases

# One summary record for this group; 'Normalized_Value' is total spend per
# distinct player.
other_dict = {
    'Gender': 'Other',
    'Players': other_count,
    'Percentage': other_percentage,
    'Number_Purchases': other_purchases,
    'Average_Price': average_other_price,
    'Total_Value': other_total_purchase_value,
    'Normalized_Value': other_total_purchase_value / other_count
}

print(f' # other {other_count} \n % other {other_percentage} \n other purchases {other_purchases} \n other purchase value {other_total_purchase_value} \n average other price {average_other_price}')

 # other 8 
 % other 0.013961605584642234 
 other purchases 11 
 other purchase value 35.739999999999995 
 average other price 3.2490909090909086


In [9]:
# Stack the three per-gender summary dicts into one table, one row per dict.
columns = [male_dict, female_dict, other_dict]
gender_df = pd.DataFrame(data=columns)
# set_index returns a new frame, so this only *displays* the table keyed by
# 'Gender'; gender_df itself keeps its default integer index.
gender_df.set_index(keys='Gender')

Unnamed: 0_level_0,Average_Price,Normalized_Value,Number_Purchases,Percentage,Players,Total_Value
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Male,2.950521,4.016516,633,0.811518,465,1867.68
Female,2.815515,3.8291,136,0.17452,100,382.91
Other,3.249091,4.4675,11,0.013962,8,35.74


In [10]:
# Report the smallest and largest value found in the 'Age' column.
ages = raw_data['Age']
print(f"{ages.min()} - {ages.max()} years")

7 - 45 years


In [11]:
# Bin edges run from 6 to 46 in steps of 4.
bins = list(range(6, 47, 4))
# Labels are built from consecutive edge pairs: '06-10', '10-14', ..., '42-46'.
# The intervals are closed on the right, so '06-10' means (6, 10] and
# '10-14' means (10, 14]; the labels look overlapping but the bins are not.
group_labels = [f'{low:02d}-{high}' for low, high in zip(bins[:-1], bins[1:])]
age_binned = pd.cut(raw_data['Age'], bins=bins, right=True, labels=group_labels)

In [12]:
# Attach the binned ages to the purchase table as a new 'Age Group' column.
# This modifies raw_data in place; re-running the cell simply overwrites the
# column with the same values.
raw_data['Age Group']= age_binned
# Preview the first rows to confirm the new column is present.
raw_data.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN,Age Group
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34,34-38
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46,18-22
2,34,Male,174,Primitive Blade,2.46,Assastnya25,30-34
3,21,Male,92,Final Critic,1.36,Pheusrical25,18-22
4,23,Male,63,Stormfury Mace,1.27,Aela59,22-26


In [13]:
# Per-age-group purchasing summary.
grouped = raw_data.groupby('Age Group')

# Aggregations that feed more than one column are computed once and reused.
players_per_group = grouped['SN'].nunique()
value_per_group = grouped['Price'].sum()

columns = {
    # Share of all distinct players that fall in the group.
    'Proportion': players_per_group / total_players,
    'Avg Price': grouped['Price'].mean(),
    'Purchase Count': grouped['SN'].count(),
    'Total Value': value_per_group,
    # Total spend per distinct player in the group.
    'Relative Purchases': value_per_group / players_per_group
}
# Rounded to two decimals for display; very small proportions show as 0.0.
grouped_stats = pd.DataFrame(columns).round(2)
grouped_stats



Unnamed: 0_level_0,Avg Price,Proportion,Purchase Count,Relative Purchases,Total Value
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
06-10,3.02,0.04,32,4.39,96.62
10-14,2.7,0.03,31,4.19,83.79
14-18,2.88,0.15,111,3.8,319.32
18-22,2.93,0.31,231,3.8,676.2
22-26,2.94,0.27,207,3.97,608.02
26-30,2.98,0.08,63,4.27,187.99
30-34,3.07,0.06,46,4.15,141.24
34-38,2.81,0.04,37,4.16,104.06
38-42,3.13,0.02,20,5.69,62.56
42-46,3.26,0.0,2,3.26,6.53


In [36]:
# Per-player spending summary, ranked by total spend.
top_spenders_group = raw_data.groupby('SN')

# Count and sum of 'Price' per player, each computed once and reused below.
purchase_counts = top_spenders_group['Price'].count()
purchase_totals = top_spenders_group['Price'].sum()

columns = {
    'Purchase Count': purchase_counts,
    'Average Purchase Price': (purchase_totals / purchase_counts).round(2),
    'Total Purchase Value': purchase_totals
}
# Highest total spend first.
top_spenders_df = pd.DataFrame(columns).sort_values(by='Total Purchase Value', ascending=False)
top_spenders_df.head(5)

Unnamed: 0_level_0,Average Purchase Price,Purchase Count,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,3.41,5,17.06
Saedue76,3.39,4,13.56
Mindimnya67,3.18,4,12.74
Haellysu29,4.24,3,12.73
Eoda93,3.86,3,11.58


In [62]:
# Per-item sales summary. Grouping on ID, name and price together keeps all
# three visible in the resulting index.
items_group = raw_data.groupby(['Item ID', 'Item Name', 'Price'])
item_prices = items_group['Price']

columns = {
    'Purchase Count': item_prices.count(),
    'Total Purchase Value': item_prices.sum()
}

# Most frequently purchased items first.
top_items_df = pd.DataFrame(columns).sort_values(by='Purchase Count', ascending=False)
top_items_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Purchase Count,Total Purchase Value
Item ID,Item Name,Price,Unnamed: 3_level_1,Unnamed: 4_level_1
39,"Betrayal, Whisper of Grieving Widows",2.35,11,25.85
84,Arcane Gem,2.23,11,24.53
31,Trickster,2.07,9,18.63
175,Woeful Adamantite Claymore,1.24,9,11.16
13,Serenity,1.49,9,13.41


In [63]:
# Re-rank the same per-item table by total purchase value, highest first.
# This rebinds top_items_df, replacing the count-sorted ordering.
top_items_df = top_items_df.sort_values(by='Total Purchase Value', ascending=False)
top_items_df.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Purchase Count,Total Purchase Value
Item ID,Item Name,Price,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,4.14,9,37.26
115,Spectral Diamond Doomblade,4.25,7,29.75
32,Orenmir,4.95,6,29.7
103,Singed Scalpel,4.87,6,29.22
107,"Splitter, Foe Of Subtlety",3.61,8,28.88
