In [1]:
import pandas as pd
import os
readfile = os.path.join('purchase_data2.json')
pur_data = pd.read_json(readfile)

In [2]:
player_count = len(pur_data['SN'].unique())
players_df = pd.DataFrame([{'Total Players': player_count}])
players_df.set_index('Total Players', inplace = True)
players_df

74


In [3]:
no_dup_items = pur_data.drop_duplicates(['Item ID'], keep = 'last')
total_unique = len(no_dup_items)

total_pur = pur_data['Price'].count()

total_rev = round(pur_data['Price'].sum(),2)

avg_price = round(total_rev/total_pur, 2)


pur_analysis = pd.DataFrame([{
    
    "Number of Unique Items": total_unique,
    'Average Purchase Price': avg_price,
    'Total Purchases': total_pur,
    'Total Revenue': total_rev
}])


pur_analysis.style.format({'Average Purchase Price': '${:.2f}', 'Total Revenue': '${:,.2f}'})

Unnamed: 0,Average Purchase Price,Number of Unique Items,Total Purchases,Total Revenue
0,$2.92,64,78,$228.10


In [4]:

no_dup_players = pur_data.drop_duplicates(['SN'], keep ='last')


gender_counts = no_dup_players['Gender'].value_counts().reset_index()

gender_counts['% of Players'] = gender_counts['Gender']/player_count * 100

gender_counts.rename(columns = {'index': 'Gender', 'Gender': '# of Players'}, inplace = True)

gender_counts.set_index(['Gender'], inplace = True)

gender_counts.style.format({"% of Players": "{:.1f}%"})

Unnamed: 0_level_0,# of Players,% of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,60,81.1%
Female,13,17.6%
Other / Non-Disclosed,1,1.4%


In [5]:
pur_count_by_gen = pd.DataFrame(pur_data.groupby('Gender')['Gender'].count())

total_pur_by_gen = pd.DataFrame(pur_data.groupby('Gender')['Price'].sum())

pur_analysis_gen = pd.merge(pur_count_by_gen, total_pur_by_gen, left_index = True, right_index = True)

pur_analysis_gen.rename(columns = {'Gender': '# of Purchases', 'Price':'Total Purchase Value'}, inplace=True)

pur_analysis_gen['Average Purchase Price'] = pur_analysis_gen['Total Purchase Value']/pur_analysis_gen['# of Purchases']

pur_analysis_gen = pur_analysis_gen.merge(gender_counts, left_index = True, right_index = True)

pur_analysis_gen['Normalized Totals'] = pur_analysis_gen['Total Purchase Value']/pur_analysis_gen['# of Players']
pur_analysis_gen

del pur_analysis_gen['% of Players']
del pur_analysis_gen['# of Players']

pur_analysis_gen.style.format({'Total Purchase Value': '${:.2f}', 'Average Purchase Price': '${:.2f}', 'Normalized Totals': '${:.2f}'})

Unnamed: 0_level_0,# of Purchases,Total Purchase Value,Average Purchase Price,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,13,$41.38,$3.18,$3.18
Male,64,$184.60,$2.88,$3.08
Other / Non-Disclosed,1,$2.12,$2.12,$2.12


In [6]:
# By AGE

pur_data.loc[(pur_data['Age'] < 10), 'age_bin'] = "< 10"
pur_data.loc[(pur_data['Age'] >= 10) & (pur_data['Age'] <= 14), 'age_bin'] = "10 - 14"
pur_data.loc[(pur_data['Age'] >= 15) & (pur_data['Age'] <= 19), 'age_bin'] = "15 - 19"
pur_data.loc[(pur_data['Age'] >= 20) & (pur_data['Age'] <= 24), 'age_bin'] = "20 - 24"
pur_data.loc[(pur_data['Age'] >= 25) & (pur_data['Age'] <= 29), 'age_bin'] = "25 - 29"
pur_data.loc[(pur_data['Age'] >= 30) & (pur_data['Age'] <= 34), 'age_bin'] = "30 - 34"
pur_data.loc[(pur_data['Age'] >= 35) & (pur_data['Age'] <= 39), 'age_bin'] = "35 - 39"
pur_data.loc[(pur_data['Age'] >= 40), 'age_bin'] = "> 40"



pur_count_age = pd.DataFrame(pur_data.groupby('age_bin')['SN'].count())

avg_price_age = pd.DataFrame(pur_data.groupby('age_bin')['Price'].mean())

tot_pur_age = pd.DataFrame(pur_data.groupby('age_bin')['Price'].sum())

no_dup_age = pd.DataFrame(pur_data.drop_duplicates('SN', keep = 'last').groupby('age_bin')['SN'].count())

merge_age = pd.merge(pur_count_age, avg_price_age, left_index = True, right_index = True).merge(tot_pur_age, left_index = True, right_index = True).merge(no_dup_age, left_index = True, right_index = True)

merge_age.rename(columns = {"SN_x": "# of Purchases", "Price_x": "Average Purchase Price", "Price_y": "Total Purchase Value", "SN_y": "# of Purchasers"}, inplace = True)

merge_age['Normalized Totals'] = merge_age['Total Purchase Value']/merge_age['# of Purchasers']

merge_age.index.rename("Age", inplace = True)

merge_age.style.format({'Average Purchase Price': '${:.2f}', 'Total Purchase Value': '${:.2f}', 'Normalized Totals': '${:.2f}'})

Unnamed: 0_level_0,# of Purchases,Average Purchase Price,Total Purchase Value,# of Purchasers,Normalized Totals
Age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
10 - 14,3,$2.99,$8.96,3,$2.99
15 - 19,11,$2.76,$30.41,11,$2.76
20 - 24,36,$3.02,$108.89,34,$3.20
25 - 29,9,$2.90,$26.11,8,$3.26
30 - 34,7,$1.98,$13.89,6,$2.31
35 - 39,6,$3.56,$21.37,6,$3.56
< 10,5,$2.76,$13.82,5,$2.76
> 40,1,$4.65,$4.65,1,$4.65


In [7]:
# Identify the the top 5 spenders in the game by total purchase value, then list (in a table):


#Group by screen name to find, total purchase per person, number of purchases per person, and average price price per person
purchase_amt_by_SN = pd.DataFrame(pur_data.groupby('SN')['Price'].sum())
num_purchase_by_SN = pd.DataFrame(pur_data.groupby('SN')['Price'].count())
avg_purchase_by_SN = pd.DataFrame(pur_data.groupby('SN')['Price'].mean())
# merge the above dfs
merged_top5 = pd.merge(purchase_amt_by_SN, num_purchase_by_SN, left_index = True, right_index = True).merge(avg_purchase_by_SN, left_index=True, right_index=True)
# rename columns
merged_top5.rename(columns = {'Price_x': 'Total Purchase Value', 'Price_y':'Purchase Count', 'Price':'Average Purchase Price'}, inplace = True)
# sort from highest purchase value to lowest
merged_top5.sort_values('Total Purchase Value', ascending = False, inplace=True)
# take top 5 only
merged_top5 = merged_top5.head()
# format
merged_top5.style.format({'Total Purchase Value': '${:.2f}', 'Average Purchase Price': '${:.2f}'})

Unnamed: 0_level_0,Total Purchase Value,Purchase Count,Average Purchase Price
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Sundaky74,$7.41,2,$3.71
Aidaira26,$5.13,2,$2.56
Eusty71,$4.81,1,$4.81
Chanirra64,$4.78,1,$4.78
Alarap40,$4.71,1,$4.71


In [8]:
# Identify the 5 most popular items by purchase count, then list (in a table):



top5_items_ID = pd.DataFrame(pur_data.groupby('Item ID')['Item ID'].count())

top5_items_ID.sort_values('Item ID', ascending = False, inplace = True)

top5_items_ID = top5_items_ID.iloc[0:6][:]

top5_items_total = pd.DataFrame(pur_data.groupby('Item ID')['Price'].sum())
 
top5_items = pd.merge(top5_items_ID, top5_items_total, left_index = True, right_index = True)

no_dup_items = pur_data.drop_duplicates(['Item ID'], keep = 'last')

top5_merge_ID = pd.merge(top5_items, no_dup_items, left_index = True, right_on = 'Item ID')

top5_merge_ID = top5_merge_ID[['Item ID', 'Item Name', 'Item ID_x', 'Price_y', 'Price_x']]

top5_merge_ID.set_index(['Item ID'], inplace = True)

top5_merge_ID.rename(columns =  {'Item ID_x': 'Purchase Count', 'Price_y': 'Item Price', 'Price_x': 'Total Purchase Value'}, inplace=True)

top5_merge_ID.style.format({'Item Price': '${:.2f}', 'Total Purchase Value': '${:.2f}'})

Unnamed: 0_level_0,Item Name,Purchase Count,Item Price,Total Purchase Value
Item ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
94,Mourning Blade,3,$3.64,$10.92
90,Betrayer,2,$4.12,$8.24
111,Misery's End,2,$1.79,$3.58
64,Fusion Pummel,2,$2.42,$4.84
154,Feral Katana,2,$4.11,$8.22
126,Exiled Mithril Longsword,2,$1.08,$2.16


In [9]:
# Most Profitable Items


top5_profit = pd.DataFrame(pur_data.groupby('Item ID')['Price'].sum())
top5_profit.sort_values('Price', ascending = False, inplace = True)

top5_profit = top5_profit.iloc[0:5][:]

pur_count_profit = pd.DataFrame(pur_data.groupby('Item ID')['Item ID'].count())

top5_profit = pd.merge(top5_profit, pur_count_profit, left_index = True, right_index = True, how = 'left')
top5_merge_profit = pd.merge(top5_profit, no_dup_items, left_index = True, right_on = 'Item ID', how = 'left')
top5_merge_profit = top5_merge_profit[['Item ID', 'Item Name', 'Item ID_x', 'Price_y','Price_x']]
top5_merge_profit.set_index(['Item ID'], inplace=True)
top5_merge_profit.rename(columns = {'Item ID_x': 'Purchase Count', 'Price_y': 'Item Price', 'Price_x': 'Total Purchase Value'}, inplace = True)
top5_merge_profit.style.format({'Item Price': '${:.2f}', 'Total Purchase Value': '${:.2f}'})

Unnamed: 0_level_0,Item Name,Purchase Count,Item Price,Total Purchase Value
Item ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
94,Mourning Blade,3,$3.64,$10.92
117,"Heartstriker, Legacy of the Light",2,$4.71,$9.42
93,Apocalyptic Battlescythe,2,$4.49,$8.98
90,Betrayer,2,$4.12,$8.24
154,Feral Katana,2,$4.11,$8.22


In [10]:
highest_priced = no_dup_items.sort_values('Price', ascending = False)
highest_priced[['Item ID', 'Item Name', 'Price']].head(18)

Unnamed: 0,Item ID,Item Name,Price
48,84,Arcane Gem,4.81
62,25,Hero Cane,4.78
32,117,"Heartstriker, Legacy of the Light",4.71
6,148,"Warmonger, Gift of Suffering's End",4.65
61,31,Trickster,4.59
17,156,Soul-Forged Steel Shortsword,4.53
73,93,Apocalyptic Battlescythe,4.49
60,68,"Storm-Weaver, Slayer of Inception",4.39
14,44,Bonecarvin Battle Axe,4.36
24,70,Hope's End,4.28


In [11]:
lowest_priced = no_dup_items.sort_values('Price', ascending = True)
lowest_priced[['Item ID', 'Item Name', 'Price']].head(18)

Unnamed: 0,Item ID,Item Name,Price
58,105,Hailstorm Shadowsteel Scythe,1.02
69,126,Exiled Mithril Longsword,1.08
70,98,"Deadline, Voice Of Subtlety",1.29
64,164,Exiled Doomblade,1.31
56,79,"Alpha, Oath of Zeal",1.31
41,127,"Heartseeker, Reaver of Souls",1.34
16,174,Primitive Blade,1.36
38,9,"Thorn, Conqueror of the Corrupted",1.42
13,23,Crucifer,1.62
28,170,Shadowsteel,1.74


In [12]:
pur_analysis_gen.style.format({'Total Purchase Value': '${:.2f}', 'Average Purchase Price': '${:.2f}', 'Normalized Totals': '${:.2f}'})

Unnamed: 0_level_0,# of Purchases,Total Purchase Value,Average Purchase Price,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,13,$41.38,$3.18,$3.18
Male,64,$184.60,$2.88,$3.08
Other / Non-Disclosed,1,$2.12,$2.12,$2.12


In [13]:
percent_total_gen = pur_analysis_gen['Total Purchase Value']/total_rev
percent_total_gen

Gender
Female                   0.181412
Male                     0.809294
Other / Non-Disclosed    0.009294
Name: Total Purchase Value, dtype: float64