In [1]:
import pandas as pd
import numpy as np
import json

In [2]:
data_file = "purchase_data.json"
gamedata = pd.read_json(data_file)

gamedata.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


In [3]:
gamedata.shape

(780, 6)

In [4]:
gamedata.dtypes

Age            int64
Gender        object
Item ID        int64
Item Name     object
Price        float64
SN            object
dtype: object

In [5]:
players = gamedata["SN"].nunique()
player_count = pd.DataFrame({"Total Number of Players":[players]})

player_count

Unnamed: 0,Total Number of Players
0,573


In [6]:
products = gamedata["Item ID"].nunique()
avgprice = gamedata["Price"].mean()
purchases = gamedata["Item Name"].nunique()
totrevenue = gamedata["Price"].sum()


purchase_analysis = pd.DataFrame({"Number of Unique Items":[products], "Avg Purchase Price":[avgprice], 
                                  "Number of Purchases":[purchases], "Total Revenue":[totrevenue]})
purchase_analysis = purchase_analysis[["Number of Unique Items","Avg Purchase Price", "Number of Purchases", "Total Revenue"]]
purchase_analysis = purchase_analysis.round(2)
purchase_analysis

Unnamed: 0,Number of Unique Items,Avg Purchase Price,Number of Purchases,Total Revenue
0,183,2.93,179,2286.33


In [7]:
gender = gamedata["Gender"].value_counts()
gender


Male                     633
Female                   136
Other / Non-Disclosed     11
Name: Gender, dtype: int64

In [8]:
temp= gamedata.groupby(["SN","Gender"])
a= temp.sum()
a=a.reset_index()
a["Gender"].value_counts()

Male                     465
Female                   100
Other / Non-Disclosed      8
Name: Gender, dtype: int64

In [9]:
databy = gamedata.groupby(["SN","Gender"])
genderdata = databy.sum()
genderdata = genderdata.reset_index()

male = genderdata["Gender"].value_counts()['Male']
female = genderdata["Gender"].value_counts()['Female']
other = players - male - female

male_percent = (male/players)*100
female_percent = (female/players)*100
other_percent = (other/players)*100

gender_demo = pd.DataFrame({'Gender':['Male','Female','Other / Non-Disclosed'],
                            'Percentage of Players':[male_percent,female_percent,other_percent],
                            'Total Count':[male,female,other]})
gender_demo

Unnamed: 0,Gender,Percentage of Players,Total Count
0,Male,81.151832,465
1,Female,17.452007,100
2,Other / Non-Disclosed,1.396161,8


In [10]:
databy_price = pd.DataFrame(gamedata.groupby("Gender")["Price"].sum())
male_totprice = databy_price.loc["Male", "Price"]
female_totprice = databy_price.loc["Female", "Price"]
other_totprice = databy_price.loc["Other / Non-Disclosed", "Price"]

dnorm = pd.DataFrame(gamedata.groupby("Gender")["Price"].mean())
male_normprice = dnorm.loc["Male", "Price"]
female_normprice = dnorm.loc["Female", "Price"]
other_normprice = dnorm.loc["Other / Non-Disclosed", "Price"]

male_avgprice = male_totprice/male
female_avgprice = female_totprice/female
other_avgprice = other_totprice/other

gender_purchase = pd.DataFrame({'Gender':['Male','Female','Other / Non-Disclosed'],'Purchase Count':[male,female,other],
                                'Average Purchase Price':[male_avgprice,female_avgprice,other_avgprice],
                               'Total Purchase Value':[male_totprice,female_totprice,other_totprice], 
                                'Normalized Price':[male_normprice,female_normprice,other_normprice]})
gender_purchase = gender_purchase[["Gender","Purchase Count", "Average Purchase Price", "Total Purchase Value", 
                                   "Normalized Price"]]
gender_purchase

Unnamed: 0,Gender,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Price
0,Male,465,4.016516,1867.68,2.950521
1,Female,100,3.8291,382.91,2.815515
2,Other / Non-Disclosed,8,4.4675,35.74,3.249091


In [11]:
agedata = gamedata.groupby(["SN","Age"])
d = agedata.sum()
d = d.reset_index()

age10 = d.loc[(d["Age"] < 10), :].count()['Age']
age10_14 = d.loc[(d["Age"] >= 10) & (d["Age"] <= 14), :].count()['Age']
age15_19 = d.loc[(d["Age"] >= 15) & (d["Age"] <= 19), :].count()['Age']
age20_24 = d.loc[(d["Age"] >= 20) & (d["Age"] <= 24), :].count()['Age']
age25_29 = d.loc[(d["Age"] >= 25) & (d["Age"] <= 29), :].count()['Age']
age30_34 = d.loc[(d["Age"] >= 30) & (d["Age"] <= 34), :].count()['Age']
age35_39 = d.loc[(d["Age"] >= 35) & (d["Age"] <= 39), :].count()['Age']
age40 = d.loc[(d["Age"] >= 40), :].count()['Age']

agecount = pd.DataFrame({"Age Range":["<10","10-14","15-19","20-24","25-29","30-34","35-39","40+"],
                         "Percentage of Players":[(age10/players)*100,(age10_14/players)*100,(age15_19/players)*100,
                                                 (age20_24/players)*100,(age25_29/players)*100,(age30_34/players)*100,
                                                 (age35_39/players)*100,(age40/players)*100],
                         "Total Count":[age10,age10_14,age15_19,age20_24,age25_29,age30_34,age35_39,age40]})
agecount

Unnamed: 0,Age Range,Percentage of Players,Total Count
0,<10,3.315881,19
1,10-14,4.013962,23
2,15-19,17.452007,100
3,20-24,45.200698,259
4,25-29,15.183246,87
5,30-34,8.202443,47
6,35-39,4.712042,27
7,40+,1.919721,11


In [12]:
age10totp = d.loc[(d["Age"] < 10), :].sum()['Price']
age10_14totp = d.loc[(d["Age"] >= 10) & (d["Age"] <= 14), :].sum()['Price']
age15_19totp = d.loc[(d["Age"] >= 15) & (d["Age"] <= 19), :].sum()['Price']
age20_24totp = d.loc[(d["Age"] >= 20) & (d["Age"] <= 24), :].sum()['Price']
age25_29totp = d.loc[(d["Age"] >= 25) & (d["Age"] <= 29), :].sum()['Price']
age30_34totp = d.loc[(d["Age"] >= 30) & (d["Age"] <= 34), :].sum()['Price']
age35_39totp = d.loc[(d["Age"] >= 35) & (d["Age"] <= 39), :].sum()['Price']
age40totp = d.loc[(d["Age"] >= 40), :].sum()['Price']

age10norm = gamedata.loc[(gamedata["Age"] < 10), :].mean()['Price']
age10_14norm = gamedata.loc[(gamedata["Age"] >= 10) & (gamedata["Age"] <= 14), :].mean()['Price']
age15_19norm = gamedata.loc[(gamedata["Age"] >= 15) & (gamedata["Age"] <= 19), :].mean()['Price']
age20_24norm = gamedata.loc[(gamedata["Age"] >= 20) & (gamedata["Age"] <= 24), :].mean()['Price']
age25_29norm = gamedata.loc[(gamedata["Age"] >= 25) & (gamedata["Age"] <= 29), :].mean()['Price']
age30_34norm = gamedata.loc[(gamedata["Age"] >= 30) & (gamedata["Age"] <= 34), :].mean()['Price']
age35_39norm = gamedata.loc[(d["Age"] >= 35) & (gamedata["Age"] <= 39), :].mean()['Price']
age40norm = gamedata.loc[(gamedata["Age"] >= 40), :].mean()['Price']

agepurchase = pd.DataFrame({"Age Range":["<10","10-14","15-19","20-24","25-29","30-34","35-39","40+"],
                            "Total Count":[age10,age10_14,age15_19,age20_24,age25_29,age30_34,age35_39,age40],
                         "Average Purchase Price":[(age10totp/age10),(age10_14totp/age10_14),(age15_19totp/age15_19),
                                                   (age20_24totp/age20_24),(age25_29totp/age25_29),(age30_34totp/age30_34),
                                                  (age35_39totp/age35_39),(age40totp/age40)],
                            "Total Purchase Value":[age10totp,age10_14totp,age15_19totp,age20_24totp,age25_29totp,
                                                    age30_34totp,age35_39totp,age40totp],
                            "Normalized Price":[age10norm,age10_14norm,age15_19norm,age20_24norm,age25_29norm,age30_34norm,
                                                age35_39norm,age40norm]})
agepurchase = agepurchase[["Age Range","Total Count", "Average Purchase Price", "Total Purchase Value", 
                                   "Normalized Price"]]
agepurchase

Unnamed: 0,Age Range,Total Count,Average Purchase Price,Total Purchase Value,Normalized Price
0,<10,19,4.392632,83.46,2.980714
1,10-14,23,4.215217,96.95,2.77
2,15-19,100,3.8642,386.42,2.905414
3,20-24,259,3.779035,978.77,2.913006
4,25-29,87,4.256667,370.33,2.96264
5,30-34,47,4.196809,197.25,3.082031
6,35-39,27,4.422222,119.4,2.566486
7,40+,11,4.886364,53.75,3.161765


In [13]:
sn_sort = d.sort_values("Price", ascending=False)
#sn_sort = sn_sort.set_index("SN")
topval = sn_sort.loc[:, ["SN","Price"]]
topval = topval.rename(columns={"Price": "Total Purchase Value"})
topval.head()

Unnamed: 0,SN,Total Purchase Value
538,Undirrala66,17.06
428,Saedue76,13.56
354,Mindimnya67,12.74
181,Haellysu29,12.73
120,Eoda93,11.58


In [14]:
#agedata = gamedata.groupby(["SN","Age"])
tcount = agedata.count()
tcount = tcount.reset_index()
tcount = tcount.sort_values("Price",ascending=False)
tcount = tcount.drop(columns=['Age', 'Gender','Item Name','Price'])
tcount = tcount.rename(columns={"Item ID": "Purchase Count"})
tcount.head()

Unnamed: 0,SN,Purchase Count
538,Undirrala66,5
354,Mindimnya67,4
385,Qarwen67,4
428,Saedue76,4
467,Sondastan54,4


In [15]:
topmerge = pd.merge(topval,tcount,how='inner')
topmerge.head()

Unnamed: 0,SN,Total Purchase Value,Purchase Count
0,Undirrala66,17.06,5
1,Saedue76,13.56,4
2,Mindimnya67,12.74,4
3,Haellysu29,12.73,3
4,Eoda93,11.58,3


In [16]:
newtop_merge = pd.DataFrame(topmerge["Total Purchase Value"]/topmerge["Purchase Count"],columns=['Average Purchase Price'])
ntopmerge = pd.merge(topmerge,newtop_merge, how='outer',left_index=True,right_index=True)
ntopmerge = ntopmerge[["SN","Purchase Count","Average Purchase Price","Total Purchase Value"]]
ntopmerge.head()

Unnamed: 0,SN,Purchase Count,Average Purchase Price,Total Purchase Value
0,Undirrala66,5,3.412,17.06
1,Saedue76,4,3.39,13.56
2,Mindimnya67,4,3.185,12.74
3,Haellysu29,3,4.243333,12.73
4,Eoda93,3,3.86,11.58


In [17]:
itemdata = gamedata.groupby(["Item ID","Price"])
icount = itemdata.count()
icount = icount.reset_index()
icount = icount.sort_values("Item Name",ascending=False)
icount = icount.drop(columns=['Age', 'Gender','Item Name'])
icount = icount.rename(columns={"SN": "Purchase Count"})

icount.head()

Unnamed: 0,Item ID,Price,Purchase Count
39,39,2.35,11
84,84,2.23,11
31,31,2.07,9
174,175,1.24,9
13,13,1.49,9


In [18]:
iname = gamedata.groupby(["Item ID","Item Name"])
popname = iname.sum()
popname = popname.reset_index()
popname = popname.drop(columns=['Age','Price'])
popitem = pd.merge(icount,popname,how='inner')

popi_totvalue = pd.DataFrame(popitem["Price"]*popitem["Purchase Count"],columns=['Total Purchase Value'])
popimerge = pd.merge(popitem,popi_totvalue, how='outer',left_index=True,right_index=True)
popimerge = popimerge[["Item ID","Item Name","Purchase Count","Price","Total Purchase Value"]]
popimerge.head(6)

Unnamed: 0,Item ID,Item Name,Purchase Count,Price,Total Purchase Value
0,39,"Betrayal, Whisper of Grieving Widows",11,2.35,25.85
1,84,Arcane Gem,11,2.23,24.53
2,31,Trickster,9,2.07,18.63
3,175,Woeful Adamantite Claymore,9,1.24,11.16
4,13,Serenity,9,1.49,13.41
5,34,Retribution Axe,9,4.14,37.26


In [19]:
profit_sort = popimerge.sort_values("Total Purchase Value",ascending=False)
profit_sort.head()

Unnamed: 0,Item ID,Item Name,Purchase Count,Price,Total Purchase Value
5,34,Retribution Axe,9,4.14,37.26
13,115,Spectral Diamond Doomblade,7,4.25,29.75
39,32,Orenmir,6,4.95,29.7
24,103,Singed Scalpel,6,4.87,29.22
9,107,"Splitter, Foe Of Subtlety",8,3.61,28.88
