In [1]:
# Imports
import pandas as pd
import numpy as np

In [249]:
# The path to the json file
file = "purchase_data.json"

# Read the purchase data into pandas
file_df = pd.read_json(file)
file_df.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


In [250]:
# Find the total number of players
totalPlayers = file_df["SN"].count()
players_df = pd.DataFrame({"Total Players":[totalPlayers]})
players_df

Unnamed: 0,Total Players
0,780


In [251]:
# Number of unique items
unique_items_df = pd.DataFrame({'Number Of Unique Items': [len(file_df["Item Name"].unique())],
                                'Average purchase price': [file_df["Price"].mean()],
                                'Number of purchases': [file_df["Item ID"].count()],
                                'Total Revenue': [file_df["Price"].sum()] })
unique_items_df

Unnamed: 0,Average purchase price,Number Of Unique Items,Number of purchases,Total Revenue
0,2.931192,179,780,2286.33


In [274]:
#Gender Demographics
df = file_df["Gender"].value_counts()

p_cent_males = round((df.values.tolist()[0]/totalPlayers * 100), 2)
p_cent_females = round((df.values.tolist()[1]/totalPlayers * 100), 2)
p_cent_others = round((df.values.tolist()[2]/totalPlayers * 100), 2)

new_df = pd.DataFrame({'Gender': ["Male", "Female", "others/Non-Disclosed"],
                       'Percentage of players':[p_cent_males, p_cent_females, p_cent_others],
                       'Total Count':[df.values.tolist()[0],df.values.tolist()[1],df.values.tolist()[2]]},)

grouped_df = new_df.groupby(['Gender'])
grouped_df.head()

Unnamed: 0,Gender,Percentage of players,Total Count
0,Male,81.15,633
1,Female,17.44,136
2,others/Non-Disclosed,1.41,11


In [285]:
#Purchasing Analysis (Males)
male_df = file_df.loc[file_df["Gender"] == "Male"]


# #Purchase Count
# print("Purchase Count: " + str(male_df["Price"].count()))

# #Average Purchase Price
# print("Average purchase price: " + str(male_df["Price"].mean()))

# #Total Purchase Value
# print("Total Purchase Value: " + str(male_df["Price"].sum()))

# #Normalized Totals
# #????????????????

#male_df.head()

In [283]:
#Purchasing Analysis (Females)
female_df = file_df.loc[file_df["Gender"] == "Female"]

In [287]:
#Purchasing Analysis (Other/Non-Disclosed)
other_nonDis_df = file_df.loc[file_df["Gender"] == "Other/Non-Disclosed"]
purchase_anlys_df = pd.DataFrame({'Gender': ["Male", "Female", "others/Non-Disclosed"],
                                 'Purchase Count': [male_df["Price"].count(), female_df["Price"].count(), other_nonDis_df["Price"].count()],
                                 'Average Puchase Price': [male_df["Price"].mean(),female_df["Price"].mean(),other_nonDis_df["Price"].mean()],
                                 'Total Purchase Value': [male_df["Price"].sum(),female_df["Price"].sum(),other_nonDis_df["Price"].sum()]})
purchase_anlys_df.head()

#Sanity Check:
#file_df[file_df["Gender"] == "others/Non-Disclosed"]

Unnamed: 0,Average Puchase Price,Gender,Purchase Count,Total Purchase Value
0,2.950521,Male,633,1867.68
1,2.815515,Female,136,382.91
2,,others/Non-Disclosed,0,0.0


In [277]:
#Age Demographics
#------------------
# The below each broken into bins of 4 years (i.e. <10, 10-14, 15-19, etc.)

bins = [6,10,14,18,22,26,30,34,38,42,46]
group_labels = ["6-10 years","10-14 years", "14-18 years", "18-22 years", "22-26 years", "26-30 years",
               "30-34 years", "34-38 years", "38-42 years", "42-48 years"]

# Slice the data and place it into bins and place the data series into a new column inside of the DataFrame
file_df["Age Group"] = pd.cut(file_df["Age"],bins,labels=group_labels)
#file_df.head(10)


In [278]:
#Age demographics continued...
#-----------------------------
# Purchase Count
# Average Purchase Price
# Total Purchase Value
# Normalized Totals

# Create a GroupBy object based upon "Age Group"
file_group = file_df.groupby("Age Group")

#print(file_group["Item ID"].count())

# Get the average of each column within the GroupBy object
file_group["Price"].mean()
file_group["Price"].sum()
Age_goup_df = pd.DataFrame({'Purchase Count' : file_group["Item ID"].count(), 
                        'Average Purchase Price': file_group["Price"].mean(),
                         'Total Purchase Value':file_group["Price"].sum()})
Age_goup_df.head()

Unnamed: 0_level_0,Average Purchase Price,Purchase Count,Total Purchase Value
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
6-10 years,3.019375,32,96.62
10-14 years,2.702903,31,83.79
14-18 years,2.876757,111,319.32
18-22 years,2.927273,231,676.2
22-26 years,2.937295,207,608.02


In [281]:
#Just to check if my results are ok
# SN = Undirrala66 pymoli_df[pymoli_df["Item Name"] == "Stormfury Mace"]
#file_df[file_df["SN"] == "Undirrala66"]
#file_df[file_df["Item Name"] == "Stormfury Mace"]

In [181]:
# Top Spenders
#--------------
# Identify the the top 5 spenders in the game by total purchase value, then list (in a table):
# SN
# Purchase Count
# Average Purchase Price
# Total Purchase Value
#---------------------------------------------------------------------------------------------

# Create a GroupBy object based upon "SN"
groupby_SN = file_df.groupby(["SN"])


new_df = pd.DataFrame({'Purchase Count' : groupby_SN["Item Name"].count(),
                       'Average Purchase Price': groupby_SN["Price"].mean(),
                       'Total Purchase Value':groupby_SN["Price"].sum()})
new_df.head()

#Sort the values based on Total Purchase Value

sort_df = new_df.sort_values(["Total Purchase Value"], ascending=False)
sort_df.head()       


Unnamed: 0_level_0,Average Purchase Price,Purchase Count,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,3.412,5,17.06
Saedue76,3.39,4,13.56
Mindimnya67,3.185,4,12.74
Haellysu29,4.243333,3,12.73
Eoda93,3.86,3,11.58


In [213]:
# Most Popular Items
#--------------------
# Identify the 5 most popular items by purchase count, then list (in a table):
# Item ID
# Item Name
# Purchase Count
# Item Price
# Total Purchase Value
#------------------------------------------------------------------------------

# Create a GroupBy object based upon "Item Name"
groupby_ID = file_df.groupby(["Item ID"])

new_df = pd.DataFrame({'Item Name':groupby_ID["Item Name"].head(),
                       'Purchase Count' : groupby_ID["Item Name"].count(),
                       'Item Price': groupby_ID["Price"].head(),
                       'Total Purchase Value':groupby_ID["Price"].sum()})

sort_df = new_df.sort_values(["Purchase Count"], ascending=False)
sort_df.head()   

Unnamed: 0,Item Name,Item Price,Purchase Count,Total Purchase Value
39,Stormfury Mace,1.27,11.0,25.85
84,"Thorn, Satchel of Dark Souls",4.51,11.0,24.53
31,"Shadow Strike, Glory of Ending Hope",1.93,9.0,18.63
175,Retribution Axe,4.14,9.0,11.16
13,"Piety, Guardian of Riddles",3.68,9.0,13.41


In [214]:
# Most Profitable Items

# Identify the 5 most profitable items by total purchase value, then list (in a table):
# Item ID
# Item Name
# Purchase Count
# Item Price
# Total Purchase Value

# Create a GroupBy object based upon "Item Name"
groupby_ID = file_df.groupby(["Item ID"])

new_df = pd.DataFrame({'Item Name':groupby_ID["Item Name"].head(),
                       'Purchase Count' : groupby_ID["Item Name"].count(),
                       'Item Price': groupby_ID["Price"].head(),
                       'Total Purchase Value':groupby_ID["Price"].sum()})

sort_df = new_df.sort_values(["Total Purchase Value"], ascending=False)
sort_df.head()   

Unnamed: 0,Item Name,Item Price,Purchase Count,Total Purchase Value
34,"Alpha, Reach of Ending Hope",1.55,9.0,37.26
115,"Thorn, Conqueror of the Corrupted",2.04,7.0,29.75
32,"Rage, Legacy of the Lone Victor",4.32,6.0,29.7
103,"Mercy, Katana of Dismay",4.37,6.0,29.22
107,Spectral Diamond Doomblade,4.25,8.0,28.88
