In [64]:
# Import dependencies
import pandas as pd

In [65]:
# Location of the raw purchase records, relative to the notebook's working directory
json_path = 'raw_data/purchase_data.json'

# Parse the JSON file into a DataFrame; every later cell reads from pymoli_df
pymoli_df = pd.read_json(json_path)

# Preview the first five rows to check that the columns parsed as expected
pymoli_df.head(5)

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


In [66]:
# Find the total number of players by counting distinct screen names (SN).
# SN's are repeated in the dataset (one row per purchase), so a plain row
# count would overstate the player base. nunique() counts each SN once and
# ignores missing values, without building the full frequency table that
# len(value_counts()) would.
total_players = pymoli_df["SN"].nunique()

# One-row summary table so the figure renders as a labelled DataFrame
players_df = pd.DataFrame({"Total Players": total_players}, index=[0])

players_df

Unnamed: 0,Total Players
0,573


In [69]:
# Purchasing analysis (Total)
# Store-wide figures: distinct items sold, plus count / mean / sum over Price.
price_column = pymoli_df["Price"]

unique_items = pymoli_df["Item ID"].nunique()   # distinct item IDs
total_purchases = price_column.size             # number of rows in the Price column
total_revenue = price_column.sum()
avg_purchase = price_column.mean()

# Assemble the scalars into a one-row table (index=[0] is required because
# every value is a scalar)
summary_values = {
    "Number of unique items": unique_items,
    "Average price": avg_purchase,
    "Number of purchases": total_purchases,
    "Total revenue": total_revenue,
}
purchasing_analysis_df = pd.DataFrame(summary_values, index=[0])

# Render the monetary columns as dollar strings with two decimals
for money_col in ("Average price", "Total revenue"):
    purchasing_analysis_df[money_col] = purchasing_analysis_df[money_col].map("${:.2f}".format)

purchasing_analysis_df

Unnamed: 0,Average price,Number of purchases,Number of unique items,Total revenue
0,$2.93,780,183,$2286.33


In [68]:
# Gender Demographics
# gender_grouped is reused by the per-gender purchasing cell further down.
gender_grouped = pymoli_df.groupby("Gender")

# Distinct screen names per gender, so a player with several purchases is
# counted once within a gender.
players_per_gender = gender_grouped["SN"].nunique()

# Share of the overall player count, formatted as a two-decimal string
gender_share = (100 * players_per_gender / total_players).map("{:.2f}".format)

gender_demo = pd.DataFrame({"Total Counts": players_per_gender,
                            "Percentage of Players": gender_share})

gender_demo

Unnamed: 0_level_0,Percentage of Players,Total Counts
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,17.45,100
Male,81.15,465
Other / Non-Disclosed,1.4,8


In [74]:
# Purchasing Analysis (Gender)
# All three metrics are taken from the Price column so that they describe the
# same set of rows. count() only counts non-null entries, so counting on an
# unrelated column (previously "Age") could disagree with the mean/sum if
# that column ever had missing values.
gender_prices = gender_grouped["Price"]

purchase_count = gender_prices.count()
# NOTE: avg_purchase is rebound here to a per-gender Series; the store-wide
# purchasing cell assigns a scalar to the same name.
avg_purchase = gender_prices.mean()
total_purchase_value = gender_prices.sum()


# The three Series share the Gender index, so they align row by row
purchasing_by_gender_df = pd.DataFrame({"Purchase count": purchase_count,
                                        "Average purchase price": avg_purchase,
                                        "Total purchase value": total_purchase_value})

# Render the monetary columns as dollar strings with two decimals
purchasing_by_gender_df["Average purchase price"] = purchasing_by_gender_df["Average purchase price"].map("${:.2f}".format)
purchasing_by_gender_df["Total purchase value"] = purchasing_by_gender_df["Total purchase value"].map("${:.2f}".format)

purchasing_by_gender_df

Unnamed: 0_level_0,Average purchase price,Purchase count,Total purchase value
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,$2.82,136,$382.91
Male,$2.95,633,$1867.68
Other / Non-Disclosed,$3.25,11,$35.74


In [81]:
# Age demographics

# Use halves because ages are whole numbers and pd.cut's default intervals are
# right-inclusive: (9.5, 14.5] captures exactly the ages 10 through 14.
# The last edge is infinite so the "45+" label really is open-ended; with a
# finite cap (previously 100) any larger age would fall outside every bin,
# become NaN, and silently drop out of the grouped counts.
bins = [0, 9.5, 14.5, 19.5, 24.5, 29.5, 34.5, 39.5, 44.5, float("inf")]

# One label per interval, i.e. len(bins) - 1 entries
bin_labels = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40-44", "45+"]

# Adds an "Age group" column to pymoli_df; re-running the cell just overwrites it
pymoli_df["Age group"] = pd.cut(pymoli_df["Age"], bins, labels = bin_labels)

age_grouped = pymoli_df.groupby("Age group")

# Distinct screen names per age group, computed once and reused for both columns
players_per_age_group = age_grouped["SN"].nunique()

age_demo = pd.DataFrame({"Total Counts": players_per_age_group,
                         "Percentage of Players": 100*players_per_age_group/total_players})

# Render the percentage as a two-decimal string
age_demo["Percentage of Players"] = age_demo["Percentage of Players"].map("{:.2f}".format)

age_demo

Unnamed: 0_level_0,Percentage of Players,Total Counts
Age group,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,3.32,19
10-14,4.01,23
15-19,17.45,100
20-24,45.2,259
25-29,15.18,87
30-34,8.2,47
35-39,4.71,27
40-44,1.75,10
45+,0.17,1
