In [614]:
#import dependencies
import pandas as pd
import numpy as np

#import file
file = "Resources/purchase_data.json"

#read file
df = pd.read_json(file, low_memory=False)
df.head(20)
# df.style.format({"Price": "${:.2f}"})


Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59
5,20,Male,10,Sleepwalker,1.73,Tanimnya91
6,20,Male,153,Mercenary Sabre,4.57,Undjaskla97
7,29,Female,169,"Interrogator, Blood Blade of the Queen",3.32,Iathenudil29
8,25,Male,118,"Ghost Reaver, Longsword of Magic",2.77,Sondenasta63
9,31,Male,99,"Expiration, Warscythe Of Lost Worlds",4.53,Hilaerin92


In [585]:
#Player Count
total_players = pd.DataFrame(
    np.array(
        [
            [len(df["SN"].unique())], 
        ]))

total_players.columns = ["Total Players"]
total_players

Unnamed: 0,Total Players
0,573


In [586]:
#Purchasing Analysis(Total)
total_purchase_amount_sum = df["Price"].sum()
average_price = total_purchase_amount_sum/len(df.index)
total_num_purchases = len(df.index)
total_rev = df["Price"].sum()


purchasing_analysis = pd.DataFrame(
    np.array(
        [
            [len(df["Item ID"].unique()), 
             average_price,
             total_num_purchases,
             total_rev
            ]
            
        ]))
purchasing_analysis.columns = ["Number of Unique Items", "Average Price", "Number of Purchases", "Total Revenue"]
purchasing_analysis.style.format({"Average Price": "${:.2f}", "Total Revenue":"${:.2f}"})
purchasing_analysis


Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183.0,2.931192,780.0,2286.33


In [587]:
# Gender Breakdown (number & percent)
# Count-----
gender = df["Gender"].value_counts()

# Percentage ----
female_gender_percent = gender["Female"]/len(df.index) * 100
male_gender_percent = gender["Male"]/len(df.index) * 100
non_disclosed_gender = gender["Other / Non-Disclosed"]/len(df.index) * 100

percentage = pd.DataFrame(
        {   "Gender": ["Female", "Male", "Non Disclosed Gender"],
            "Percentage of Players": [female_gender_percent,male_gender_percent,non_disclosed_gender],
            "Total Count": [gender["Female"],gender["Male"],gender["Other / Non-Disclosed"]],
        }
    )
percentage[["Gender","Total Count","Percentage of Players"]]
new_percentage = percentage.set_index(['Gender'])

del new_percentage.index.name
new_percentage


Unnamed: 0,Percentage of Players,Total Count
Female,17.435897,136
Male,81.153846,633
Non Disclosed Gender,1.410256,11


In [588]:
#Purchasing Analysis (Gender)
#Purchase Count Variables
filtered_f_purchase_price = df.loc[df["Gender"] == "Female"]
filtered_m_purchase_price = df.loc[df["Gender"] == "Male"]
filtered_n_purchase_price = df.loc[df["Gender"] == "Other / Non-Disclosed"]

# Purchase total
filtered_f_price_total = filtered_f_purchase_price["Price"].sum()
filtered_m_price_total = filtered_m_purchase_price["Price"].sum()
filtered_n_price_total = filtered_n_purchase_price["Price"].sum()

# Average price 
avg_purchase_f_price = filtered_f_price_total/gender["Female"]
avg_purchase_m_price = filtered_m_price_total/gender["Male"]
avg_purchase_n_price = filtered_n_price_total/gender["Other / Non-Disclosed"]

# normalized purchase
normalized_f_purchase = filtered_f_price_total/len(filtered_f_purchase_price["SN"].value_counts())
normalized_m_purchase = filtered_m_price_total/len(filtered_m_purchase_price["SN"].value_counts())
normalized_n_purchase = filtered_n_price_total/len(filtered_n_purchase_price["SN"].value_counts())


purchase_analysis = pd.DataFrame(
        {   "Gender": ["Female", "Male", "Non Disclosed Gender"],
            "Purchase Count": [gender["Female"],gender["Male"],gender["Other / Non-Disclosed"]],
            "Average Purchase Price": [avg_purchase_f_price,avg_purchase_m_price,avg_purchase_n_price],
            "Total Purchase Value": [filtered_f_price_total,filtered_m_price_total,filtered_n_price_total],
            "Normalized Totals": [normalized_f_purchase,normalized_m_purchase,normalized_n_purchase],
        }
    )
purchase_analysis[["Gender","Purchase Count","Average Purchase Price", "Total Purchase Value", "Normalized Totals"]]
new_purchase_analysis = purchase_analysis.set_index(['Gender'])

del new_purchase_analysis.index.name
new_purchase_analysis

Unnamed: 0,Average Purchase Price,Normalized Totals,Purchase Count,Total Purchase Value
Female,2.815515,3.8291,136,382.91
Male,2.950521,4.016516,633,1867.68
Non Disclosed Gender,3.249091,4.4675,11,35.74


In [589]:
#Age Demographic Data
#Count
under_ten = len(df.loc[df["Age"] <= 10]) 
ten_fourteen = len(df.loc[(df["Age"] >= 10) & (df["Age"] <= 14)])
fift_nint = len(df.loc[(df["Age"] >= 15) & (df["Age"] <= 19)]) 
twent_twentfour = len(df.loc[(df["Age"] >= 20) & (df["Age"] <= 24)])
twentfive_twentnine = len(df.loc[(df["Age"] >= 25) & (df["Age"] <= 29)])
thirty_thirtyfour = len(df.loc[(df["Age"] >= 30) & (df["Age"] <= 34)])
thirtyfi_thirtynine = len(df.loc[(df["Age"] >= 35) & (df["Age"] <= 39)])
fourty_plus = len(df.loc[(df["Age"] >= 40)])

# Percentage
p_under_ten = under_ten/len(df.index) * 100
p_ten_fourteen = ten_fourteen/len(df.index) * 100
p_fift_nint = fift_nint/len(df.index) * 100
p_twent_twentfour = twent_twentfour/len(df.index) * 100
p_twentfive_twentnine = twentfive_twentnine/len(df.index) * 100
p_thirty_thirtyfour = thirty_thirtyfour/len(df.index) * 100
p_thirtyfi_thirtynine = thirtyfi_thirtynine/len(df.index) * 100
p_fourty_plus = fourty_plus/len(df.index) * 100

age_count_analysis = pd.DataFrame(
        {   "Age": ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"],
            "Percentage of Players": [p_under_ten, p_ten_fourteen, p_fift_nint, p_twent_twentfour, p_twentfive_twentnine, p_thirty_thirtyfour, p_thirtyfi_thirtynine, p_fourty_plus],
            "Total Count": [under_ten, ten_fourteen, fift_nint, twent_twentfour, twentfive_twentnine, thirty_thirtyfour,thirtyfi_thirtynine,fourty_plus],
        }
    )
age_count_analysis[["Age","Percentage of Players", "Total Count"]]
new_age_count_analysis = age_count_analysis.set_index(["Age"])

del new_age_count_analysis.index.name
new_age_count_analysis

Unnamed: 0,Percentage of Players,Total Count
<10,4.102564,32
10-14,4.487179,35
15-19,17.051282,133
20-24,43.076923,336
25-29,16.025641,125
30-34,8.205128,64
35-39,5.384615,42
40+,2.179487,17


In [612]:
# average purchase price --> use len() w/o unique()
# general values to work with
ap_under_ten = df.loc[df["Age"] <= 10] 
ap_ten_fourteen = df.loc[(df["Age"] >= 10) & (df["Age"] <= 14)]
ap_fift_nint = df.loc[(df["Age"] >= 15) & (df["Age"] <= 19)]
ap_twent_twentfour = df.loc[(df["Age"] >= 20) & (df["Age"] <= 24)]
ap_twentfive_twentnine = df.loc[(df["Age"] >= 25) & (df["Age"] <= 29)]
ap_thirty_thirtyfour = df.loc[(df["Age"] >= 30) & (df["Age"] <= 34)]
ap_thirtyfi_thirtynine = df.loc[(df["Age"] >= 35) & (df["Age"] <= 39)]
ap_fourty_plus = df.loc[(df["Age"] >= 40)]

# normalized 
n_under_ten = len(ap_under_ten["SN"].unique())
n_ten_fourteen = len(ap_ten_fourteen["SN"].unique())
n_fift_nint = len(ap_fift_nint["SN"].unique())
n_twent_twentfour = len(ap_twent_twentfour["SN"].unique())
n_twentfive_twentnine = len(ap_twentfive_twentnine["SN"].unique())
n_thirty_thirtyfour = len(ap_thirty_thirtyfour["SN"].unique())
n_thirtyfi_thirtynine = len(ap_thirtyfi_thirtynine["SN"].unique())
n_fourty_plus = len(ap_fourty_plus["SN"].unique())


# averages
av_under_ten = ap_under_ten["Price"].sum()/under_ten
av_ten_fourteen = ap_ten_fourteen["Price"].sum()/ten_fourteen
av_fift_nint = ap_fift_nint["Price"].sum()/fift_nint
av_twent_twentfour = ap_twent_twentfour["Price"].sum()/twent_twentfour
av_twentfive_twentnine = ap_twentfive_twentnine["Price"].sum()/twentfive_twentnine
av_thirty_thirtyfour = ap_thirty_thirtyfour["Price"].sum()/thirty_thirtyfour
av_thirtyfi_thirtynine = ap_thirtyfi_thirtynine["Price"].sum()/thirtyfi_thirtynine
av_fourty_plus = ap_fourty_plus["Price"].sum()/fourty_plus

# total purchase value
t_under_ten = ap_under_ten["Price"].sum()
t_ten_fourteen = ap_ten_fourteen["Price"].sum()
t_fift_nint = ap_fift_nint["Price"].sum()
t_twent_twentfour = ap_twent_twentfour["Price"].sum()
t_twentfive_twentnine = ap_twentfive_twentnine["Price"].sum()
t_thirty_thirtyfour = ap_thirty_thirtyfour["Price"].sum()
t_thirtyfi_thirtynine = ap_thirtyfi_thirtynine["Price"].sum()
t_fourty_plus = ap_fourty_plus["Price"].sum()


ap_analysis = pd.DataFrame(
        {   "Age": ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"],
            "Purchase Count": [under_ten, ten_fourteen, fift_nint, twent_twentfour, twentfive_twentnine,thirty_thirtyfour,thirtyfi_thirtynine,fourty_plus],
            "Average Purchase Price": [av_under_ten,av_ten_fourteen,av_fift_nint,av_twent_twentfour,av_twentfive_twentnine,av_thirty_thirtyfour,av_thirtyfi_thirtynine,av_fourty_plus],
           "Total Purchase Value": [t_under_ten,t_ten_fourteen,t_fift_nint,t_twent_twentfour,t_twentfive_twentnine,t_thirty_thirtyfour,t_thirtyfi_thirtynine, t_fourty_plus],
            "Normalized Totals": [n_under_ten,n_ten_fourteen,n_fift_nint,n_twent_twentfour,n_twentfive_twentnine,n_thirty_thirtyfour,n_thirtyfi_thirtynine,n_fourty_plus]
        }
    )
ap_analysis[["Age","Purchase Count", "Average Purchase Price"]]
new_ap_analysis = ap_analysis.set_index(['Age'])

del new_ap_analysis.index.name
new_ap_analysis

Unnamed: 0,Average Purchase Price,Normalized Totals,Purchase Count,Total Purchase Value
<10,3.019375,22,32,96.62
10-14,2.77,23,35,96.95
15-19,2.905414,100,133,386.42
20-24,2.913006,259,336,978.77
25-29,2.96264,87,125,370.33
30-34,3.082031,47,64,197.25
35-39,2.842857,27,42,119.4
40+,3.161765,11,17,53.75


In [None]:
#             "Average Purchase Price": [sum(ap_under_ten/under_ten), sum(ap_ten_fourteen/ten_fourteen), sum(ap_fift_nint/fift_nint), sum(ap_twent_twentfour/twent_twentfour),sum(ap_twentfive_twentnine/twentfive_twentnine), sum(ap_thirty_thirtyfour/thirty_thirtyfour), sum(ap_thirtyfi_thirtynine/thirtyfi_thirtynine), sum(ap_fourty_plus/fourty_plus)],
# # 

# ap_analysis = pd.DataFrame(
#         {   "Age": ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"],
#             "Purchase Count": [under_ten, ten_fourteen, fift_nint, twent_twentfour, twentfive_twentnine,thirty_thirtyfour,thirtyfi_thirtynine,fourty_plus],
# #             "Average Purchase Price": [sum(ap_under_ten/under_ten), sum(ap_ten_fourteen/ten_fourteen), sum(ap_fift_nint/fift_nint), sum(ap_twent_twentfour/twent_twentfour),sum(ap_twentfive_twentnine/twentfive_twentnine), sum(ap_thirty_thirtyfour/thirty_thirtyfour), sum(ap_thirtyfi_thirtynine/thirtyfi_thirtynine), sum(ap_fourty_plus/fourty_plus)],
# #            "Total Purchase Value": [ap_under_ten,ap_ten_fourteen,ap_fift_nint,ap_twent_twentfour,ap_twentfive_twentnine,ap_thirty_thirtyfour,ap_thirtyfi_thirtynine, ap_fourty_plus],
# #             "Normalized Totals": [n_under_ten,n_ten_fourteen,n_fift_nint,n_twent_twentfour,n_twentfive_twentnine,n_thirty_thirtyfour,n_thirtyfi_thirtynine,n_fourty_plus]
#         }
#     )
# ap_analysis[["Age","Purchase Count", "Average Purchase Price"]]
# #,"Average Purchase Price", "Total Purchase Value", "Normalized Totals"
# new_ap_analysis = ap_analysis.set_index(['Age'])

# del new_ap_analysis.index.name
# new_ap_analysis