In [36]:
#import dependencies 
import pandas as pd
import numpy as np
import json 

In [37]:
# Load the purchase records; the path is resolved relative to the working directory.
data_file = "purchase_data.json"
purchase_data = pd.read_json(data_file, orient="records")

# Preview the first rows to confirm the file was read as expected
purchase_data.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


# Player Counts & Demographics

In [38]:
# Keep only the columns that describe the player behind each purchase.
# At this point there is still one row per purchase, not per player.
demo_columns = ["Gender", "SN", "Age"]
player_demos = purchase_data.loc[:, demo_columns]
player_demos.head()

Unnamed: 0,Gender,SN,Age
0,Male,Aelalis34,38
1,Male,Eolo46,21
2,Male,Assastnya25,34
3,Male,Pheusrical25,21
4,Male,Aela59,23


In [39]:
# Collapse repeat purchasers so each (Gender, SN, Age) combination appears once;
# the number of remaining rows is the total number of players.
player_demos = player_demos.drop_duplicates()

# Use the row count directly. `count()[0]` looked up the first column's count
# by integer position in a label-indexed Series (deprecated in recent pandas)
# and would under-count if that column contained missing values.
total_players = len(player_demos)
total_players

573

In [40]:
# One-row summary table holding the total player count
total_playersdf = pd.DataFrame([total_players], columns=["Total Players"])

In [41]:
# Gender breakdown of the de-duplicated player table

# Number of players per gender
gender_count = player_demos["Gender"].value_counts()

# Share of all players per gender, as a percentage
gender_percent = gender_count.div(total_players).mul(100)

# Combine both measures into one table, rounded to 2 decimal places
gender_demos = pd.DataFrame(
    {"Gender Count": gender_count, "Gender Percentage": gender_percent}
).round(2)

# Display the table
gender_demos

Unnamed: 0,Gender Count,Gender Percentage
Male,465,81.15
Female,100,17.45
Other / Non-Disclosed,8,1.4


# Purchase Analysis (Total) 

In [42]:
# Headline figures computed over every purchase row
prices = purchase_data["Price"]

# Mean price paid per purchase
average_item_price = prices.mean()

# Number of purchases (rows with a non-null price)
purchase_count = prices.count()

# Total revenue
total_purchase_value = prices.sum()

# Number of distinct items that were purchased
item_count = purchase_data["Item ID"].unique().size

# Assemble the one-row summary table, rounded to 2 decimal places
purchase_totals = pd.DataFrame(
    {
        "Number of Unique Items": [item_count],
        "Total Revenue": [total_purchase_value],
        "Number of purchases": [purchase_count],
        "Average Purchase Price": [average_item_price],
    }
).round(2)

# Render the money columns as dollar strings for display
as_dollars = "${:,.2f}".format
for money_column in ("Average Purchase Price", "Total Revenue"):
    purchase_totals[money_column] = purchase_totals[money_column].map(as_dollars)

purchase_totals

Unnamed: 0,Average Purchase Price,Number of Unique Items,Number of purchases,Total Revenue
0,$2.93,183,780,"$2,286.33"


# Purchase Analysis (Gender)  

In [43]:
# Purchase statistics per gender.
# "Price" is selected before aggregating so the text columns (item names,
# screen names) are never summed or averaged; averaging them raises a
# TypeError on recent pandas versions and is wasted work on older ones.
price_by_gender = purchase_data.groupby("Gender")["Price"]
gender_purchases = price_by_gender.sum().rename("Total Purchase Value")
gender_avgprice = price_by_gender.mean().rename("Average Purchase Value")
gender_counts = price_by_gender.count().rename("Purchase Count")

# Normalize: total spend per gender divided by the number of unique players
# of that gender (gender_count comes from the player demographics cell)
normalized_total = gender_purchases / gender_count

# Create data frame to house results
gender_data = pd.DataFrame({"Normalized Total": normalized_total,
                            "Purchase Count": gender_counts,
                            "Total Purchase Value": gender_purchases,
                            "Average Purchase Value": gender_avgprice})

# Format results: round, then render the money columns as dollar strings
gender_data = gender_data.round(2)
for money_column in ("Average Purchase Value", "Total Purchase Value", "Normalized Total"):
    gender_data[money_column] = gender_data[money_column].map("${:,.2f}".format)

# Display results of purchase analysis
gender_data

Unnamed: 0_level_0,Average Purchase Value,Normalized Total,Purchase Count,Total Purchase Value
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,$2.82,$3.83,136,$382.91
Male,$2.95,$4.02,633,"$1,867.68"
Other / Non-Disclosed,$3.25,$4.47,11,$35.74


# Age Demographics

In [45]:
# Age bracket edges and their display labels.
# pd.cut builds right-closed intervals (lower, upper], so the fractional edges
# keep whole-number ages such as 10 or 15 in the bracket that starts with them.
# NOTE(review): the last bracket is (39.9, 9999999], so it includes age 40 even
# though its label reads ">40".
age_bins = [0, 9.9, 14.9, 19.9, 24.9, 29.9, 34.9, 39.9, 9999999]
group_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", ">40"]

# Tag every player with the bracket their age falls into
player_age_ranges = pd.cut(player_demos["Age"], bins=age_bins, labels=group_names)
player_demos = player_demos.assign(**{"Age Ranges": player_age_ranges})
player_demos.head()

Unnamed: 0,Gender,SN,Age,Age Ranges
0,Male,Aelalis34,38,35-39
1,Male,Eolo46,21,20-24
2,Male,Assastnya25,34,30-34
3,Male,Pheusrical25,21,20-24
4,Male,Aela59,23,20-24


In [49]:
# Number of players per age bracket and each bracket's share of all players
age_demos_total = player_demos["Age Ranges"].value_counts()
age_demo_percents = age_demos_total.div(total_players).mul(100)

# Combine into one table, ordered by age bracket and rounded to 2 decimal places
age_demos = (
    pd.DataFrame({"Total Count": age_demos_total, "Percent of Players": age_demo_percents})
    .sort_index()
    .round(2)
)
age_demos

Unnamed: 0,Percent of Players,Total Count
<10,3.32,19
10-14,4.01,23
15-19,17.45,100
20-24,45.2,259
25-29,15.18,87
30-34,8.2,47
35-39,4.71,27
>40,1.92,11


# Purchase Analysis (Age)

In [None]:
# Purchase statistics per age bracket.
# Every purchase is placed in a bracket using the same edges and labels as the
# player table, then "Price" is aggregated within each bracket.
purchase_age_ranges = pd.cut(
    purchase_data["Age"], bins=age_bins, labels=group_names
).rename("Age Ranges")

# observed=False keeps every bracket in the result, even one with no purchases,
# so the index lines up with the per-bracket player counts below.
price_by_age = purchase_data.groupby(purchase_age_ranges, observed=False)["Price"]

age_purchases = price_by_age.sum().rename("Total Purchase Value")
age_avgprice = price_by_age.mean().rename("Average Purchase Value")
age_counts = price_by_age.count().rename("Purchase Count")

# Normalize: total spend per bracket divided by the number of unique players
# in that bracket (age_demos_total comes from the age demographics cell;
# sorting puts it in bracket order to match the grouped results)
age_normalized_total = age_purchases / age_demos_total.sort_index()

# Create data frame to house results. It is stored under its own name so the
# gender table (gender_data) is left untouched.
age_data = pd.DataFrame({"Normalized Total": age_normalized_total,
                         "Purchase Count": age_counts,
                         "Total Purchase Value": age_purchases,
                         "Average Purchase Value": age_avgprice})

# Format results: round, then render the money columns as dollar strings
age_data = age_data.round(2)
for money_column in ("Average Purchase Value", "Total Purchase Value", "Normalized Total"):
    age_data[money_column] = age_data[money_column].map("${:,.2f}".format)

# Display results of purchase analysis
age_data