In [121]:
import pandas as pd
import numpy as np
import os

In [122]:
# import and read the json purchase data file 
purchase_data = os.path.join('Resources','purchase_data.json')

purchase_data_df = pd.read_json(purchase_data)

purchase_data_df.head()


Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


In [123]:
## Player Count
# Total Number of Players
total_unique_players = purchase_data_df["SN"].nunique()

## Output
total_players_df = pd.DataFrame([{"Total Players": total_unique_players}])
total_players_df

Unnamed: 0,Total Players
0,573


In [124]:
## Purchasing Analysis (Total)
# Number of Unique Items
num_unique_items = purchase_data_df["Item ID"].nunique()

# Average Purchase Price
avg_purchase_price = purchase_data_df["Price"].mean()
avg_purchase_price = np.round(avg_purchase_price, 2)

# Total Number of Purchases
total_num_purchases = purchase_data_df["Price"].count()


# Total Revenue
total_revenue = purchase_data_df["Price"].sum()
total_revenue = np.round(total_revenue, 2)

## Report
purchasing_analysis = pd.DataFrame([{"Number of Unique Items": num_unique_items, 
                                     "Average Purchase Price": avg_purchase_price,
                                     "Total Number of Purchases": total_num_purchases,
                                     "Total Revenue": total_revenue
                                    }])
purchasing_analysis

Unnamed: 0,Average Purchase Price,Number of Unique Items,Total Number of Purchases,Total Revenue
0,2.93,183,780,2286.33


In [125]:
## Gender Demographics

# Percentage and Count of Male Players
total_player_count = purchase_data_df["Gender"].count()

male_count = purchase_data_df[purchase_data_df["Gender"]=="Male"].count()["Gender"]
male_percentage = (male_count/total_player_count) * 100
male_percentage = np.round(male_percentage, 2)
male_percentage

# Percentage and Count of Female Players
female_count = purchase_data_df[purchase_data_df["Gender"]=="Female"].count()["Gender"]
female_percentage = (female_count/total_player_count) * 100
female_percentage = np.round(female_percentage, 2)
female_percentage

# Percentage and Count of Other / Non-Disclosed
other_count = purchase_data_df[(purchase_data_df["Gender"]!="Female") & (purchase_data_df["Gender"]!="Male")].count()["Gender"]
other_percentage = (other_count/total_player_count) * 100
other_percentage = np.round(other_percentage, 2)
other_percentage

## Output report
gender_demographics = pd.DataFrame({"Percentage of Players": [male_percentage, female_percentage, other_percentage], 
                                    "Total Count": [male_count, female_count, other_count],
                                    "Gender": ["Male", "Female", "Other / Non-Disclosed"]
                                   })

gender_demographics = gender_demographics.set_index("Gender")
gender_demographics.index.name = None 
gender_demographics


Unnamed: 0,Percentage of Players,Total Count
Male,81.15,633
Female,17.44,136
Other / Non-Disclosed,1.41,11


In [128]:
### Purchasing Analysis (Gender)
### The below each broken by gender:

## Purchase Count
male_pur_df = purchase_data_df.loc[purchase_data_df["Gender"] == "Male"]
female_pur_df = purchase_data_df.loc[purchase_data_df["Gender"] == "Female"]
other_pur_df = purchase_data_df.loc[(purchase_data_df["Gender"]!="Female") & (purchase_data_df["Gender"]!="Male")]

male_pur_count = male_pur_df["Price"].count()
female_pur_count= female_pur_df["Price"].count()
other_pur_count = other_pur_df["Price"].count()

## Average Purchase Price
male_avg_price = np.round((male_pur_df["Price"].mean()),2)
female_avg_price = np.round((female_pur_df["Price"].mean()),2)
other_avg_price = np.round((other_pur_df["Price"].mean()),2)

## Total Purchase Value
total_male_pur_value = male_pur_df["Price"].sum()
total_female_pur_value = female_pur_df["Price"].sum()
total_other_pur_value = other_pur_df["Price"].sum()

## Normalized Totals

### Output
pur_analysis_gen = pd.DataFrame({"Purchase Count": [male_pur_count, female_pur_count, other_pur_count], 
                                    "Average Purchase Price": [male_avg_price, female_avg_price, other_avg_price],
                                    "Total Purchase Value": [total_male_pur_value, total_female_pur_value, total_other_pur_value],
                                    #"Normalized Total": []
                                    "Gender": ["Male", "Female", "Other / Non-Disclosed"]
                                   })

pur_analysis_gen = pur_analysis_gen.set_index("Gender")
pur_analysis_gen


Unnamed: 0_level_0,Average Purchase Price,Purchase Count,Total Purchase Value
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Male,2.95,633,1867.68
Female,2.82,136,382.91
Other / Non-Disclosed,3.25,11,35.74


In [133]:
## Age Demographics
# The below each broken into bins of 4 years (i.e. <10, 10-14, 15-19, etc.)
print(purchase_data_df["Age"].max())

bins = [0, 10, 15, 20, 25, 30, 35, 40, 45]
bin_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

age_pur_df = pd.DataFrame(purchase_data_df)
age_pur_df.head()
# Purchase Count
# Average Purchase Price
# Total Purchase Value
# Normalized Totals



45


Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


In [None]:
## Top Spenders
## Identify the the top 5 spenders in the game by total purchase value, then list (in a table):
# SN
# Purchase Count
# Average Purchase Price
# Total Purchase Value

In [None]:
## Most Popular Items
## Identify the 5 most popular items by purchase count, then list (in a table):
# Item ID
# Item Name
# Purchase Count
# Item Price
# Total Purchase Value

In [None]:
## Most Profitable Items
## Identify the 5 most profitable items by total purchase value, then list (in a table):
# Item ID
# Item Name
# Purchase Count
# Item Price
# Total Purchase Value