In [121]:
import pandas as pd
import numpy as np
import os

In [206]:
# import and read the json purchase data file 
purchase_data = os.path.join('Resources','purchase_data.json')

purchase_data_df = pd.read_json(purchase_data)

purchase_data_df


Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59
5,20,Male,10,Sleepwalker,1.73,Tanimnya91
6,20,Male,153,Mercenary Sabre,4.57,Undjaskla97
7,29,Female,169,"Interrogator, Blood Blade of the Queen",3.32,Iathenudil29
8,25,Male,118,"Ghost Reaver, Longsword of Magic",2.77,Sondenasta63
9,31,Male,99,"Expiration, Warscythe Of Lost Worlds",4.53,Hilaerin92


In [207]:
## Player Count
# Total Number of Players
total_unique_players = purchase_data_df["SN"].nunique()

## Output
total_players_df = pd.DataFrame([{"Total Players": total_unique_players}])
total_players_df

Unnamed: 0,Total Players
0,573


In [208]:
## Purchasing Analysis (Total)
# Number of Unique Items
num_unique_items = purchase_data_df["Item ID"].nunique()

# Average Purchase Price
avg_purchase_price = purchase_data_df["Price"].mean()
avg_purchase_price = np.round(avg_purchase_price, 2)

# Total Number of Purchases
total_num_purchases = purchase_data_df["Price"].count()


# Total Revenue
total_revenue = purchase_data_df["Price"].sum()
total_revenue = np.round(total_revenue, 2)

## Report
purchasing_analysis = pd.DataFrame([{"Number of Unique Items": num_unique_items, 
                                     "Average Purchase Price": avg_purchase_price,
                                     "Total Number of Purchases": total_num_purchases,
                                     "Total Revenue": total_revenue
                                    }])
purchasing_analysis

Unnamed: 0,Average Purchase Price,Number of Unique Items,Total Number of Purchases,Total Revenue
0,2.93,183,780,2286.33


In [209]:
## Gender Demographics

# Percentage and Count of Male Players
unique_players = purchase_data_df.loc[:, ["Gender", "SN", "Age"]]
unique_players = unique_players.drop_duplicates()
total_player_count = unique_players.count()["SN"]

male_count = unique_players[unique_players["Gender"]=="Male"].count()["Gender"]
male_percentage = (male_count/total_player_count) * 100
male_percentage = np.round(male_percentage, 2)

# Percentage and Count of Female Players
female_count = unique_players[unique_players["Gender"]=="Female"].count()["Gender"]
female_percentage = (female_count/total_player_count) * 100
female_percentage = np.round(female_percentage, 2)

# Percentage and Count of Other / Non-Disclosed
other_count = unique_players[(unique_players["Gender"]!="Female") & (unique_players["Gender"]!="Male")].count()["Gender"]
other_percentage = (other_count/total_player_count) * 100
other_percentage = np.round(other_percentage, 2)

## Output report
gender_demographics = pd.DataFrame({"Percentage of Players": [male_percentage, female_percentage, other_percentage], 
                                    "Total Count": [male_count, female_count, other_count],
                                    "Gender": ["Male", "Female", "Other / Non-Disclosed"]
                                   })

gender_demographics = gender_demographics.set_index("Gender")
gender_demographics.index.name = None 
gender_demographics


Unnamed: 0,Percentage of Players,Total Count
Male,81.15,465
Female,17.45,100
Other / Non-Disclosed,1.4,8


In [217]:
### Purchasing Analysis (Gender)
### The below each broken by gender:

## Purchase Count
male_pur_df = purchase_data_df.loc[purchase_data_df["Gender"] == "Male"]
female_pur_df = purchase_data_df.loc[purchase_data_df["Gender"] == "Female"]
other_pur_df = purchase_data_df.loc[(purchase_data_df["Gender"]!="Female") & (purchase_data_df["Gender"]!="Male")]

male_pur_count = male_pur_df["Price"].count()
female_pur_count= female_pur_df["Price"].count()
other_pur_count = other_pur_df["Price"].count()

## Average Purchase Price
male_avg_price = np.round((male_pur_df["Price"].mean()),2)
female_avg_price = np.round((female_pur_df["Price"].mean()),2)
other_avg_price = np.round((other_pur_df["Price"].mean()),2)

## Total Purchase Value
total_male_pur_value = male_pur_df["Price"].sum()
total_female_pur_value = female_pur_df["Price"].sum()
total_other_pur_value = other_pur_df["Price"].sum()

## Normalized Totals
total_male_normal = np.round((total_male_pur_value / male_count), 2)
total_female_normal = np.round((total_female_pur_value / female_count), 2)
total_other_normal = np.round((total_other_pur_value / other_count), 2)

### Output
pur_analysis_gen = pd.DataFrame({"Purchase Count": [male_pur_count, female_pur_count, other_pur_count], 
                                    "Average Purchase Price": [male_avg_price, female_avg_price, other_avg_price],
                                    "Total Purchase Value": [total_male_pur_value, total_female_pur_value, total_other_pur_value],
                                    "Normalized Total": [total_male_normal, total_female_normal, total_female_normal],
                                    "Gender": ["Male", "Female", "Other / Non-Disclosed"]
                                   })

pur_analysis_gen = pur_analysis_gen.set_index("Gender")
pur_analysis_gen

Unnamed: 0_level_0,Average Purchase Price,Normalized Total,Purchase Count,Total Purchase Value
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Male,2.95,4.02,633,1867.68
Female,2.82,3.83,136,382.91
Other / Non-Disclosed,3.25,3.83,11,35.74


In [251]:
## Age Demographics
# The below each broken into bins of 4 years (i.e. <10, 10-14, 15-19, etc.)

#---create bins, bin names, and relevant grouping
bins = [0, 10, 15, 20, 25, 30, 35, 40, 45]
bin_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

age_pur_df = pd.DataFrame(purchase_data_df)
age_pur_df["Age Range"] = pd.cut(age_pur_df["Age"], bins, labels = bin_names)

age_group = age_pur_df.groupby("Age Range")

#--- find unique players per age bin
age_group_vcount = age_group["SN"].value_counts()

# Purchase Count
age_group_count = age_group["Price"].count()

pur_count_bin0 = age_group_count[0]
pur_count_bin1 = age_group_count[1]
pur_count_bin2 = age_group_count[2]
pur_count_bin3 = age_group_count[3]
pur_count_bin4 = age_group_count[4]
pur_count_bin5 = age_group_count[5]
pur_count_bin6 = age_group_count[6]
pur_count_bin7 = age_group_count[7]

# Average Purchase Price
age_group_avg_pur = age_group["Price"].mean()

avg_pur_bin0 = np.round(age_group_avg_pur[0],2)
avg_pur_bin1 = np.round(age_group_avg_pur[1],2)
avg_pur_bin2 = np.round(age_group_avg_pur[2],2)
avg_pur_bin3 = np.round(age_group_avg_pur[3],2)
avg_pur_bin4 = np.round(age_group_avg_pur[4],2)
avg_pur_bin5 = np.round(age_group_avg_pur[5],2)
avg_pur_bin6 = np.round(age_group_avg_pur[6],2)
avg_pur_bin7 = np.round(age_group_avg_pur[7],2)

# Total Purchase Value
total_group_pur = age_group["Price"].sum()

total_pur_bin0 = total_group_pur[0]
total_pur_bin1 = total_group_pur[1]
total_pur_bin2 = total_group_pur[2]
total_pur_bin3 = total_group_pur[3]
total_pur_bin4 = total_group_pur[4]
total_pur_bin5 = total_group_pur[5]
total_pur_bin6 = total_group_pur[6]
total_pur_bin7 = total_group_pur[7]

# Normalized Totals
total_normal_bin0 = np.round((total_pur_bin0 / age_group_vcount[0]), 2)
total_normal_bin1 = np.round((total_pur_bin1 / age_group_vcount[1]), 2)
total_normal_bin2 = np.round((total_pur_bin2 / age_group_vcount[2]), 2)
total_normal_bin3 = np.round((total_pur_bin3 / age_group_vcount[3]), 2)
total_normal_bin4 = np.round((total_pur_bin4 / age_group_vcount[4]), 2)
total_normal_bin5 = np.round((total_pur_bin5 / age_group_vcount[5]), 2)
total_normal_bin6 = np.round((total_pur_bin6 / age_group_vcount[6]), 2)
total_normal_bin7 = np.round((total_pur_bin7 / age_group_vcount[7]), 2)

## Output
age_demographics = pd.DataFrame({"Purchase Count": [pur_count_bin0, pur_count_bin1,pur_count_bin2,pur_count_bin3,pur_count_bin4,pur_count_bin5,pur_count_bin6, pur_count_bin7], 
                                    "Average Purchase Price": [avg_pur_bin0, avg_pur_bin1, avg_pur_bin2, avg_pur_bin3, avg_pur_bin4, avg_pur_bin5, avg_pur_bin6, avg_pur_bin7],
                                    "Total Purchase Value": [total_pur_bin0, total_pur_bin1, total_pur_bin2, total_pur_bin3, total_pur_bin4, total_pur_bin5, total_pur_bin6, total_pur_bin7],
                                    "Normalized Total": [total_normal_bin0, total_normal_bin1, total_normal_bin2, total_normal_bin3, total_normal_bin4, total_normal_bin5, total_normal_bin6, total_normal_bin7],
                                    "Bins": bin_names
                                   })


age_demographics = age_demographics.set_index("Bins")
age_demographics.index.name = None 
age_demographics


Unnamed: 0,Average Purchase Price,Normalized Total,Purchase Count,Total Purchase Value
<10,3.02,32.21,32,96.62
10-14,2.87,112.08,78,224.15
15-19,2.87,264.37,184,528.74
20-24,2.96,451.31,305,902.61
25-29,2.89,109.91,76,219.82
30-34,3.07,89.13,58,178.26
35-39,2.9,63.75,44,127.49
40+,2.88,4.32,3,8.64


In [252]:
## Top Spenders
## Identify the the top 5 spenders in the game by total purchase value, then list (in a table):
# SN
# Purchase Count
# Average Purchase Price
# Total Purchase Value

#----------------------------------------------------

# Basic Calculations
user_total = purchase_data_df.groupby(["SN"]).sum()["Price"].rename("Total Purchase Value")
user_average = purchase_data_df.groupby(["SN"]).mean()["Price"].rename("Average Purchase Price")
user_count = purchase_data_df.groupby(["SN"]).count()["Price"].rename("Purchase Count")

# Convert to DataFrame
user_data = pd.DataFrame({"Total Purchase Value": user_total, "Average Purchase Price": user_average, "Purchase Count": user_count})

# Minor Data Munging
user_data["Average Purchase Price"] = user_data["Average Purchase Price"].map("${:,.2f}".format)
user_data["Total Purchase Value"] = user_data["Total Purchase Value"].map("${:,.2f}".format)
user_data = user_data.loc[:,["Purchase Count", "Average Purchase Price", "Total Purchase Value"]]


# Display Table
user_data.sort_values("Total Purchase Value", ascending=False).head(5)

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Qarwen67,4,$2.49,$9.97
Sondim43,3,$3.13,$9.38
Tillyrin30,3,$3.06,$9.19
Lisistaya47,3,$3.06,$9.19
Tyisriphos58,2,$4.59,$9.18


In [257]:
## Most Popular Items
## Identify the 5 most popular items by purchase count, then list (in a table):
# Item ID
# Item Name
# Purchase Count
# Item Price
# Total Purchase Value

#-----------------------------------

# Extract item Data
item_data = purchase_data_df.loc[:,["Item ID", "Item Name", "Price"]]

# Perform basic calculations
total_item_purchase = item_data.groupby(["Item ID", "Item Name"]).sum()["Price"].rename("Total Purchase Value")
average_item_purchase = item_data.groupby(["Item ID", "Item Name"]).mean()["Price"]
item_count = item_data.groupby(["Item ID", "Item Name"]).count()["Price"].rename("Purchase Count")

# Minor Data Munging
item_data_pd = pd.DataFrame({"Total Purchase Value": total_item_purchase, "Item Price": average_item_purchase, "Purchase Count": item_count})
item_data_pd["Item Price"] = item_data_pd["Item Price"].map("${:,.2f}".format)
item_data_pd ["Purchase Count"] = item_data_pd["Purchase Count"].map("{:,}".format)
item_data_pd["Total Purchase Value"] = item_data_pd["Total Purchase Value"].map("${:,.2f}".format)
item_data_pd = item_data_pd.loc[:,["Purchase Count", "Item Price", "Total Purchase Value"]]

# Display the Item Table
item_data_pd.sort_values("Purchase Count", ascending=False).head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
39,"Betrayal, Whisper of Grieving Widows",11,2.35,25.85
84,Arcane Gem,11,2.23,24.53
31,Trickster,9,2.07,18.63
175,Woeful Adamantite Claymore,9,1.24,11.16
13,Serenity,9,1.49,13.41


In [254]:
## Most Profitable Items
## Identify the 5 most profitable items by total purchase value, then list (in a table):
# Item ID
# Item Name
# Purchase Count
# Item Price
# Total Purchase Value

#--------------------------

# Minor Data Munging

# Display the Item Table (Sorted by Total Purchase Value)
item_data_pd.sort_values("Total Purchase Value", ascending=False).head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
170,Shadowsteel,5,$1.98,$9.90
21,Souleater,3,$3.27,$9.81
37,"Shadow Strike, Glory of Ending Hope",5,$1.93,$9.65
127,"Heartseeker, Reaver of Souls",3,$3.21,$9.63
120,Agatha,5,$1.91,$9.55
