In [1]:
import os
import pandas as pd
import numpy as np

In [114]:
#store file path in a variable  
json_path = os.path.join ("Resources","purchase_data.json") 

In [115]:
#Import the purchase data json file as a DataFrame

purchase_data = pd.read_json(json_path)
purchase_data.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


# Player Count

In [49]:

#Player Count- Using the length of the unique screen names
player_count = len(purchase_data['SN'].unique())

#Convert the player count series into a dataframe
player_count_df = pd.DataFrame([{'Total Players': player_count}])

#resetting the index to total players
Total_players = player_count_df.set_index('Total Players')
Total_players

573


# Purchasing Analysis (Total)

In [120]:
# Number of Unique Items
unique_items = len(purchase_data["Item ID"].unique())
unique_items 

# Average Purchase Price
ave_price = round (purchase_data["Price"].mean(), 2)
ave_price

# Total Number of Purchases
tot_pur = purchase_data["Price"].count()
tot_pur

# Total Revenue
tot_rev = round (purchase_data["Price"].sum(),2)
tot_rev

total_analysis_df = pd.DataFrame ({ 'Number of Unique Items': [unique_items],
                                    'Average Purchase Price': [ave_price],
                                    'Total Number of Purchases': [tot_pur],
                                    'Total Revenue': [tot_rev]
                                   })

 
 
total_analysis_df.style.format({'Average Purchase Price': '${:.2f}', 'Total Revenue': '${:,.2f}'})



Unnamed: 0,Average Purchase Price,Number of Unique Items,Total Number of Purchases,Total Revenue
0,$2.93,183,780,"$2,286.33"


# Gender Demographic

In [104]:

#Get the total number of unique players
totalcount = purchase_data["SN"].nunique()

#Number of male players
malecount = purchase_data[purchase_data["Gender"] == "Male"]["SN"].nunique()

#Number of female players
femalecount = purchase_data[purchase_data["Gender"] == "Female"]["SN"].nunique()

#Number of non_disclosed players
non_disclosed = totalcount - (malecount + femalecount)

#Calculating percentages
perc_male = (malecount/totalcount)*100
perc_female = (femalecount/totalcount)*100
perc_non_disclosed = (non_disclosed/totalcount)*100

#Creating a data frame

gender_demo_df = pd.DataFrame ({"Gender":["Male","Female","Non-disclosed"],
                                           "Number of players":[malecount, femalecount, non_disclosed],
                                           "Percentage of players":[perc_male, perc_female,perc_non_disclosed]})
                                           
gender_demo_df

reset_index = gender_demo_df.set_index("Gender")
reset_index

reset_index.style.format({"Percentage of Players": "{:.2f}%"})
 

Unnamed: 0_level_0,Number of players,Percentage of players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,465,81.1518
Female,100,17.452
Non-disclosed,8,1.39616


# Purchasing Analysis by Gender

In [108]:
 

male_purchase = purchase_data[purchase_data["Gender"] == "Male"]["Price"].count()
female_purchase = purchase_data[purchase_data["Gender"] == "Female"]["Price"].count()
other_purchase = total_purchases - (male_purchase + female_purchase)

ave_price_male = purchase_data[purchase_data["Gender"] == "Male"]['Price'].mean()
ave_price_female = purchase_data[purchase_data["Gender"] == "Female"]['Price'].mean()
ave_price_other = purchase_data[purchase_data["Gender"] == "Non-disclosed"]['Price'].mean()

tot_price_male = purchase_data[purchase_data ["Gender"] == "Male"]['Price'].sum()
tot_price_female = purchase_data[purchase_data["Gender"] == "Female"]['Price'].sum()
tot_price_other = purchase_data[purchase_data["Gender"] == "Non-disclosed"]['Price'].sum()

male_norm = tot_price_male/malecount
female_norm = tot_price_female/femalecount
other_norm = tot_price_other/non_disclosed

gender_purchase_df = pd.DataFrame({"Gender": ["Male", "Female", "Non-Disclosed"], 
                                   "Purchase Count": [male_purchase, female_purchase, other_purchase],
                                    "Average Purchase Price": [ave_price_male,ave_price_female,ave_price_other], 
                                    "Total Purchase Value": [tot_price_male, tot_price_female, tot_price_other],
                                    "Normalized Totals": [male_norm, female_norm, other_norm]}, 
                                  columns = 
                ["Gender", "Purchase Count", "Average Purchase Price", "Total Purchase Value", "Normalized Totals"])
                                        
gender_purchase = gender_purchase_df.set_index("Gender")
gender_purchase.style.format({"Average Purchase Price": "${:.2f}", 
                              "Total Purchase Value": "${:.2f}", "Normalized Totals": "${:.2f}"})

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Male,633,$2.95,$1867.68,$4.02
Female,136,$2.82,$382.91,$3.83
Non-Disclosed,11,$nan,$0.00,$0.00


# Age Demographic

In [153]:
#Create bins
bins = [0,10,15,20,25,30,35,40, 45]

#Create list with age ranges
age_ranges = ["<10", "10-14","15-19", "20-24", "25-29", "30-34", "35-39", ">=40"]

# Cut purchase data and place the ages into bins
pd.cut(purchase_data["Age"], bins, labels=age_ranges)

#Create a new column for age range

purchase_data["Age Range"] = pd.cut(purchase_data["Age"], bins, labels= age_ranges)
purchase_data.head()
 
#Players percentage by age range
age_group_percentage = round(purchase_data["Age Range"].value_counts()/player_count ,2)*100
age_group_percentage


# Purchase count by age range
age_group_count = purchase_data.groupby("Age Range")["Item Name"]
age_group_count.count()
 
# Average purchase price by age range
age_group_average = purchase_data.groupby("Age Range")["Price"].mean()
age_group_average

#Total purchase value by age range.
age_group_total = purchase_data.groupby("Age Range")["Price"].sum()
age_group_total

#Normalized totals by age range.
normalized_age_total = age_group_total/player_count
normalized_age_total


# Convert to DataFrame.

age_range_df = pd.DataFrame({"Purchase Count":age_group_count,
                            "Average Purchase Price":age_group_average,
                            "Total Purchase Value": age_group_total,
                            "Normalized Totals": normalized_age_total
})
age_range_df

Unnamed: 0_level_0,Average Purchase Price,Normalized Totals,Purchase Count,Total Purchase Value
Age Range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,3.019375,0.168621,"(<10, [Darkheart, Butcher of the Champion, Woe...",96.62
10-14,2.873718,0.391187,"(10-14, [Phantomlight, Brimstone, Conqueror Ad...",224.15
15-19,2.873587,0.922757,"(15-19, [Sleepwalker, Mercenary Sabre, Alpha, ...",528.74
20-24,2.959377,1.575236,"(20-24, [Stormbringer, Dark Blade of Ending Mi...",902.61
25-29,2.892368,0.38363,"(25-29, [Interrogator, Blood Blade of the Quee...",219.82
30-34,3.073448,0.311099,"(30-34, [Primitive Blade, Expiration, Warscyth...",178.26
35-39,2.8975,0.222496,"(35-39, [Bone Crushing Silver Skewer, Bonecarv...",127.49
>=40,2.88,0.015079,"(>=40, [Venom Claymore, Suspension, Despair, F...",8.64


# Top Spenders

In [113]:
 # Identify the the top 5 spenders in the game by total purchase value:
 #SN
 #Purchase Count
 #Average Purchase Price
 #Total Purchase Value
        
sn_price= purchase_data.groupby(["SN"])['Price'].sum()
sn_pur = purchase_data.groupby(["SN"])['Price'].count()
sn_users = purchase_data.groupby(["SN"])
avg_sn = round(sn_price/sn_pur,2)

top_sn = pd.DataFrame({"Purchase Count": sn_pur, "Average Purchase Price":avg_sn, "Total Purchase Value":sn_price})
top_sn= top_sn.sort_values("Total Purchase Value", ascending=False)
top_sn= top_sn[["Purchase Count", "Average Purchase Price", "Total Purchase Value"]]

top_sn.reset_index(inplace=True)
top_sn.round(2)
top_sn.head(5)


Unnamed: 0,SN,Purchase Count,Average Purchase Price,Total Purchase Value
0,Undirrala66,5,3.41,17.06
1,Saedue76,4,3.39,13.56
2,Mindimnya67,4,3.18,12.74
3,Haellysu29,3,4.24,12.73
4,Eoda93,3,3.86,11.58


# Most Popular Items

In [138]:
items_purchase_count = purchase_data.groupby(["Item ID", "Item Name"]).count()["Price"].rename("Purchase Count")
items_average_price= purchase_data.groupby(["Item ID", "Item Name"]).mean()["Price"].rename("Average Purchase Price")
items_value_total = purchase_data.groupby(["Item ID", "Item Name"]).sum()["Price"].rename("Total Purchase Value")

# Convert to DataFrame

items_purchased = pd.DataFrame({"Purchase Count":items_purchase_count,
                                   "Item Price":items_average_price,
                                   "Total Purchase Value":items_value_total})

most_popular_items = items_purchased.sort_values("Purchase Count", ascending=False)
most_popular_items.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Item Price,Purchase Count,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
39,"Betrayal, Whisper of Grieving Widows",2.35,11,25.85
84,Arcane Gem,2.23,11,24.53
31,Trickster,2.07,9,18.63
175,Woeful Adamantite Claymore,1.24,9,11.16
13,Serenity,1.49,9,13.41


# Most profitable Items

In [154]:
items_purchase_count = purchase_data.groupby(["Item ID", "Item Name"]).count()["Price"].rename("Purchase Count")
items_average_price = purchase_data.groupby(["Item ID", "Item Name"]).mean()["Price"].rename("Average Purchase Price")
items_value_total = purchase_data.groupby(["Item ID", "Item Name"]).sum()["Price"].rename("Total Purchase Value")

# Convert to DataFrame
items_purchased = pd.DataFrame({"Purchase Count":items_purchase_count,
                                   "Item Price":items_average_price,
                                   "Total Purchase Value":items_value_total})

#items_purchased.head()
most_profitable_items = items_purchased.sort_values("Total Purchase Value", ascending=False)
most_profitable_items.head(5)


Unnamed: 0_level_0,Unnamed: 1_level_0,Item Price,Purchase Count,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,4.14,9,37.26
115,Spectral Diamond Doomblade,4.25,7,29.75
32,Orenmir,4.95,6,29.7
103,Singed Scalpel,4.87,6,29.22
107,"Splitter, Foe Of Subtlety",3.61,8,28.88
