In [225]:
import pandas as pd

purchase_df = pd.read_json('purchase_data.json')

purchase_df.head()

# Player Count

In [227]:
total_players = purchase_df["SN"].nunique()
total_players_df = pd.DataFrame({"Total Players" : [total_players]})
total_players_df

Unnamed: 0,Total Players
0,573


# Purchasing Analysis (Total)

In [228]:
unique_items = purchase_df["Item ID"].nunique()
average_price = purchase_df["Price"].mean()
total_purchases = purchase_df["Item ID"].count()
total_revenue = purchase_df["Price"].sum()

purchase_analysis_df = pd.DataFrame({"Number of Unique Items" : [unique_items]
                                     ,"Average Price" : [average_price]
                                     ,"Number of Purchases" : [total_purchases]
                                     ,"Total Revenue" : [total_revenue]
                                     })
purchase_analysis_df["Average Price"] = purchase_analysis_df["Average Price"].map("${:,.2f}".format)
purchase_analysis_df["Total Revenue"] = purchase_analysis_df["Total Revenue"].map("${:,.2f}".format)
purchase_analysis_df = purchase_analysis_df[["Number of Unique Items", "Average Price", "Number of Purchases", "Total Revenue"]]
purchase_analysis_df


Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,$2.93,780,"$2,286.33"


In [249]:
grouped_gender = purchase_df.groupby('Gender').nunique()
grouped_gender["pct_gender"] = (grouped_gender["SN"]/grouped_gender["SN"].sum())*100

gender_demographics = grouped_gender[["pct_gender", "SN"]]
gender_demographics["pct_gender"] = gender_demographics["pct_gender"].map("{0:.2f}".format)
gender_demographics.columns = ["Percent of Players", "Total Count"]
gender_demographics

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0_level_0,Percent of Players,Total Count
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,17.45,100
Male,81.15,465
Other / Non-Disclosed,1.4,8


# Purchase Analysis (Gender)

In [231]:
purchase_by_gender_df = purchase_df.groupby("Gender").count()

purchase_by_gender_df["Average Purchase Price"] = purchase_df.groupby("Gender")["Price"].mean()
purchase_by_gender_df["Total Purchase Value"] = purchase_df.groupby("Gender")["Price"].sum()
purchase_by_gender_df["Normalized Totals"] = purchase_df.groupby("Gender")["Price"].sum()/purchase_df.groupby("Gender")["SN"].nunique()

purchase_by_gender_renamed_df = purchase_by_gender_df.rename(columns = {"Item ID" : "Purchase Count"})

purchase_by_gender_renamed_df["Average Purchase Price"] = purchase_by_gender_renamed_df["Average Purchase Price"].map("${:,.2f}".format)
purchase_by_gender_renamed_df["Total Purchase Value"] = purchase_by_gender_renamed_df["Total Purchase Value"].map("${:,.2f}".format)
purchase_by_gender_renamed_df["Normalized Totals"] = purchase_by_gender_renamed_df["Normalized Totals"].map("${:,.2f}".format)

purchase_analysis_by_gender = purchase_by_gender_renamed_df[["Purchase Count", 
                                                             "Average Purchase Price", 
                                                             "Total Purchase Value", 
                                                             "Normalized Totals"]]
purchase_analysis_by_gender



Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,136,$2.82,$382.91,$3.83
Male,633,$2.95,"$1,867.68",$4.02
Other / Non-Disclosed,11,$3.25,$35.74,$4.47


# Age Demographics

In [232]:
min_age = purchase_df["Age"].min()
min_age
max_age = purchase_df["Age"].max()
max_age

bins = [0, 9, 14, 19, 24, 29, 34, 39, 47]

group_names = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40+']
age_binning = pd.cut(purchase_df["Age"], bins, labels=group_names)
purchase_df['Age Group'] = age_binning
age_grouped = purchase_df.groupby("Age Group")
age_demographics_group = age_grouped.nunique()
age_demographics_group ["Percentage of Players"] = (purchase_df.groupby("Age Group")["SN"].nunique()/age_demographics_group["SN"].sum()) * 100
age_demographics_renamed = age_demographics_group.rename(columns = {"SN": "Total Count"})
age_demographics_renamed["Percentage of Players"] = age_demographics_renamed["Percentage of Players"].map("{:,.2f}".format)
age_demographics = age_demographics_renamed[["Percentage of Players", "Total Count"]]
age_demographics


Unnamed: 0_level_0,Percentage of Players,Total Count
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,3.32,19
10-14,4.01,23
15-19,17.45,100
20-24,45.2,259
25-29,15.18,87
30-34,8.2,47
35-39,4.71,27
40+,1.92,11


# Purchase Analysis (Age)

In [233]:
purchase_age = purchase_df.groupby("Age Group").count()
purchase_age["Average Purchase Price"] = purchase_df.groupby("Age Group")["Price"].mean()
purchase_age["Total Purchase Value"] = purchase_df.groupby("Age Group")["Price"].sum()
purchase_age["Normalized Totals"] = purchase_df.groupby("Age Group")["Price"].sum()/purchase_df.groupby("Age Group")["SN"].nunique()

purchase_age_renamed_df = purchase_age.rename(columns = {"Item ID" : "Purchase Count"})

purchase_age_renamed_df["Average Purchase Price"] = purchase_age_renamed_df["Average Purchase Price"].map("${:,.2f}".format)
purchase_age_renamed_df["Total Purchase Value"] = purchase_age_renamed_df["Total Purchase Value"].map("${:,.2f}".format)
purchase_age_renamed_df["Normalized Totals"] = purchase_age_renamed_df["Normalized Totals"].map("${:,.2f}".format)


purchase_analysis_by_age = purchase_age_renamed_df[["Purchase Count", 
                                                             "Average Purchase Price", 
                                                             "Total Purchase Value", 
                                                             "Normalized Totals"]]
purchase_analysis_by_age

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,28,$2.98,$83.46,$4.39
10-14,35,$2.77,$96.95,$4.22
15-19,133,$2.91,$386.42,$3.86
20-24,336,$2.91,$978.77,$3.78
25-29,125,$2.96,$370.33,$4.26
30-34,64,$3.08,$197.25,$4.20
35-39,42,$2.84,$119.40,$4.42
40+,17,$3.16,$53.75,$4.89


# Top Spenders

In [255]:
spender_by_sn = purchase_df.groupby(["SN"])

item_count = spender_by_sn["Item ID"].count()
avg_price = spender_by_sn["Price"].mean()
total_value = spender_by_sn["Price"].sum()

spender_combine = pd.DataFrame({"Purchase Count" : item_count
                              ,"Average Purchase Price": avg_price
                              ,"Total Purchase Value": total_value})


spender_combine["Average Purchase Price"] = spender_combine["Average Purchase Price"].map("${:,.2f}".format)
spender_combine["Total Purchase Value"] = spender_combine["Total Purchase Value"].map("${:,.2f}".format)

# spender_combine

spender_sort = spender_combine.sort_values("Total Purchase Value", ascending=False) 
spender_sort.head()
spender_df = spender_sort.reset_index(drop=0)
spender_5_df = spender_sort.iloc[0:5]

spender_5_df_final = spender_5_df[["Purchase Count", "Average Purchase Price" , "Total Purchase Value"]]

spender_5_df_final


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Qarwen67,4,$2.49,$9.97
Sondim43,3,$3.13,$9.38
Tillyrin30,3,$3.06,$9.19
Lisistaya47,3,$3.06,$9.19
Tyisriphos58,2,$4.59,$9.18


# Most Popular Items

In [239]:
item_by_id = purchase_df.groupby(["Item ID", "Item Name"])

item_count = item_by_id["Item ID"].count()
purchase_price = item_by_id["Price"].max()
total_value = item_by_id["Price"].sum()

popular_items_combine = pd.DataFrame({"Purchase Count" : item_count
                              ,"Item Price": purchase_price
                              ,"Total Purchase Value": total_value})

popular_items_combine["Item Price"] = popular_items_combine["Item Price"].map("${:,.2f}".format)
popular_items_combine["Total Purchase Value"] = popular_items_combine["Total Purchase Value"].map("${:,.2f}".format)


popular_items_sort = popular_items_combine.sort_values("Purchase Count", ascending=False)
# popular_items = popular_items_sort.reset_index(drop=0)
popular_5_items = popular_items_sort.iloc[0:5]


popular_5_items_final = popular_5_items[["Purchase Count", "Item Price" , "Total Purchase Value"]]

popular_5_items_final


Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
39,"Betrayal, Whisper of Grieving Widows",11,$2.35,$25.85
84,Arcane Gem,11,$2.23,$24.53
31,Trickster,9,$2.07,$18.63
175,Woeful Adamantite Claymore,9,$1.24,$11.16
13,Serenity,9,$1.49,$13.41


# Most Profitable Items

In [247]:
item_by_profit = purchase_df.groupby(["Item ID", "Item Name"])

item_count = item_by_profit["Item ID"].count()
item_price = item_by_profit["Price"].max()
total_value = item_by_profit["Price"].sum()

item_by_profit_combine = pd.DataFrame({"Purchase Count" : item_count
                              , "Item Price": item_price
                              ,"Total Purchase Value": total_value})


item_by_profit_combine["Item Price"] = item_by_profit_combine["Item Price"].map("${:,.2f}".format)
item_by_profit_combine["Total Purchase Value"] = item_by_profit_combine["Total Purchase Value"].map("${:,.2f}".format)


profit_sort = item_by_profit_combine.sort_values("Total Purchase Value", ascending=False)
# spender_df = spender_sort.reset_index(drop=0)
profit_5_df = profit_sort.iloc[0:5]

profit_5_df_final = profit_5_df[["Purchase Count", "Item Price" , "Total Purchase Value"]]

profit_5_df_final


Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
170,Shadowsteel,5,$1.98,$9.90
21,Souleater,3,$3.27,$9.81
37,"Shadow Strike, Glory of Ending Hope",5,$1.93,$9.65
127,"Heartseeker, Reaver of Souls",3,$3.21,$9.63
120,Agatha,5,$1.91,$9.55
