In [None]:
import pandas as pd 
import json
import numpy as np

In [385]:
t# Read data file and create dataframe. 

json_path1 = "./purchase_data2.json"
purchase_data = pd.read_json(json_path1)
purchase_df = pd.DataFrame(purchase_data)

# Count players by using unique function

player_count = len(purchase_df["SN"].unique()) 
player_count_table = pd.DataFrame({"Total Players":[player_count]})

print("\nPlayer Count\n")
player_count_table


Player Count



Unnamed: 0,Total Players
0,74


In [386]:
# Calculate asked values by using standard functions, such as, unique, mean, len, sum

number_unique_items = len(purchase_df["Item ID"].unique())   
average_purchase_price = purchase_df["Price"].mean()
total_number_of_purchases = len(purchase_df["Item ID"])
total_revenue = purchase_df["Price"].sum()

# Create table and fill it with required data. Format amounts with '$' sign & 2 decimal places

purchasing_analysis_table = pd.DataFrame({"Number of Unique Items":[number_unique_items],
                                          "Average Price":["$%.2f" % average_purchase_price],
                                          "Number of Purchases":[total_number_of_purchases],
                                          "Total Revenue ":["$%.2f" % total_revenue]})

print("\nPurchasing Analysis (Total)\n")
purchasing_analysis_table


Purchasing Analysis (Total)



Unnamed: 0,Average Price,Number of Purchases,Number of Unique Items,Total Revenue
0,$2.92,78,64,$228.10


In [387]:
# create new dataframe of all players
unique_players_df = purchase_df.drop_duplicates("SN")     

# create new dataframes of each category - unique male players, unique female players, unique other players                                         
unique_male_df = unique_players_df.loc[unique_players_df["Gender"] == "Male", :]  
unique_female_df = unique_players_df.loc[unique_players_df["Gender"] == "Female", :]
unique_other_df = unique_players_df.loc[unique_players_df["Gender"] == "Other / Non-Disclosed", :]

# calculate percentages of male, female, and other players 
percent_male = (len(unique_male_df["SN"])/len(unique_players_df["SN"]))*100
percent_female = (len(unique_female_df["SN"])/len(unique_players_df["SN"]))*100
percent_other = (len(unique_other_df["SN"])/len(unique_players_df["SN"]))*100

#create gender_demographics_table
gender_demographics_table = pd.DataFrame({"Gender":["Male", "Female", "Other / Non-Disclosed"],
                                "Percentage of Players":["%.2f" % percent_male, "%.2f" % percent_female, "%.2f" % percent_other],
                                "Total Count":[len(unique_male_df["SN"]), len(unique_female_df["SN"]), len(unique_other_df["SN"])]})
gender_demographics_table.set_index('Gender', inplace=True)
print("\nGender Demographics \n")
gender_demographics_table


Gender Demographics 



Unnamed: 0_level_0,Percentage of Players,Total Count
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,81.08,60
Female,17.57,13
Other / Non-Disclosed,1.35,1


In [388]:
# create new dataframes of each category - male players, female players, other players                                         
male_df = purchase_df.loc[purchase_df["Gender"] == "Male", :]  
female_df = purchase_df.loc[purchase_df["Gender"] == "Female", :]
other_df = purchase_df.loc[purchase_df["Gender"] == "Other / Non-Disclosed", :]

Purchase_Count_male = len(male_df["Item ID"])   
Purchase_Count_female = len(female_df["Item ID"])
Purchase_Count_other = len(other_df["Item ID"])

Average_Purchase_Price_male = male_df["Price"].sum()/Purchase_Count_male
Average_Purchase_Price_female = female_df["Price"].sum()/Purchase_Count_female
Average_Purchase_Price_other = other_df["Price"].sum()/Purchase_Count_other

Total_Purchase_Value_male = male_df["Price"].sum()
Total_Purchase_Value_female = female_df["Price"].sum()
Total_Purchase_Value_other = other_df["Price"].sum()

Normalized_Totals_male = male_df["Price"].mean()
Normalized_Totals_female = female_df["Price"].mean()
Normalized_Totals_other = other_df["Price"].mean()

gender_purchase_table = pd.DataFrame({" ":["Gender", "Female", "Male", "Other / Non-Disclosed"],
                                "Purchase Count":["", Purchase_Count_female, Purchase_Count_male, Purchase_Count_other],
                                "Average Purchase Price":["", "$%.2f" % Average_Purchase_Price_female, "$%.2f" % Average_Purchase_Price_male, "$%.2f" % Average_Purchase_Price_other],
                                "Total Purchase Value":["" , "$%.2f" % Total_Purchase_Value_female, "$%.2f" % Total_Purchase_Value_male, "$%.2f" % Total_Purchase_Value_other],      
                                "Normalized Totals":["" , "$%.2f" % Normalized_Totals_female, "$%.2f" % Normalized_Totals_male, "$%.2f" % Normalized_Totals_other]})
gender_purchase_table.set_index(" ", inplace=True)

print("\nPurchasing Analysis (Gender)\n")
gender_purchase_table


Purchasing Analysis (Gender)



Unnamed: 0,Average Purchase Price,Normalized Totals,Purchase Count,Total Purchase Value
,,,,
Gender,,,,
Female,$3.18,$3.18,13.0,$41.38
Male,$2.88,$2.88,64.0,$184.60
Other / Non-Disclosed,$2.12,$2.12,1.0,$2.12


In [389]:
# Create bins as requested
age_bins = [0,9,14,19,24,29,34,39,150] 

# Create labels for these bins
bin_labels = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

#Create a reduced_df with relevant data /columns
reduced_df = purchase_df[["Age", "Price"]]

# Slice the data and place it into bins
# Place the data series into a new column inside of the DataFrame

reduced_df["Age Group"] = pd.cut(reduced_df["Age"],age_bins,labels=bin_labels)
Age_Group = reduced_df.groupby("Age Group")

Purchase_Count = Age_Group.size().to_frame(name= "Purchase_Count")          
Average_Purchase_Price = Age_Group["Price"].mean().to_frame(name= "Average_Purchase_Price")   
Total_Purchase_Value = Age_Group["Price"].sum().to_frame(name= "Total_Purchase_Value")     
Normalized_Totals = Age_Group["Price"].sum().to_frame(name= "Normalized_Totals")

Age_Demo_df = pd.concat([Purchase_Count, Average_Purchase_Price, Total_Purchase_Value, Normalized_Totals], axis=1)
Age_Demo_df['Average_Purchase_Price'] = Age_Demo_df['Average_Purchase_Price'].map("${:.2f}".format)
Age_Demo_df['Total_Purchase_Value'] = Age_Demo_df['Total_Purchase_Value'].map("${:.2f}".format)
Age_Demo_df['Normalized_Totals'] = Age_Demo_df['Normalized_Totals'].map("${:.2f}".format)

print("\nPurchasing Analysis (Age)\n")
Age_Demo_df



Purchasing Analysis (Age)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]


Unnamed: 0_level_0,Purchase_Count,Average_Purchase_Price,Total_Purchase_Value,Normalized_Totals
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,5,$2.76,$13.82,$13.82
10-14,3,$2.99,$8.96,$8.96
15-19,11,$2.76,$30.41,$30.41
20-24,36,$3.02,$108.89,$108.89
25-29,9,$2.90,$26.11,$26.11
30-34,7,$1.98,$13.89,$13.89
35-39,6,$3.56,$21.37,$21.37
40+,1,$4.65,$4.65,$4.65


In [390]:
SN_Group = purchase_df.groupby("SN")

Purchase_Count = SN_Group.size().to_frame(name= "Purchase_Count")           
Average_Purchase_Price = SN_Group["Price"].mean().to_frame(name= "Average_Purchase_Price")   
Total_Purchase_Value = SN_Group["Price"].sum().to_frame(name= "Total_Purchase_Value")     

SN_df = pd.concat([Purchase_Count, Average_Purchase_Price, Total_Purchase_Value], axis=1)
SN_df['Average_Purchase_Price'] = SN_df['Average_Purchase_Price'].map("${:.2f}".format)
SN_df['Total_Purchase_Value'] = SN_df['Total_Purchase_Value'].map("${:.2f}".format)

SN1_df = pd.DataFrame(SN_df)

# Sort the DataFrame by the values in the "Total_Purchase_Value" column
SN1_df = SN1_df.sort_values("Total_Purchase_Value", ascending=False)

print("\nTop Spenders\n")
SN1_df.head()


Top Spenders



Unnamed: 0_level_0,Purchase_Count,Average_Purchase_Price,Total_Purchase_Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Sundaky74,2,$3.71,$7.41
Aidaira26,2,$2.56,$5.13
Eusty71,1,$4.81,$4.81
Chanirra64,1,$4.78,$4.78
Alarap40,1,$4.71,$4.71


In [391]:
item_group = purchase_df.groupby("Item ID")

Purchase_Count = item_group["Item ID"].size().to_frame(name= "Purchase Count")  
Item_Price = item_group["Price"].mean().to_frame(name= "Item_Price")  
Item_Name = item_group["Item Name"].unique().to_frame(name= "Item Name") 
Total_Purchase_Value = item_group["Price"].sum().to_frame(name= "Total_Purchase_Value")

item_group_df = pd.concat([Item_Name, Purchase_Count, Item_Price, Total_Purchase_Value], axis=1)

item_group_df['Total_Purchase_Value'] = item_group_df['Total_Purchase_Value'].map("${:.2f}".format)
item_group_df['Item_Price'] = item_group_df['Item_Price'].map("${:.2f}".format)

item_group1_df = pd.DataFrame(item_group_df)

# Sort the DataFrame by the values in the "Purchase Count" column
item_group1_df = item_group1_df.sort_values("Purchase Count", ascending=False)

print("\nMost Popular Items\n")
item_group1_df.head()     # 5 most popular items by Purchase Count 



Most Popular Items



Unnamed: 0_level_0,Item Name,Purchase Count,Item_Price,Total_Purchase_Value
Item ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
94,[Mourning Blade],3,$3.64,$10.92
90,[Betrayer],2,$4.12,$8.24
111,[Misery's End],2,$1.79,$3.58
64,[Fusion Pummel],2,$2.42,$4.84
154,[Feral Katana],2,$4.11,$8.22


In [392]:
# Sort the DataFrame by the values in the "Total_Purchase_Value" column
item_group1_df = item_group1_df.sort_values("Total_Purchase_Value", ascending=False)

print("\nMost Profitable Items\n")
item_group1_df.head()     # 5 most profitable items by Total_Purchase_Value


Most Profitable Items



Unnamed: 0_level_0,Item Name,Purchase Count,Item_Price,Total_Purchase_Value
Item ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
117,"[Heartstriker, Legacy of the Light]",2,$4.71,$9.42
93,[Apocalyptic Battlescythe],2,$4.49,$8.98
90,[Betrayer],2,$4.12,$8.24
154,[Feral Katana],2,$4.11,$8.22
180,[Stormcaller],2,$2.77,$5.54
