Heroes Of Pymoli Data Analysis

1. One of the most significant demographics to purchase items in-game are males with ages ranging from 20 to 24.
2. Males also account for over 81% of in-game purchases.
3. The top 5 items that bring in the most revenue account for less than 7% of total revenue from item purchases. 

In [1]:
# Environment Setup
# ----------------------------------------------------------------
# Dependencies
import csv
import pandas as pd
import random
import numpy as np

# Output File Name
file_output_purchases_json = "generated_data/purchase_data.json"

Total Number of Players 

In [25]:
players = pd.read_json(file_output_purchases_json)
unique_players = players["SN"].unique()
total_unique_players = len(unique_players)
players_total_dict = [{"Total Players": total_unique_players}]
total_players_summary = pd.DataFrame(players_total_dict)
total_players_summary

Unnamed: 0,Total Players
0,573


 Purchasing Analysis (Total)

In [3]:
purchase_df = pd.read_json(file_output_purchases_json)
#Number of unique items purhcased
unique_item_count = len(purchase_df["Item ID"].unique())
#Average purchase price
average_item_price = purchase_df["Price"].mean()
#Total number of purchases 
total_purchases = len(purchase_df)
#Total revenue
total_purchase_rev = purchase_df["Price"].sum()
#Purchasing analysis table
purchasing_analysis = pd.DataFrame({"Number of Unique Items": unique_item_count,
                        "Average Price": average_item_price,
                        "Number of Purchases": total_purchases,
                        "Total Revenue": [total_purchase_rev],
})
#Organizing and formatting analysis 
organized_pur_ana = purchasing_analysis[["Number of Unique Items", "Average Price", "Number of Purchases", "Total Revenue"]]
organized_pur_ana["Average Price"] = organized_pur_ana["Average Price"].map("${:.2f}".format)
organized_pur_ana["Total Revenue"] = organized_pur_ana["Total Revenue"].map("${:.2f}".format)
organized_pur_ana



Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,$2.93,780,$2286.33


Gender Demographics

In [41]:
player_gender_df = pd.read_json(file_output_purchases_json)
number_in_set = len(player_gender_df)
gender_count_raw = player_gender_df["Gender"].value_counts()
per_gender = ((gender_count_raw / number_in_set)*100)
#Summmary table for gender demographics
gender_summary = pd.DataFrame({"Percentage of players": round(per_gender,2),
                   "Total Count": gender_count_raw
})
gender_summary

Unnamed: 0,Percentage of players,Total Count
Male,81.15,633
Female,17.44,136
Other / Non-Disclosed,1.41,11


Purchasing Analysis (Gender)

In [5]:
grouped_gender_df = purchase_df.groupby(["Gender"])
grouped_gender_df
purchase_counts = grouped_gender_df["Item ID"].count()
avg_gen_pur = grouped_gender_df["Price"].mean()
total_gen_pur = grouped_gender_df["Price"].sum()
gen_pur_norm = total_gen_pur/(purchase_counts.sum())
purchasing_gen_df = pd.DataFrame({"Purchase Count": purchase_counts,
                                  "Average Purchase Price": avg_gen_pur,
                                  "Total Purchase Value": total_gen_pur,
                                  "Normalized Totals": gen_pur_norm
                                })
purchasing_gen_df["Average Purchase Price"] = purchasing_gen_df["Average Purchase Price"].map("${:.2f}".format)
purchasing_gen_df["Total Purchase Value"] = purchasing_gen_df["Total Purchase Value"].map("${:.2f}".format)
purchasing_gen_df["Normalized Totals"] = purchasing_gen_df["Normalized Totals"].map("${:.2f}".format)
purchasing_gen_org = purchasing_gen_df[["Purchase Count", "Average Purchase Price", "Total Purchase Value", "Normalized Totals"]]
purchasing_gen_org

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,136,$2.82,$382.91,$0.49
Male,633,$2.95,$1867.68,$2.39
Other / Non-Disclosed,11,$3.25,$35.74,$0.05


Purchasing Analysis (Age)

In [6]:
age_demo_df = purchase_df.groupby(["Age"])
age_bins = [0, 9, 14, 19, 24, 29, 34, 39, 100]
age_labels = ["< 10", "10-14","15-19","20-24","25-29","30-34","35-39","40+"]
#Put players into bins based on their age
purchase_df["Age Group"] = pd.cut(purchase_df["Age"],age_bins,labels=age_labels)
age_group = purchase_df.groupby("Age Group")
age_group_total = age_group.count()
age_count = age_group["Age"].count()
age_total = age_group["Price"].sum()
age_avg = age_total / age_count
age_pur_norm = age_total/(age_count.sum())
age_group_analysis = pd.DataFrame({"Purchase Count": age_count,
                                   "Average Purchase Price": age_avg,
                                   "Total Purchase Value": age_total,
                                   "Normalized Totals": age_pur_norm
                                    })
age_group_analysis = age_group_analysis[["Purchase Count","Average Purchase Price","Total Purchase Value","Normalized Totals"]]
age_group_analysis["Average Purchase Price"] = age_group_analysis["Average Purchase Price"].map("${:.2f}".format)
age_group_analysis["Normalized Totals"] = age_group_analysis["Normalized Totals"].map("${:.2f}".format)
age_group_analysis["Total Purchase Value"] = age_group_analysis["Total Purchase Value"].map("${:.2f}".format)
age_group_analysis.head()

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
< 10,28,$2.98,$83.46,$0.11
10-14,35,$2.77,$96.95,$0.12
15-19,133,$2.91,$386.42,$0.50
20-24,336,$2.91,$978.77,$1.25
25-29,125,$2.96,$370.33,$0.47


Top Spenders

In [7]:
spenders_df = purchase_df.groupby(["SN"])
spender_count = spenders_df["Item Name"].count()
spender_total = spenders_df["Price"].sum()
spender_avg = spender_total / spender_count
top_spenders_df = pd.DataFrame({"Total Purchases": spender_count,
                    "Average Purchase Value": spender_avg,
                    "Total Purchase Value": spender_total})
spenders_analysis = top_spenders_df[["Total Purchases","Average Purchase Value", "Total Purchase Value"]]
spenders_analysis = spenders_analysis.sort_values(by = "Total Purchase Value", ascending = False)
spenders_analysis["Average Purchase Value"] = spenders_analysis["Average Purchase Value"].map("${:.2f}".format)
spenders_analysis["Total Purchase Value"] = spenders_analysis["Total Purchase Value"].map("${:.2f}".format)
spenders_analysis.head()

Unnamed: 0_level_0,Total Purchases,Average Purchase Value,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,5,$3.41,$17.06
Saedue76,4,$3.39,$13.56
Mindimnya67,4,$3.18,$12.74
Haellysu29,3,$4.24,$12.73
Eoda93,3,$3.86,$11.58


Most Popular Items

In [8]:
items_df = purchase_df.groupby(["Item ID", "Item Name"])
item_purchase_count = items_df["Item ID"].count()
item_price = items_df["Price"].unique()
item_purchase_total = items_df["Price"].sum()
items_analysis = pd.DataFrame({"Purchase Count": item_purchase_count,
                               "Item Price": item_price,
                               "Total Purchase Value": item_purchase_total
                              })
items_analysis = items_analysis.sort_values(by = "Purchase Count",ascending = False)
items_analysis = items_analysis[["Purchase Count", "Item Price", "Total Purchase Value"]]
items_analysis["Total Purchase Value"] = items_analysis["Total Purchase Value"].map("${:.2f}".format)
items_analysis.head()
#items_analysis.dtypes

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
39,"Betrayal, Whisper of Grieving Widows",11,[2.35],$25.85
84,Arcane Gem,11,[2.23],$24.53
31,Trickster,9,[2.07],$18.63
175,Woeful Adamantite Claymore,9,[1.24],$11.16
13,Serenity,9,[1.49],$13.41


Most Profitable Items

In [9]:
items_df = purchase_df.groupby(["Item ID", "Item Name"])
item_purchase_count = items_df["Item ID"].count()
item_price = items_df["Price"].unique()
item_purchase_total = items_df["Price"].sum()
items_analysis = pd.DataFrame({"Purchase Count": item_purchase_count,
                               "Item Price": item_price,
                               "Total Purchase Value": item_purchase_total
                              })
items_analysis = items_analysis.sort_values(by = "Total Purchase Value",ascending = False)
items_analysis = items_analysis[["Purchase Count", "Item Price", "Total Purchase Value"]]
items_analysis["Total Purchase Value"] = items_analysis["Total Purchase Value"].map("${:.2f}".format)
items_analysis.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,9,[4.14],$37.26
115,Spectral Diamond Doomblade,7,[4.25],$29.75
32,Orenmir,6,[4.95],$29.70
103,Singed Scalpel,6,[4.87],$29.22
107,"Splitter, Foe Of Subtlety",8,[3.61],$28.88
