# Heroes Of Pymoli Data Analysis

1. One of the most significant demographics purchasing items in-game is males aged 20 to 24.
2. Males also account for over 81% of in-game purchases.
3. The top 5 items that bring in the most revenue account for less than 7% of total revenue from item purchases. 

In [11]:
# Environment Setup
# ----------------------------------------------------------------
# Dependencies
import csv
import pandas as pd
import random
import numpy as np

# Path to the purchase-data JSON file. Despite "output" in the variable name,
# the cells shown in this notebook only read it (via pd.read_json).
file_output_purchases_json = "generated_data/purchase_data.json"

# Total Number of Players 

In [12]:
# Load the purchase records; each row is one purchase and "SN" is the
# player's screen name, so distinct SN values correspond to distinct players.
players = pd.read_json(file_output_purchases_json)
unique_players = pd.unique(players["SN"])
total_unique_players = len(unique_players)

# Single-row summary table holding the player head-count.
players_total_dict = [{"Total Players": total_unique_players}]
total_players_summary = pd.DataFrame(players_total_dict, columns=["Total Players"])
total_players_summary

Unnamed: 0,Total Players
0,573


 # Purchasing Analysis (Total)

In [13]:
# Purchasing analysis (totals): unique item count, average price,
# number of purchases and total revenue across every purchase record.
# `purchase_df` defined here is reused by the later per-gender / per-age /
# per-player / per-item cells.
purchase_df = pd.read_json(file_output_purchases_json)

# Number of unique items purchased
unique_item_count = len(purchase_df["Item ID"].unique())
# Average purchase price
average_item_price = purchase_df["Price"].mean()
# Total number of purchases (one row per purchase)
total_purchases = len(purchase_df)
# Total revenue
total_purchase_rev = purchase_df["Price"].sum()

# Purchasing analysis table (numeric values, one row)
purchasing_analysis = pd.DataFrame({
    "Number of Unique Items": [unique_item_count],
    "Average Price": [average_item_price],
    "Number of Purchases": [total_purchases],
    "Total Revenue": [total_purchase_rev],
})

# Organizing and formatting the analysis for display.
# .copy() makes the display table an independent frame, so the column
# assignments below do not trigger pandas' SettingWithCopyWarning and the
# numeric `purchasing_analysis` frame is left untouched.
organized_pur_ana = purchasing_analysis[
    ["Number of Unique Items", "Average Price", "Number of Purchases", "Total Revenue"]
].copy()
organized_pur_ana["Average Price"] = organized_pur_ana["Average Price"].map("${:.2f}".format)
organized_pur_ana["Total Revenue"] = organized_pur_ana["Total Revenue"].map("${:.2f}".format)
organized_pur_ana



Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,$2.93,780,$2286.33


# Gender Demographics

In [14]:
# Gender demographics: head-count and percentage share of unique players.
player_gender_df = pd.read_json(file_output_purchases_json)

# Keep one row per screen name so each player is counted exactly once.
no_dupe_players = player_gender_df.drop_duplicates(subset="SN")
number_in_set = no_dupe_players.shape[0]

gender_count_raw = no_dupe_players["Gender"].value_counts()
per_gender = gender_count_raw.div(number_in_set).mul(100)

# Summary table for gender demographics
gender_summary = pd.DataFrame(
    {
        "Percentage of players": per_gender.round(2),
        "Total Count": gender_count_raw,
    }
)
gender_summary

Unnamed: 0,Percentage of players,Total Count
Male,81.15,465
Female,17.45,100
Other / Non-Disclosed,1.4,8


# Purchasing Analysis (Gender)

In [15]:
# Purchasing analysis by gender: purchase count, mean price, total value,
# and total value divided by the number of unique players of that gender.
grouped_gender_df = purchase_df.groupby(["Gender"])
gender_prices = grouped_gender_df["Price"]

purchases_count = grouped_gender_df["Item ID"].count()
avg_gen_pur = gender_prices.mean()
total_gen_pur = gender_prices.sum()

# One row per player (first purchase kept) gives the per-gender head-count
# used as the denominator of the normalized totals.
drop_gender_df = purchase_df.drop_duplicates(["SN"])
players_per_gender = drop_gender_df["Gender"].value_counts()
gen_pur_norm = total_gen_pur.div(players_per_gender)

purchasing_gen_df = pd.DataFrame(
    {
        "Purchase Count": purchases_count,
        "Average Purchase Price": avg_gen_pur,
        "Total Purchase Value": total_gen_pur,
        "Normalized Totals": gen_pur_norm,
    }
)

# Render the money columns as dollar strings with two decimals.
for money_col in ("Average Purchase Price", "Total Purchase Value", "Normalized Totals"):
    purchasing_gen_df[money_col] = purchasing_gen_df[money_col].map("${:.2f}".format)

purchasing_gen_org = purchasing_gen_df[
    ["Purchase Count", "Average Purchase Price", "Total Purchase Value", "Normalized Totals"]
]
purchasing_gen_org

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,136,$2.82,$382.91,$3.83
Male,633,$2.95,$1867.68,$4.02
Other / Non-Disclosed,11,$3.25,$35.74,$4.47


# Purchasing Analysis (Age)

In [16]:
# Purchasing analysis by age group: purchase count, average price, total
# value, and total value divided by the number of unique players in the group.
age_demo_df = purchase_df.groupby(["Age"])  # not used in this cell; kept in case another cell references it

# pd.cut uses right-inclusive intervals: (0, 9], (9, 14], ..., (39, 100].
age_bins = [0, 9, 14, 19, 24, 29, 34, 39, 100]
age_labels = ["< 10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

# Put players into bins based on their age.
# NOTE: this adds an "Age Group" column to the shared purchase_df; re-running
# the cell simply overwrites the column with the same values.
purchase_df["Age Group"] = pd.cut(purchase_df["Age"], age_bins, labels=age_labels)

age_group = purchase_df.groupby("Age Group")
age_group_total = age_group.count()  # not used in this cell; kept in case another cell references it
age_count = age_group["Age"].count()
age_total = age_group["Price"].sum()
age_avg = age_total / age_count

# Despite the name, dupe_age holds the de-duplicated rows (one per player);
# its per-group counts are the denominator for the normalized totals.
dupe_age = purchase_df.drop_duplicates(["SN"])
age_pur_norm = age_total / dupe_age["Age Group"].value_counts()

age_group_analysis = pd.DataFrame({"Purchase Count": age_count,
                                   "Average Purchase Price": age_avg,
                                   "Total Purchase Value": age_total,
                                   "Normalized Totals": age_pur_norm
                                   })

# Index alignment between the series above can leave the rows sorted as plain
# strings ("10-14" first, "< 10" last). Reindex to age_labels so the table is
# always in ascending age order. The trailing .copy() makes the result an
# independent frame, so the formatting assignments below cannot raise
# SettingWithCopyWarning.
age_group_analysis = (
    age_group_analysis[["Purchase Count", "Average Purchase Price",
                        "Total Purchase Value", "Normalized Totals"]]
    .reindex(age_labels)
    .copy()
)

# Render the money columns as dollar strings with two decimals.
for age_money_col in ("Average Purchase Price", "Normalized Totals", "Total Purchase Value"):
    age_group_analysis[age_money_col] = age_group_analysis[age_money_col].map("${:.2f}".format)
age_group_analysis

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
10-14,35,$2.77,$96.95,$4.22
15-19,133,$2.91,$386.42,$3.86
20-24,336,$2.91,$978.77,$3.78
25-29,125,$2.96,$370.33,$4.26
30-34,64,$3.08,$197.25,$4.20
35-39,42,$2.84,$119.40,$4.42
40+,17,$3.16,$53.75,$4.89
< 10,28,$2.98,$83.46,$4.39


# Top Spenders

In [17]:
# Top spenders: per-player purchase count, average purchase value and total
# spend, ranked by total spend (highest first).
spenders_df = purchase_df.groupby(["SN"])
spender_count = spenders_df["Item Name"].count()
spender_total = spenders_df["Price"].sum()
spender_avg = spender_total.div(spender_count)

top_spenders_df = pd.DataFrame(
    {
        "Total Purchases": spender_count,
        "Average Purchase Value": spender_avg,
        "Total Purchase Value": spender_total,
    }
)

# Fix the column order, then rank while the values are still numeric;
# the dollar formatting afterwards turns them into strings.
spender_columns = ["Total Purchases", "Average Purchase Value", "Total Purchase Value"]
spenders_analysis = top_spenders_df[spender_columns]
spenders_analysis = spenders_analysis.sort_values(by="Total Purchase Value", ascending=False)

for spender_money_col in ("Average Purchase Value", "Total Purchase Value"):
    spenders_analysis[spender_money_col] = spenders_analysis[spender_money_col].map("${:.2f}".format)
spenders_analysis.head()

Unnamed: 0_level_0,Total Purchases,Average Purchase Value,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,5,$3.41,$17.06
Saedue76,4,$3.39,$13.56
Mindimnya67,4,$3.18,$12.74
Haellysu29,3,$4.24,$12.73
Eoda93,3,$3.86,$11.58


# Most Popular Items

In [18]:
# Most popular items: the five (Item ID, Item Name) pairs with the highest
# purchase counts, with their price and total purchase value.
items_df = purchase_df.groupby(["Item ID", "Item Name"])
item_purchase_count = items_df["Item ID"].count()
# unique() yields an array of distinct prices per item; .str.get(0) below
# picks the first entry of each array as the item's price.
item_price = items_df["Price"].unique()
item_purchase_total = items_df["Price"].sum()

items_analysis = pd.DataFrame(
    {
        "Purchase Count": item_purchase_count,
        "Item Price": item_price.str.get(0),
        "Total Purchase Value": item_purchase_total,
    }
).sort_values(by="Purchase Count", ascending=False)

# Dollar-format the money columns; "Purchase Count" stays numeric so it can
# still be ranked by nlargest.
for item_money_col in ("Total Purchase Value", "Item Price"):
    items_analysis[item_money_col] = items_analysis[item_money_col].map("${:.2f}".format)

popular_five = items_analysis.nlargest(5, "Purchase Count")
popular_five

Unnamed: 0_level_0,Unnamed: 1_level_0,Item Price,Purchase Count,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
39,"Betrayal, Whisper of Grieving Widows",$2.35,11,$25.85
84,Arcane Gem,$2.23,11,$24.53
31,Trickster,$2.07,9,$18.63
175,Woeful Adamantite Claymore,$1.24,9,$11.16
13,Serenity,$1.49,9,$13.41


# Most Profitable Items

In [19]:
# Most profitable items: the five (Item ID, Item Name) pairs with the highest
# total purchase value, with their price and purchase count.
item_id_df = purchase_df.groupby(["Item ID", "Item Name"])
item_purchase_count = item_id_df["Item ID"].count()
# unique() yields an array of distinct prices per item; .str.get(0) below
# picks the first entry of each array as the item's price.
item_price = item_id_df["Price"].unique()
item_purchase_total = item_id_df["Price"].sum()

item_id_analysis = pd.DataFrame(
    {
        "Purchase Count": item_purchase_count,
        "Item Price": item_price.str.get(0),
        "Total Purchase Value": item_purchase_total,
    }
)

# Rank on the numeric totals first, then dollar-format only the top five.
profitable_five_temp = item_id_analysis.nlargest(5, "Total Purchase Value")
for profit_money_col in ("Total Purchase Value", "Item Price"):
    profitable_five_temp[profit_money_col] = profitable_five_temp[profit_money_col].map("${:.2f}".format)
profitable_five_temp

Unnamed: 0_level_0,Unnamed: 1_level_0,Item Price,Purchase Count,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,$4.14,9,$37.26
115,Spectral Diamond Doomblade,$4.25,7,$29.75
32,Orenmir,$4.95,6,$29.70
103,Singed Scalpel,$4.87,6,$29.22
107,"Splitter, Foe Of Subtlety",$3.61,8,$28.88
