# Heroes Of Pymoli Data Analysis
Of the 573 active players, the vast majority are male (81.2%). There also exists, a smaller, but notable proportion of female players (17.4%).

Our peak age demographic falls between 20-24 (58.6%), with secondary groups falling between 15-19 (23.2%), and 25-29 (21.2%).

In [1]:
# Dependencies
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Import json into data frame
purchase_data_df = pd.read_json("purchase_data.json")
purchase_data_df.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


## Player Count

In [3]:
# Count the total number of players by grouping the unique SN's
players_df = purchase_data_df.groupby("SN")["SN"].unique()
number_of_players = players_df.count() 

# Display number of players in dataframe 
players_df = pd.DataFrame([{ "Number of Players": number_of_players}])
players_df

Unnamed: 0,Number of Players
0,573


## Purchasing Analysis (Total)

In [4]:
# Number of Unique Items
item_count = len(purchase_data_df["Item ID"].unique())

# Average Purchase Price: mean of all purchases
average_price_df = purchase_data_df ["Price"].mean()

# Total Number of Purchases: count of all purchases
total_purchases_df = purchase_data_df["Price"].count()
total_purchases_df

# Total Revenue: sum of all purchases
total_revenue_df = purchase_data_df["Price"].sum()

# Create data frame
purchasing_analysis_df = pd.DataFrame({ "Number of Unique Items" : [item_count],
                                        "Average Price" : [round(average_price_df, 2)],
                                        "Total Number of Purchases" : [total_purchases_df],
                                        "Total Revenue" : [total_revenue_df]})

# Presenting the data  
purchasing_analysis_df ["Average Price"] = purchasing_analysis_df["Average Price"].map("${:,.2f}".format)
purchasing_analysis_df ["Total Number of Purchases"] = purchasing_analysis_df["Total Number of Purchases"].map("{:,}".format)
purchasing_analysis_df ["Total Revenue"] = purchasing_analysis_df["Total Revenue"].map("${:,.2f}".format)
purchasing_analysis_df = purchasing_analysis_df.loc[:,["Number of Unique Items", "Average Price", "Total Number of Purchases", "Total Revenue"]]
purchasing_analysis_df

Unnamed: 0,Number of Unique Items,Average Price,Total Number of Purchases,Total Revenue
0,183,$2.93,780,"$2,286.33"


## Gender Demographics

In [5]:
# Drop duplicate players names 
no_duplicate_players = purchase_data_df.drop_duplicates(['SN'], keep ='last')

# Count and percentage of Male Players
male_df = purchase_data_df.loc[purchase_data_df["Gender"] == "Male",:]
male_count = len(male_df["SN"].unique())
percent_male = round((len(male_df)/len(purchase_data_df)) * 100, 2)

# Count  and percentage of Female Players
female_df = purchase_data_df.loc[purchase_data_df["Gender"] == "Female",:]
female_count = len(female_df["SN"].unique())
percent_female = round((len(female_df)/len(purchase_data_df)) * 100, 2)

# Count and percentage of Other / Non-Disclosed
others_df = purchase_data_df.loc[purchase_data_df["Gender"] == "Other / Non-Disclosed",:]
others_count = len(others_df["SN"].unique())
percent_others = round((len(others_df)/len(purchase_data_df)) * 100, 2)

# Creating a total gender dataframe of counts and percentages
gender_demo_dict = {"Percentage Of Players" : [percent_male, percent_female, percent_others],
                    "Gender" : ["Male","Female","Other/Non-Disclosed"],
                    "Total Count" : [male_count, female_count, others_count]}

# Create as dataframe 
gender_demo_df = pd.DataFrame(gender_demo_dict)

# Present top of the data frame 
gender_demo_df

Unnamed: 0,Gender,Percentage Of Players,Total Count
0,Male,81.15,465
1,Female,17.44,100
2,Other/Non-Disclosed,1.41,8


## Purchasing Analysis (Gender)

In [6]:
# Purchase Count of Males, Females, and Others/ Non-disclosed
male_purchase = len (male_df)
female_purchase = len (female_df)
others_purchase = len (others_df)

# Average Purchase Price of Males, Females, and Others/ Non-disclosed
average_price_male = round((male_df["Price"].sum())/len(male_df["Price"]),2)
average_price_female =round((female_df["Price"].sum())/len(female_df["Price"]),2)
average_price_others = round((others_df["Price"].sum())/len(others_df["Price"]),2)

# Total Purchase Value of Males, Females, and Others/ Non-disclosed
total_value_male = round(male_df["Price"].sum(),2)
total_value_female = round(female_df["Price"].sum(),2)
total_value_others = round(others_df["Price"].sum(),2)

# Normalized Totals of Males, Females, and Others/ Non-disclosed
normalized_male = round((total_value_male/male_count), 2)
normalized_female = round((total_value_female/female_count), 2)
normalized_others = round((total_value_others/others_count), 2)

#Creating a total purchasing analysis (gender) dataframe
purchasing_gender = {"Purchase Count" : [male_purchase, female_purchase, others_purchase],
                     "Gender" : ["Male","Female","Other/Non-Disclosed"],
                     "Average Purchase Price" : [average_price_male, average_price_female, average_price_others],
                     "Total Purchase Value" : [total_value_male, total_value_female,total_value_others ],
                     "Normalized Totals" : [normalized_male, normalized_female, normalized_others ]}

purchasing_gender_df = pd.DataFrame(purchasing_gender)
purchasing_gender_df = purchasing_gender_df.set_index("Gender")

# Presenting the data
purchasing_gender_df["Average Purchase Price"] = purchasing_gender_df["Average Purchase Price"].map("${:,.2f}".format)
purchasing_gender_df["Total Purchase Vale"] = purchasing_gender_df["Total Purchase Value"].map("${:,.2f}".format)
purchasing_gender_df ["Purchase Count"] = purchasing_gender_df["Purchase Count"].map("{:,}".format)
purchasing_gender_df["Normalized Totals"] = purchasing_gender_df["Normalized Totals"].map("${:,.2f}".format)
purchasing_gender_df = purchasing_gender_df.loc[:, ["Purchase Count", "Average Purchase Price", "Total Purchase Value", "Normalized Totals"]]

# Display the Gender Table
purchasing_gender_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Male,633,$2.95,1867.68,$4.02
Female,136,$2.82,382.91,$3.83
Other/Non-Disclosed,11,$3.25,35.74,$4.47


## Age Demographics

In [7]:
# Establish the bins to put the ages into
age_bins = [0, 9.90, 14.90, 19.90, 24.90, 29.90, 34.90, 39.90, 99999]
group_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

# Categorize the existing players using the age bins
purchase_data_df["Age Ranges"] = pd.cut(purchase_data_df["Age"], age_bins, labels = group_names)

# Calculate the Numbers and Percentages by Age Group
age_demographics_totals = purchase_data_df["Age Ranges"].value_counts()
age_demographics_percents = age_demographics_totals / number_of_players * 100
age_demographics = pd.DataFrame({"Total Count": age_demographics_totals, "Percentage of Players": age_demographics_percents})

# Presenting the data
age_demographics = age_demographics.round(2)

# Display Age Demographics Table
age_demographics.sort_index()

Unnamed: 0,Percentage of Players,Total Count
<10,4.89,28
10-14,6.11,35
15-19,23.21,133
20-24,58.64,336
25-29,21.82,125
30-34,11.17,64
35-39,7.33,42
40+,2.97,17


## Purchasing Analysis (Age)

In [8]:
# Bin the Purchasing Data
purchase_data_df["Age Ranges"] = pd.cut(purchase_data_df["Age"], age_bins, labels=group_names)

# Run basic calculations
age_purchase_total = purchase_data_df.groupby(["Age Ranges"]).sum()["Price"].rename("Total Purchase Value")
age_average = purchase_data_df.groupby(["Age Ranges"]).mean()["Price"].rename("Average Purchase Price")
age_counts = purchase_data_df.groupby(["Age Ranges"]).count()["Price"].rename("Purchase Count")

# Calculate Normalized Purchasing
normalized_total = age_purchase_total / age_demographics["Total Count"]

# Convert to a Purchasing Analysis (age) DataFrame
age_data = pd.DataFrame({"Purchase Count": age_counts, "Average Purchase Price": age_average, "Total Purchase Value": age_purchase_total, "Normalized Totals": normalized_total})

# Presenting the Data
age_data["Average Purchase Price"] = age_data["Average Purchase Price"].map("${:,.2f}".format)
age_data["Total Purchase Value"] = age_data["Total Purchase Value"].map("${:,.2f}".format)
age_data ["Purchase Count"] = age_data["Purchase Count"].map("{:,}".format)
age_data["Normalized Totals"] = age_data["Normalized Totals"].map("${:,.2f}".format)
age_data = age_data.loc[:, ["Purchase Count", "Average Purchase Price", "Total Purchase Value", "Normalized Totals"]]

# Display the Age Table
age_data

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
10-14,35,$2.77,$96.95,$2.77
15-19,133,$2.91,$386.42,$2.91
20-24,336,$2.91,$978.77,$2.91
25-29,125,$2.96,$370.33,$2.96
30-34,64,$3.08,$197.25,$3.08
35-39,42,$2.84,$119.40,$2.84
40+,17,$3.16,$53.75,$3.16
<10,28,$2.98,$83.46,$2.98


## Top Spenders: Identify the the top 5 spenders in the game by total purchase value, then list (in a table)

In [9]:
# Basic Calculations
user_total = purchase_data_df.groupby(["SN"]).sum()["Price"]
user_average = purchase_data_df.groupby(["SN"]).mean()["Price"]
user_count = purchase_data_df.groupby(["SN"]).count()["Price"]

# Convert to DataFrame
user_data = pd.DataFrame({"Total Purchase Value": user_total, 
                          "Average Purchase Price": user_average, 
                          "Purchase Count": user_count})

# Display Table
user_sorted = user_data.sort_values("Total Purchase Value", ascending = False)

# Presenting the data 
user_sorted["Average Purchase Price"] = user_sorted["Average Purchase Price"].map("${:,.2f}".format)
user_sorted["Total Purchase Value"] = user_sorted["Total Purchase Value"].map("${:,.2f}".format)
user_sorted = user_sorted.loc[:,["Purchase Count", "Average Purchase Price", "Total Purchase Value"]]

# Display head of the  DataFrame
user_sorted.head()

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,5,$3.41,$17.06
Saedue76,4,$3.39,$13.56
Mindimnya67,4,$3.18,$12.74
Haellysu29,3,$4.24,$12.73
Eoda93,3,$3.86,$11.58


## Most Popular Items: Identify the 5 most popular items by purchase count, then list (in a table)

In [10]:
# Extract item Data
item_data = purchase_data_df.loc[:,["Item ID", "Item Name", "Price"]]

# Perform calculations 
total_item_purchase = item_data.groupby(["Item ID", "Item Name"]).sum()["Price"]
average_item_purchase = item_data.groupby(["Item ID", "Item Name"]).mean()["Price"]
item_count = item_data.groupby(["Item ID", "Item Name"]).count()["Price"]

# Create new DataFrame
item_data_pd = pd.DataFrame({"Total Purchase Value": total_item_purchase, 
                             "Item Price": average_item_purchase, 
                             "Purchase Count": item_count})

# Sort Values
item_data_count_sorted = item_data_pd.sort_values("Purchase Count", ascending = False)

# Present the data
item_data_count_sorted["Item Price"] = item_data_count_sorted["Item Price"].map("${:,.2f}".format)
item_data_count_sorted["Purchase Count"] = item_data_count_sorted["Purchase Count"].map("{:,}".format)
item_data_count_sorted["Total Purchase Value"] = item_data_count_sorted["Total Purchase Value"].map("${:,.2f}".format)
item_popularity = item_data_count_sorted.loc[:,["Purchase Count", "Item Price", "Total Purchase Value"]]

# Display the head of the data 
item_popularity.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
39,"Betrayal, Whisper of Grieving Widows",11,$2.35,$25.85
84,Arcane Gem,11,$2.23,$24.53
31,Trickster,9,$2.07,$18.63
175,Woeful Adamantite Claymore,9,$1.24,$11.16
13,Serenity,9,$1.49,$13.41


## Most Profitable Items

In [11]:
# Item Table (Sorted by Total Purchase Value)
item_total_purchase = item_data_pd.sort_values("Total Purchase Value", ascending = False)

# Presenting the data
item_total_purchase["Item Price"] = item_total_purchase["Item Price"].map("${:,.2f}".format)
item_total_purchase["Purchase Count"] = item_total_purchase["Purchase Count"].map("{:,}".format)
item_total_purchase["Total Purchase Value"] = item_total_purchase["Total Purchase Value"].map("${:,.2f}".format)
item_profitable = item_total_purchase.loc[:,["Purchase Count", "Item Price", "Total Purchase Value"]]

# Display the head of the data 
item_profitable.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,9,$4.14,$37.26
115,Spectral Diamond Doomblade,7,$4.25,$29.75
32,Orenmir,6,$4.95,$29.70
103,Singed Scalpel,6,$4.87,$29.22
107,"Splitter, Foe Of Subtlety",8,$3.61,$28.88
