# Heroes Of Pymoli Data Analysis

OBSERVED TREND 1: The percentage of female and male in players (17.45% and 81.15%) is similar to the percentage of purchases made by female and male(16.74 and 81.69%).

In [None]:
OBSERVED TREND 2: Item with ID 39 and name "Betrayal, Whisper of Grieving Widows" and item with ID 84 and name "Arcane Gem" are the most popular items, with 11 purchases each.  

In [None]:
OBSERVED TREND 3: Item with ID 34 and name "Retribution Axe" is the most profitable item, bringing in $37.26 revenue.

In [1]:
# Import dependencies
import pandas as pd
import numpy as np

#set max rows viewable
pd.options.display.max_rows = 400

# Read the .json file
file_path = "./purchase_data.json"
game_df = pd.read_json(file_path, orient = "columns", encoding = "UTF-8")

# Optional: explore the data
# print(game_df.count().head(), "\n", "\n",
# game_df.columns, "\n", "\n",
# game_df["Item ID"].value_counts().head(), "\n", "\n",
# game_df["Item Name"].value_counts().head(), "\n", "\n",
# game_df["SN"].value_counts().head(), "\n", "\n",
# game_df.head())

# Player Count

In [2]:
# Count unique players
total_players = len(game_df["SN"].unique())

# Convert the value into a DataFrame
pd.DataFrame(
    {
        "Total Players":[str(total_players)]
    }
)

Unnamed: 0,Total Players
0,573


# Purchasing Analysis (Total)

In [3]:
# Calculations
number_of_unique_items = len(game_df["Item ID"].unique())
total_revenue = game_df["Price"].sum()
average_purchase_price = total_revenue / len(game_df)
total_number_of_purchases = len(game_df)

# Save results into a dataframe
analysis_total_df = pd.DataFrame(
    {
    "Number of Unique Items" : [str(number_of_unique_items)],
    "Average Purchase Price" : ["$" + ("{:.2f}".format(average_purchase_price))],
    "Total Number of Purchases" : [str(total_number_of_purchases)],
    "Total Revenue" : ["$" + str(total_revenue)]
    }
)

# Arrange columns of the dataframe
analysis_total_df[["Number of Unique Items","Average Purchase Price","Total Number of Purchases","Total Revenue"]]

Unnamed: 0,Number of Unique Items,Average Purchase Price,Total Number of Purchases,Total Revenue
0,183,$2.93,780,$2286.33


# Gender Demographics

In [4]:
# Group the dataframe by players, and convert the data series into a dataframe
sn_group = game_df.groupby(game_df["SN"])
sn_df = pd.DataFrame(sn_group.max())

# Count males and females, and calculate percentage of males and females
gender_df = pd.DataFrame(sn_df["Gender"].value_counts())
gender_df["Percentage of Players"] = gender_df["Gender"] / total_players * 100

# Rename the Gender column
gender_df = gender_df.rename(columns = {"Gender" : "Total Count"})

# Format percentage, and view in the desired column sequence
gender_df["Percentage of Players"] = gender_df["Percentage of Players"].map("{:.2f}".format)

# Arrange columns for viewing
gender_df[["Percentage of Players","Total\xa0Count"]]

Unnamed: 0,Percentage of Players,Total Count
Male,81.15,465
Female,17.45,100
Other / Non-Disclosed,1.4,8


# Purchasing Analysis (Gender)

In [5]:
# Using GroupBy in order to separate the data into fields according to "Gender" values
gender_group = game_df.groupby(["Gender"])
gender_group.count()
purchase_count = gender_group["Price"].count()
purchase_total = gender_group["Price"].sum()

# Creating a new DataFrame using both values and count
gender_analysis = pd.DataFrame(
    {
        "Purchase Count" : purchase_count,
        "Total Purchase Value" : purchase_total
    }
)

# Calculate average purchase price, and normalized totals
gender_analysis["Average Purchase Price"] = purchase_total / purchase_count
a = gender_analysis["Total Purchase Value"]
gender_analysis["Normalized Totals"] = (a-a.min())/(a.max()-a.min())

# Format columns to two decimals, and add $
gender_analysis["Total Purchase Value"] = gender_analysis["Total Purchase Value"].map("${:.2f}".format)
gender_analysis["Average Purchase Price"] = gender_analysis["Average Purchase Price"].map("${:.2f}".format)
gender_analysis["Normalized Totals"] = gender_analysis["Normalized Totals"].map("{:.2f}".format)

# Arrange columns for viewing
gender_analysis[["Purchase Count", "Average Purchase Price", "Total Purchase Value", "Normalized Totals"]]

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,136,$2.82,$382.91,0.19
Male,633,$2.95,$1867.68,1.0
Other / Non-Disclosed,11,$3.25,$35.74,0.0


# Age Demographics

In [6]:
# Find out the mininmum and maximum age
# print(game_df["Age"].min(), game_df["Age"].max())

# Generate bins and labels based on min and max
bins = []
for i in range(1,13):
    bins.append(4 * i)
group_names = ["04-08", "08-12", "12-16", "16-20", "20-24", "24-28", "28-32", "32-36", "36-40", "40-44", "44-48"]

# Cut Age and place the values into bins
game_df["Age Summary"] = pd.cut(game_df["Age"], bins, labels=group_names)
age_group = game_df.groupby("Age Summary")

# Get data series of purchase_count and data frame of total_purchase_value, and create a new dataframe
purchase_count = game_df["Age Summary"].value_counts()
total_purchase_value = age_group["Price"].sum()
age_demo = pd.DataFrame(
    {
        "Purchase Count" : purchase_count,
        "Total Purchase Value" : total_purchase_value
    }
)

# Calculate average and normalized value:
age_demo["Average Purchase Price"] = age_demo["Total Purchase Value"] / age_demo["Purchase Count"]
a = age_demo["Total Purchase Value"]
age_demo["Normalized Totals"] = (a-a.min())/(a.max()-a.min())

# Format columns to two decimals, and add $
age_demo["Total Purchase Value"] = age_demo["Total Purchase Value"].map("${:.2f}".format)
age_demo["Average Purchase Price"] = age_demo["Average Purchase Price"].map("${:.2f}".format)
age_demo["Normalized Totals"] = age_demo["Normalized Totals"].map("{:.2f}".format)

# Arrange rows and columns for viewing
age_demo[["Purchase Count", "Average Purchase Price", "Total Purchase Value","Normalized Totals"]]

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
04-08,22,$2.79,$61.34,0.08
08-12,24,$3.39,$81.25,0.11
12-16,87,$2.75,$238.89,0.34
16-20,161,$2.91,$468.03,0.67
20-24,238,$2.92,$696.09,1.0
24-28,104,$2.97,$309.37,0.44
28-32,66,$3.06,$202.09,0.29
32-36,38,$2.98,$113.28,0.16
36-40,37,$2.90,$107.35,0.15
40-44,2,$2.96,$5.92,0.0


# Top Spenders

In [19]:
# Groupby and get data series of purchase_count and total_purchase_value
purchase_count = game_df["SN"].value_counts()
total_purchase_value = sn_group["Price"].sum()

# Create a new dataframe from data series
top_spenders = pd.DataFrame(
    {
        "Purchase Count" : purchase_count,
        "Total Purchase Value" : total_purchase_value
    }
)

#calculate average
top_spenders["Average Purchase Price"] = total_purchase_value / purchase_count

#format into decimals and add $
top_spenders["Average Purchase Price"] = top_spenders["Average Purchase Price"].map("${:.2f}".format)
top_spenders["Total Purchase Value"] = top_spenders["Total Purchase Value"].map("${:.2f}".format)

# Arrange columns for viewing
top_spenders[["Purchase Count", "Average Purchase Price", "Total Purchase Value"]]

# Sort dataframe by purchase count
top_spenders.sort_values(by = ["Purchase Count"],ascending=False)

Unnamed: 0,Purchase Count,Total Purchase Value,Average Purchase Price
Undirrala66,5,$17.06,$3.41
Mindimnya67,4,$12.74,$3.18
Qarwen67,4,$9.97,$2.49
Saedue76,4,$13.56,$3.39
Sondastan54,4,$10.24,$2.56
Hailaphos89,4,$5.87,$1.47
Yadanun74,3,$9.09,$3.03
Lirtosia72,3,$8.37,$2.79
Seorithstilis90,3,$8.39,$2.80
Eusri70,3,$10.55,$3.52


# Most Popular Items

In [8]:
# Using groupby to get data series of purchase count, item price and total purchase value
item_group = game_df.groupby(["Item ID", "Item Name"])
purchase_count = item_group["Gender"].count()
item_price = item_group["Price"].mean()
total_purchase_value = item_group["Price"].sum()

# Create a dataframe using data series
popular_items = pd.DataFrame(
    {
        "Purchase Count" : purchase_count,
        "Item Price" : item_price,
        "Total Purchase Value" : total_purchase_value
    }
)

# Sort the dataframe by purchase count, and format
popular_items = popular_items.sort_values("Purchase Count", ascending = False)
popular_items["Item Price"] = popular_items["Item Price"].map("${:.2f}".format)
popular_items["Total Purchase Value"] = popular_items["Total Purchase Value"].map("${:.2f}".format)

# Arrange columns for viewing
columns = ["Purchase Count", "Item Price", "Total Purchase Value"]
popular_items[columns].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
39,"Betrayal, Whisper of Grieving Widows",11,$2.35,$25.85
84,Arcane Gem,11,$2.23,$24.53
31,Trickster,9,$2.07,$18.63
175,Woeful Adamantite Claymore,9,$1.24,$11.16
13,Serenity,9,$1.49,$13.41


# Most Profitable Items

In [9]:
# Using groupby to get data series of purchase count, item price and total purchase value
item_group = game_df.groupby(["Item ID", "Item Name"])
purchase_count = item_group["Gender"].count()
item_price = item_group["Price"].mean()
total_purchase_value = item_group["Price"].sum()

# Create a dataframe using data series
popular_items = pd.DataFrame(
    {
        "Purchase Count" : purchase_count,
        "Item Price" : item_price,
        "Total Purchase Value" : total_purchase_value
    }
)

# Sort the dataframe by total purchase value, and format
popular_items = popular_items.sort_values("Total Purchase Value", ascending = False)
popular_items["Item Price"] = popular_items["Item Price"].map("${:.2f}".format)
popular_items["Total Purchase Value"] = popular_items["Total Purchase Value"].map("${:.2f}".format)

# Arrange columns for viewing
columns = ["Purchase Count", "Item Price", "Total Purchase Value"]
popular_items[columns].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,9,$4.14,$37.26
115,Spectral Diamond Doomblade,7,$4.25,$29.75
32,Orenmir,6,$4.95,$29.70
103,Singed Scalpel,6,$4.87,$29.22
107,"Splitter, Foe Of Subtlety",8,$3.61,$28.88
