# Heroes Of Pymoli Data Analysis

OBSERVED TREND 1: The normalized total values for women vs. men ($3.83 vs. $4.02) are very close, although male players make up a higher percentage of total players than female (81.15% vs 17.45%).

OBSERVED TREND 2: Players between 20 and 24 years old spend the most ($978.77) among all ages. However, their normalized total value is the lowest ($3.78). Players elder than 40 years old has the highest normalized total value($4.89). 

OBSERVED TREND 3: Item with ID 39 and name "Betrayal, Whisper of Grieving Widows" and item with ID 84 and name "Arcane Gem" are the most popular items, with 11 purchases each. Item with ID 34 and name "Retribution Axe" is the most profitable item, bringing in $37.26 of revenue.

In [1]:
# Import dependencies
import pandas as pd
import numpy as np

#set max rows viewable
pd.options.display.max_rows = 400

# Read the .json file
file_path = "./purchase_data.json"
game_df = pd.read_json(file_path, orient = "columns", encoding="ISO-8859-1")

# Optional: explore the data
# print(game_df.count().head(), "\n", "\n",
# game_df.columns, "\n", "\n",
# game_df["Item ID"].value_counts().head(), "\n", "\n",
# game_df["Item Name"].value_counts().head(), "\n", "\n",
# game_df["SN"].value_counts().head(), "\n", "\n",
# game_df.head())

# Player Count

In [2]:
# Count unique players
total_players = len(game_df["SN"].unique())

# Convert the value into a DataFrame
pd.DataFrame(
    {
        "Total Players":[str(total_players)]
    }
)

Unnamed: 0,Total Players
0,573


# Purchasing Analysis (Total)

In [3]:
# Calculations
number_of_unique_items = len(game_df["Item ID"].unique())
total_revenue = game_df["Price"].sum()
average_purchase_price = total_revenue / len(game_df)
total_number_of_purchases = len(game_df)

# Save results into a dataframe
analysis_total_df = pd.DataFrame(
    {
    "Number of Unique Items" : [str(number_of_unique_items)],
    "Average Purchase Price" : ["$" + ("{:.2f}".format(average_purchase_price))],
    "Total Number of Purchases" : [str(total_number_of_purchases)],
    "Total Revenue" : ["$" + str(total_revenue)]
    }
)

# Arrange columns of the dataframe
analysis_total_df[["Number of Unique Items","Average Purchase Price","Total Number of Purchases","Total Revenue"]]

Unnamed: 0,Number of Unique Items,Average Purchase Price,Total Number of Purchases,Total Revenue
0,183,$2.93,780,$2286.33


# Gender Demographics

In [4]:
# Group the dataframe by players, and convert the data series into a dataframe
sn_group = game_df.groupby(game_df["SN"])
sn_df = pd.DataFrame(sn_group.max())

# Count males and females, and calculate percentage of males and females
gender_df = pd.DataFrame(sn_df["Gender"].value_counts())
gender_df["Percentage of Players"] = gender_df["Gender"] / total_players * 100

# Rename the Gender column
gender_df = gender_df.rename(columns = {"Gender" : "Total Count"})

# Format percentage, and view in the desired column sequence
gender_df["Percentage of Players"] = gender_df["Percentage of Players"].map("{:.2f}".format)

# Arrange columns for viewing
gender_df[["Percentage of Players","Total\xa0Count"]]

Unnamed: 0,Percentage of Players,Total Count
Male,81.15,465
Female,17.45,100
Other / Non-Disclosed,1.4,8


# Purchasing Analysis (Gender)

In [5]:
# Using GroupBy in order to separate the data into fields according to "Gender" values
gender_group = game_df.groupby(["Gender"])
gender_group.count()
purchase_count = gender_group["Price"].count()
purchase_total = gender_group["Price"].sum()

# Creating a new DataFrame using both values and count
gender_analysis = pd.DataFrame(
    {
        "Purchase Count" : purchase_count,
        "Total Purchase Value" : purchase_total
    }
)

# Calculate average purchase price, and normalized totals
gender_analysis["Average Purchase Price"] = purchase_total / purchase_count
gender_analysis["Normalized Totals"] = gender_analysis["Total Purchase Value"]/gender_df["Total\xa0Count"]

# Format columns to two decimals, and add $
gender_analysis["Total Purchase Value"] = gender_analysis["Total Purchase Value"].map("${:.2f}".format)
gender_analysis["Average Purchase Price"] = gender_analysis["Average Purchase Price"].map("${:.2f}".format)
gender_analysis["Normalized Totals"] = gender_analysis["Normalized Totals"].map("${:.2f}".format)

# Arrange columns for viewing
gender_analysis[["Purchase Count", "Average Purchase Price", "Total Purchase Value", "Normalized Totals"]]

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,136,$2.82,$382.91,$3.83
Male,633,$2.95,$1867.68,$4.02
Other / Non-Disclosed,11,$3.25,$35.74,$4.47


# Age Demographics

In [6]:
# Find out the mininmum and maximum age
# print(game_df["Age"].min(), game_df["Age"].max())

# Generate bins and labels based on min and max
bins = [0, 9, 14, 19, 24, 29, 34, 39, 46]
group_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

# Cut Age and place the values into bins
age_demo = pd.DataFrame(game_df.groupby("SN")["Age"].mean())
age_demo["Age Summary"] = pd.cut(age_demo["Age"], bins, labels=group_names)

# Using value_counts to get number of players in each age bin
age_demo = pd.DataFrame(age_demo["Age Summary"].value_counts())

# Rename column
age_demo = age_demo.rename(columns = {"Age Summary" : "Total Count"})

# Calculate percentage, format cells to %, arrange column display, and sort dataframe index
age_demo["Percentages of Players"] = age_demo["Total Count"] / age_demo["Total Count"].sum()
age_demo["Percentages of Players"] = (age_demo["Percentages of Players"] *100).map("{:.2f}%".format)
age_demo[["Percentages of Players","Total Count"]]
age_demo.sort_index()

Unnamed: 0,Total Count,Percentages of Players
<10,19,3.32%
10-14,23,4.01%
15-19,100,17.45%
20-24,259,45.20%
25-29,87,15.18%
30-34,47,8.20%
35-39,27,4.71%
40+,11,1.92%


# Purchasing Analysis (Age)

In [7]:
# Find out the mininmum and maximum age
# print(game_df["Age"].min(), game_df["Age"].max())

# Generate bins and labels based on min and max
bins = [0, 9, 14, 19, 24, 29, 34, 39, 46]
group_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

# Cut Age and place the values into bins
game_df["Age Summary"] = pd.cut(game_df["Age"], bins, labels=group_names)
age_group = game_df.groupby("Age Summary")

# Get data series of purchase_count and data frame of total_purchase_value, and create a new dataframe
purchase_count = game_df["Age Summary"].value_counts()
total_purchase_value = age_group["Price"].sum()
age_analysis = pd.DataFrame(
    {
        "Purchase Count" : purchase_count,
        "Total Purchase Value" : total_purchase_value
    }
)

# Calculate average and normalized value:
age_analysis["Average Purchase Price"] = age_analysis["Total Purchase Value"] / age_analysis["Purchase Count"]
age_analysis["Normalized Totals"] = age_analysis["Total Purchase Value"]/ age_demo["Total Count"]

# Format columns to two decimals, and add $
age_analysis["Total Purchase Value"] = age_analysis["Total Purchase Value"].map("${:.2f}".format)
age_analysis["Average Purchase Price"] = age_analysis["Average Purchase Price"].map("${:.2f}".format)
age_analysis["Normalized Totals"] = age_analysis["Normalized Totals"].map("${:.2f}".format)

# Arrange rows and columns for viewing
age_analysis[["Purchase Count", "Average Purchase Price", "Total Purchase Value","Normalized Totals"]]

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
10-14,35,$2.77,$96.95,$4.22
15-19,133,$2.91,$386.42,$3.86
20-24,336,$2.91,$978.77,$3.78
25-29,125,$2.96,$370.33,$4.26
30-34,64,$3.08,$197.25,$4.20
35-39,42,$2.84,$119.40,$4.42
40+,17,$3.16,$53.75,$4.89
<10,28,$2.98,$83.46,$4.39


# Top Spenders

In [15]:
# Groupby and get data series of purchase_count and total_purchase_value
purchase_count = game_df["SN"].value_counts()
total_purchase_value = sn_group["Price"].sum()

# Create a new dataframe from data series
top_spenders = pd.DataFrame(
    {
        "Purchase Count" : purchase_count,
        "Total Purchase Value" : total_purchase_value
    }
)

#calculate average
top_spenders["Average Purchase Price"] = total_purchase_value / purchase_count

#format into decimals and add $
top_spenders["Average Purchase Price"] = top_spenders["Average Purchase Price"].map("${:.2f}".format)
top_spenders["Total Purchase Value"] = top_spenders["Total Purchase Value"].map("${:.2f}".format)

# Arrange columns for viewing
top_spenders = top_spenders[["Purchase Count", "Average Purchase Price", "Total Purchase Value"]]

# Sort dataframe by purchase count
top_spenders.sort_values(by = ["Purchase Count"],ascending=False).head()

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value
Undirrala66,5,$3.41,$17.06
Mindimnya67,4,$3.18,$12.74
Qarwen67,4,$2.49,$9.97
Saedue76,4,$3.39,$13.56
Sondastan54,4,$2.56,$10.24


# Most Popular Items

In [9]:
# Using groupby to get data series of purchase count, item price and total purchase value
item_group = game_df.groupby(["Item ID", "Item Name"])
purchase_count = item_group["Gender"].count()
item_price = item_group["Price"].mean()
total_purchase_value = item_group["Price"].sum()

# Create a dataframe using data series
popular_items = pd.DataFrame(
    {
        "Purchase Count" : purchase_count,
        "Item Price" : item_price,
        "Total Purchase Value" : total_purchase_value
    }
)

# Sort the dataframe by purchase count, and format
popular_items = popular_items.sort_values("Purchase Count", ascending = False)
popular_items["Item Price"] = popular_items["Item Price"].map("${:.2f}".format)
popular_items["Total Purchase Value"] = popular_items["Total Purchase Value"].map("${:.2f}".format)

# Arrange columns for viewing
columns = ["Purchase Count", "Item Price", "Total Purchase Value"]
popular_items[columns].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
39,"Betrayal, Whisper of Grieving Widows",11,$2.35,$25.85
84,Arcane Gem,11,$2.23,$24.53
31,Trickster,9,$2.07,$18.63
175,Woeful Adamantite Claymore,9,$1.24,$11.16
13,Serenity,9,$1.49,$13.41


# Most Profitable Items

In [10]:
# Using groupby to get data series of purchase count, item price and total purchase value
item_group = game_df.groupby(["Item ID", "Item Name"])
purchase_count = item_group["Gender"].count()
item_price = item_group["Price"].mean()
total_purchase_value = item_group["Price"].sum()

# Create a dataframe using data series
popular_items = pd.DataFrame(
    {
        "Purchase Count" : purchase_count,
        "Item Price" : item_price,
        "Total Purchase Value" : total_purchase_value
    }
)

# Sort the dataframe by total purchase value, and format
popular_items = popular_items.sort_values("Total Purchase Value", ascending = False)
popular_items["Item Price"] = popular_items["Item Price"].map("${:.2f}".format)
popular_items["Total Purchase Value"] = popular_items["Total Purchase Value"].map("${:.2f}".format)

# Arrange columns for viewing
columns = ["Purchase Count", "Item Price", "Total Purchase Value"]
popular_items[columns].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,9,$4.14,$37.26
115,Spectral Diamond Doomblade,7,$4.25,$29.75
32,Orenmir,6,$4.95,$29.70
103,Singed Scalpel,6,$4.87,$29.22
107,"Splitter, Foe Of Subtlety",8,$3.61,$28.88
