In [2]:
import pandas as pd
import numpy as np
import os

In [3]:
file_to_load = os.path.join("../HeroesofPymoli", "purchase_data.json")

In [4]:
df = pd.read_json(file_to_load, orient='records')
df.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


In [5]:
# total player count
player_demographics = df.loc[:, ["Gender", "Age", "SN"]]
player_demographics = player_demographics.drop_duplicates()
player_count = player_demographics.count()[0]

pd.DataFrame({"Total Players":[player_count]})

Unnamed: 0,Total Players
0,573


# Purchasing Analysis

In [6]:
# unique item count
unique_item_count = len(df['Item ID'].unique())

# average purchase price
average_purchase_price = df['Price'].mean()

# total number of purchase
total_purchase_count = df.count()[0]

# total revenue
total_revenue = df['Price'].sum()

# convert to purchasing analysis dataframe
purchasing_summary = pd.DataFrame({"Total Item Count":[unique_item_count], "Average Purchase Price":[average_purchase_price],
                 "Total Purchase Count":[total_purchase_count], "Total Revenue":[total_revenue]})

# quick data munging
purchasing_summary = round(purchasing_summary, 2)
purchasing_summary["Average Purchase Price"] = purchasing_summary["Average Purchase Price"].map("${:,.2f}".format)
purchasing_summary["Total Revenue"] = purchasing_summary["Total Revenue"].map("${:,.2f}".format)

# # printed out, non-truncated data
# purchasing_analysis = (
#     f"---------------------------------"
#     f"\nTotal item count: {unique_item_count}"
#     f"\nAverage purchase price: {average_purchase_price}"
#     f"\nTotal purchase count: {total_purchase_count}"
#     f"\nTotal revenue: {total_revenue}"
#     f"\n---------------------------------"
# )

# print(purchasing_analysis)

# in dataframe format, truncated
purchasing_summary

Unnamed: 0,Average Purchase Price,Total Item Count,Total Purchase Count,Total Revenue
0,$2.93,183,780,"$2,286.33"


# Gender Demographics

In [7]:
gender_demographics_totals = player_demographics["Gender"].value_counts()
gender_demographics_percents = gender_demographics_totals / player_count * 100
gender_summary = pd.DataFrame({"Total Count": gender_demographics_totals, "Percentage of Players": gender_demographics_percents})

# quick data munging
gender_summary = round(gender_summary, 2)
gender_summary['Percentage of Players'] = gender_summary['Percentage of Players'].map("{:,.2f}%".format)

gender_summary

Unnamed: 0,Percentage of Players,Total Count
Male,81.15%,465
Female,17.45%,100
Other / Non-Disclosed,1.40%,8


# Purchasing Analysis by Gender

In [8]:
# group df by gender
gender_group = df.groupby(df['Gender'])

# get stats by gender
gender_purchase_count = gender_group['SN'].count()
gender_average_purchase_price = gender_group['Price'].mean()
gender_total_purchase_value = gender_group['Price'].sum()
gender_normalized_totals = (gender_total_purchase_value / gender_purchase_count) * 100

# dump gender purchasing stats into summary dataframe
gender_purchase_summary = pd.DataFrame({"Purchase Count":gender_purchase_count,
                                      "Average Purchase Price":gender_average_purchase_price,
                                       "Total Purchase Value":gender_total_purchase_value,
                                       "Normalized Totals":gender_normalized_totals})

# quick data munging
gender_purchase_summary = round(gender_purchase_summary, 2)
gender_purchase_summary['Average Purchase Price'] = gender_purchase_summary['Average Purchase Price'].map("${:,.2f}".format)
gender_purchase_summary['Normalized Totals'] = gender_purchase_summary['Normalized Totals'].map("${:,.2f}".format)
gender_purchase_summary['Total Purchase Value'] = gender_purchase_summary['Total Purchase Value'].map("${:,.2f}".format)

gender_purchase_summary


Unnamed: 0_level_0,Average Purchase Price,Normalized Totals,Purchase Count,Total Purchase Value
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,$2.82,$281.55,136,$382.91
Male,$2.95,$295.05,633,"$1,867.68"
Other / Non-Disclosed,$3.25,$324.91,11,$35.74


# Age Demographics

In [9]:
# split data into bins on age
bins = [0, 10, 18, 25, 100]

# groups for each bin
group_names = ['Child', 'Teenager', 'Young Adult', 'Adult']

In [10]:
# add age group column to dataframe
df['Age Group'] = pd.cut(df['Age'], bins, labels=group_names)
df.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN,Age Group
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34,Adult
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46,Young Adult
2,34,Male,174,Primitive Blade,2.46,Assastnya25,Adult
3,21,Male,92,Final Critic,1.36,Pheusrical25,Young Adult
4,23,Male,63,Stormfury Mace,1.27,Aela59,Young Adult


In [11]:
# analysis by age group
age_group = df.groupby(df['Age Group'])

# gather stats by age group
age_group_purchase_count = age_group['Price'].count()
age_group_average_purchase_price = age_group['Price'].mean()
age_group_total_purchase_value = age_group['Price'].sum()
age_group_normalized_totals = (age_group_total_purchase_value/age_group_purchase_count) * 100

# build age group summary dataframe
age_group_summary = pd.DataFrame({
    "Purchase Count":age_group_purchase_count,
    "Average Purchase Price":age_group_average_purchase_price,
    "Total Purchase Value":age_group_total_purchase_value,
    "Normalized Totals":age_group_normalized_totals
})

# quick data mungin
age_group_summary = round(age_group_summary, 2)
age_group_summary['Average Purchase Price'] = age_group_summary['Average Purchase Price'].map("${:,.2f}".format)
age_group_summary['Normalized Totals'] = age_group_summary['Normalized Totals'].map("${:,.2f}".format)
age_group_summary['Total Purchase Value'] = age_group_summary['Total Purchase Value'].map("${:,.2f}".format)

age_group_summary

Unnamed: 0_level_0,Average Purchase Price,Normalized Totals,Purchase Count,Total Purchase Value
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Child,$3.02,$301.94,32,$96.62
Teenager,$2.84,$283.88,142,$403.11
Young Adult,$2.95,$294.68,425,"$1,252.39"
Adult,$2.95,$295.14,181,$534.21


# Top Spenders

In [12]:
# group by player (SN)
spenders_group = df.groupby(['SN'])

# top spender statistics
spenders_sum = spenders_group['Price'].sum()
spenders_count = spenders_group['Price'].count()
spenders_average = spenders_group['Price'].mean()

# convert spender summary to dataframe
spender_summary = pd.DataFrame({
    "Total Spent":spenders_sum,
    "Purchase Count":spenders_count,
    "Average Purchase Price":spenders_average
})


# sort table by total spent, pull top 5 rows (spenders)
spender_summary = spender_summary.sort_values('Total Spent', ascending=False)
spender_summary = spender_summary[:5]

# quick data mungin
spender_summary = round(spender_summary, 2)
spender_summary['Average Purchase Price'] = spender_summary['Average Purchase Price'].map("${:,.2f}".format)
spender_summary['Total Spent'] = spender_summary['Total Spent'].map("${:,.2f}".format)
spender_summary

Unnamed: 0_level_0,Average Purchase Price,Purchase Count,Total Spent
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,$3.41,5,$17.06
Saedue76,$3.39,4,$13.56
Mindimnya67,$3.18,4,$12.74
Haellysu29,$4.24,3,$12.73
Eoda93,$3.86,3,$11.58


# Most Popular Items

In [25]:
# group by item
items_group = df.groupby(['Item ID', 'Item Name'])

# popular items statistics
purchase_count = items_group['Item Name'].count()
item_price = items_group['Price'].mean()
total_purchase_value = items_group['Price'].sum()

# convert items summary to dataframe
items_summary = pd.DataFrame({
    "Purchase Count": purchase_count,
    "Item Price": item_price,
    "Total Purchase Value": total_purchase_value
})

# sort table by purchase count, pull top 5 rows (items)
popular_summary = items_summary.sort_values('Purchase Count', ascending=False)
popular_summary = popular_summary[:5]

# data mungin
popular_summary['Item Price'] = popular_summary['Item Price'].map("${:,.2f}".format)
popular_summary['Total Purchase Value'] = popular_summary['Total Purchase Value'].map("${:,.2f}".format)


popular_summary

Unnamed: 0_level_0,Unnamed: 1_level_0,Item Price,Purchase Count,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
39,"Betrayal, Whisper of Grieving Widows",$2.35,11,$25.85
84,Arcane Gem,$2.23,11,$24.53
31,Trickster,$2.07,9,$18.63
175,Woeful Adamantite Claymore,$1.24,9,$11.16
13,Serenity,$1.49,9,$13.41


# Most Profitable Items

In [30]:
# this time, sort items_summary dataframe by total purchase value--not purchase count
profitable_items = items_summary.sort_values('Total Purchase Value', ascending=False)

# slice top 5 rows (top 5 items by total purchase value)
profitable_items = profitable_items[:5]

In [31]:
profitable_items

Unnamed: 0_level_0,Unnamed: 1_level_0,Item Price,Purchase Count,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,4.14,9,37.26
115,Spectral Diamond Doomblade,4.25,7,29.75
32,Orenmir,4.95,6,29.7
103,Singed Scalpel,4.87,6,29.22
107,"Splitter, Foe Of Subtlety",3.61,8,28.88
