# Heroes of Pymoli Data Analysis 
## Melvin Garcia

In [486]:
# Import Dependencies

import pandas as pd
import os

In [487]:
# load data with purchase data 1
file = os.path.join('..', 'HeroesofPymoli', 'purchase_data.json')

df = pd.read_json(file)

In [488]:
# Peek at data

df.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


## Player Count

In [489]:
player_count = len(df['SN'].unique())
total_players= {'Total Players': [player_count]}
total_players_df = pd.DataFrame(total_players)
total_players_df

Unnamed: 0,Total Players
0,573


## Purchasing Analysis (Total)

In [490]:
unique_item_num = len(df['Item Name'].unique())
avg_purchase_price = round(df['Price'].mean(), 2)
total_num_purchase = len(df['Price'])
total_revenue = round(df['Price'].sum(), 2)

purchasing_dict = {'Number of Unique Items': [unique_item_num],
                'Average Price': ['$'+str(avg_purchase_price)],
                'Number of Purchases': [total_num_purchase],
                'Total Revenue': ['$'+str(total_revenue)]}

purchasing_df = pd.DataFrame(purchasing_dict)

purchasing_df

Unnamed: 0,Average Price,Number of Purchases,Number of Unique Items,Total Revenue
0,$2.93,780,179,$2286.33


## Gender Demographics

In [491]:
df_del_dup = df.drop_duplicates(['SN'], keep='last')

male_count = len(df_del_dup.loc[df_del_dup['Gender'] == 'Male'])
female_count = len(df_del_dup.loc[df_del_dup['Gender'] == 'Female'])
other_count = len(df_del_dup.loc[df_del_dup['Gender'] == 'Other / Non-Disclosed'])

total = male_count + female_count + other_count

gender_count = {'Male': [round((male_count/total)*100, 2), male_count],
                  'Female': [round((female_count/total)*100, 2), female_count],
                  'Other / Non-Disclosed' : [round((other_count/total)*100, 2), other_count]}

cols = ['Percentage of Players', 'Total Count']

gender_count_df = pd.DataFrame.from_items(gender_count.items(), 
                                          orient='index',
                                         columns = cols)

gender_count_df

Unnamed: 0,Percentage of Players,Total Count
Male,81.15,465
Female,17.45,100
Other / Non-Disclosed,1.4,8


## Purchasing Analysis (Gender)

In [492]:
# Set values

# Set male and female purchase count
gen_purchase_count = df.groupby(['Gender']).count()['Price']

# Set Average Purchase Price

gen_avg_purchase = round(df.groupby(['Gender']).mean()['Price'], 2)

# Set Total Purchase Value

gen_total_purchase = df.groupby(['Gender']).sum()['Price']

# Set normalized totals

gen_normalized = round(gen_total_purchase / gender_count_df['Total Count'], 2)

# Create df

purchase_gen_analysis = {'Purchase Count' : gen_purchase_count,
                           'Average Purchase Count' : gen_avg_purchase,
                           'Total Purchase Value': gen_total_purchase,
                           'Normalized Totals': gen_normalized}

purchase_gen_analysis_df = pd.DataFrame(purchase_gen_analysis)


# Formatting to $

purchase_gen_analysis_df['Average Purchase Count'] = purchase_gen_analysis_df['Average Purchase Count'].map("${:.2f}".format)

purchase_gen_analysis_df['Total Purchase Value'] = purchase_gen_analysis_df['Total Purchase Value'].map("${:.2f}".format)

purchase_gen_analysis_df['Normalized Totals'] = purchase_gen_analysis_df['Normalized Totals'].map("${:.2f}".format)


# Display data

purchase_gen_analysis_df

Unnamed: 0_level_0,Average Purchase Count,Normalized Totals,Purchase Count,Total Purchase Value
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,$2.82,$3.83,136,$382.91
Male,$2.95,$4.02,633,$1867.68
Other / Non-Disclosed,$3.25,$4.47,11,$35.74


## Age Demographics

In [493]:
# Create bins to organize data

bins = [0, 9.9, 14.9, 19.9, 24.9, 29.9, 34.9, 39.9, 1000000000]

age_labels = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40+']

# Organize data according to set bins and create df

count_per_age = pd.cut(df_del_dup['Age'], bins, labels=age_labels).value_counts().rename('Total Count')
count_age_df = pd.DataFrame(count_per_age)


percent_by_age = round(count_per_age/ count_per_age.sum()*100, 2).rename('Percentage of Players')
percent_age_df = pd.DataFrame(percent_by_age)

# Create df

age_demo_df = pd.concat([percent_age_df, count_age_df], axis=1).reindex(age_labels)

age_demo_df

Unnamed: 0,Percentage of Players,Total Count
<10,3.32,19
10-14,4.01,23
15-19,17.45,100
20-24,45.2,259
25-29,15.18,87
30-34,8.2,47
35-39,4.71,27
40+,1.92,11


## Purchasing Analysis (Age)

In [494]:
# Filter main df by age groups
df.loc[(df['Age'] < 10), 'Age_label'] = '<10'

df.loc[(df['Age'] >= 10) & (df['Age'] <= 14), 'Age_label'] = '10-14'

df.loc[(df['Age'] >= 15) & (df['Age'] <= 19), 'Age_label'] = '15-19'

df.loc[(df['Age'] >= 20) & (df['Age'] <= 24), 'Age_label'] = '20-24'

df.loc[(df['Age'] >= 25) & (df['Age'] <= 29), 'Age_label'] = '25-29'

df.loc[(df['Age'] >= 30) & (df['Age'] <= 34), 'Age_label'] = '30-34'

df.loc[(df['Age'] >= 35) & (df['Age'] <= 39), 'Age_label'] = '35-39'

df.loc[(df['Age'] >= 40), 'Age_label'] = '40+'


# Set count per age group
age_purchase_count = df.groupby(['Age_label']).count()['SN']

# Set avg purchase per age group
age_avg_purchase = df.groupby(['Age_label'])['Price'].mean()

# Set total purchase per age group
age_total_purchase = df.groupby(['Age_label'])['Price'].sum().reindex(age_labels)

# Set normalized total purchase per age group
age_normalized = age_total_purchase / age_demo_df['Total Count']

# # Create dict to create df

purchase_age_analysis = {'Purchase Count' : age_purchase_count,
                           'Average Purchase Count' : age_avg_purchase,
                           'Total Purchase Value': age_total_purchase,
                           'Normalized Totals': age_normalized}

purchase_age_analysis_df = pd.DataFrame(purchase_age_analysis).reindex(age_labels)

# Formatting to $ 

purchase_age_analysis_df['Average Purchase Count'] = purchase_age_analysis_df['Average Purchase Count'].map("${:.2f}".format)

purchase_age_analysis_df['Total Purchase Value'] = purchase_age_analysis_df['Total Purchase Value'].map("${:.2f}".format)

purchase_age_analysis_df['Normalized Totals'] = purchase_age_analysis_df['Normalized Totals'].map("${:.2f}".format)


# Display data

purchase_age_analysis_df

Unnamed: 0,Average Purchase Count,Normalized Totals,Purchase Count,Total Purchase Value
<10,$2.98,$4.39,28,$83.46
10-14,$2.77,$4.22,35,$96.95
15-19,$2.91,$3.86,133,$386.42
20-24,$2.91,$3.78,336,$978.77
25-29,$2.96,$4.26,125,$370.33
30-34,$3.08,$4.20,64,$197.25
35-39,$2.84,$4.42,42,$119.40
40+,$3.16,$4.89,17,$53.75


## Top Spenders

In [495]:
# Identify top 5 spenders

top5_spenders = df.groupby(['SN'])['Price'].sum().sort_values(ascending=False).head().index

top5_SN_df = df.loc[df['SN'].isin(top5_spenders)]

# Organize data

top5_count = top5_SN_df.groupby(['SN']).count()['Price']

top5_avg = top5_SN_df.groupby(['SN'])['Price'].mean()

top5_total = top5_SN_df.groupby(['SN'])['Price'].sum()

# Create dict to create df

purchase_top5_analysis = {'Purchase Count' : top5_count,
                           'Average Purchase Count' : top5_avg,
                           'Total Purchase Value': top5_total}

purchase_top5_analysis_df = pd.DataFrame(purchase_top5_analysis).sort_values(['Purchase Count'], ascending=False)

# Formatting with $ 
purchase_top5_analysis_df['Average Purchase Count'] = purchase_top5_analysis_df['Average Purchase Count'].map("${:.2f}".format)

purchase_top5_analysis_df['Total Purchase Value'] = purchase_top5_analysis_df['Total Purchase Value'].map("${:.2f}".format)

purchase_top5_analysis_df

Unnamed: 0_level_0,Average Purchase Count,Purchase Count,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,$3.41,5,$17.06
Mindimnya67,$3.18,4,$12.74
Saedue76,$3.39,4,$13.56
Eoda93,$3.86,3,$11.58
Haellysu29,$4.24,3,$12.73


## Most Popular Items

In [496]:
# Organize data

top_items = df['Item ID'].value_counts().head().index

top_items_df = df.loc[df['Item ID'].isin(top_items)]

top_items_count = top_items_df.groupby(['Item ID', 'Item Name']).count()['Price']

top_items_price = top_items_df.groupby(['Item ID', 'Item Name']).Price.unique().astype(float)

top_items_total = top_items_df.groupby(['Item ID', 'Item Name'])['Price'].sum()

# Create dict to create df

top_items_analysis = {'Purchase Count' : top_items_count,
                           'Item Price' : top_items_price,
                           'Total Purchase Value': top_items_total}

top_items_analysis_df = pd.DataFrame(top_items_analysis).sort_values(['Purchase Count'], ascending=False)

# Formatting to $

top_items_analysis_df['Item Price'] = top_items_analysis_df['Item Price'].map("${:.2f}".format)
top_items_analysis_df['Total Purchase Value'] = top_items_analysis_df['Total Purchase Value'].map("${:.2f}".format)

# Display data

top_items_analysis_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Item Price,Purchase Count,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
39,"Betrayal, Whisper of Grieving Widows",$2.35,11,$25.85
84,Arcane Gem,$2.23,11,$24.53
31,Trickster,$2.07,9,$18.63
34,Retribution Axe,$4.14,9,$37.26
175,Woeful Adamantite Claymore,$1.24,9,$11.16


## Most Profitable Items

In [497]:
# Organize data

top5_profit = pd.DataFrame(df.groupby(['Item ID'])['Price'].sum()).sort_values('Price', ascending=False)

top5_profit = top5_profit.head().index

top5_profit_df = df.loc[df['Item ID'].isin(top5_profit)]

top5_profit_count = top5_profit_df.groupby(['Item ID', 'Item Name']).count()['Price']

top5_profit_price = top5_profit_df.groupby(['Item ID', 'Item Name']).Price.unique().astype(float)

top5_profit_total = top5_profit_df.groupby(['Item ID', 'Item Name'])['Price'].sum()

# Create dict to create df

top5_profit_analysis = {'Purchase Count' : top5_profit_count,
                           'Item Price' : top5_profit_price,
                           'Total Purchase Value': top5_profit_total}

top5_profit_analysis_df = pd.DataFrame(top5_profit_analysis).sort_values(['Purchase Count'], ascending=False)

# Formatting to $

top5_profit_analysis_df['Item Price'] = top5_profit_analysis_df['Item Price'].map("${:.2f}".format)
top5_profit_analysis_df['Total Purchase Value'] = top5_profit_analysis_df['Total Purchase Value'].map("${:.2f}".format)

# Display data

top5_profit_analysis_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Item Price,Purchase Count,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,$4.14,9,$37.26
107,"Splitter, Foe Of Subtlety",$3.61,8,$28.88
115,Spectral Diamond Doomblade,$4.25,7,$29.75
32,Orenmir,$4.95,6,$29.70
103,Singed Scalpel,$4.87,6,$29.22


In [518]:
df.loc[df['SN'] == 'Undirrala66']

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN,Age_label
79,29,Male,144,Blood Infused Guardian,2.86,Undirrala66,25-29
107,29,Male,115,Spectral Diamond Doomblade,4.25,Undirrala66,25-29
131,29,Male,62,Piece Maker,4.36,Undirrala66,25-29
537,29,Male,18,"Torchlight, Bond of Storms",1.77,Undirrala66,25-29
596,29,Male,133,Faith's Scimitar,3.82,Undirrala66,25-29


## Observable Trends
### - The age group that has the most players, 20-24, spends the least amongst all other age groups.
### - No player has spent more than 20 dollars on items.
### - The top spender, Undirrala66,  has only purchased 1 item that appears in either most popular and most profitable items