# Heroes Of Pymoli Data Analysis
* Of the 1163 active players, the vast majority are male (82%). There also exists, a smaller, but notable proportion of female players (16%).

* Our peak age demographic falls between 20-24 (42%) with secondary groups falling between 15-19 (17.80%) and 25-29 (15.48%).

* Our players are putting in significant cash during the lifetime of their gameplay. Across all major age and gender demographics, the average purchase for a user is roughly $491.   
-----

In [25]:
import pandas as pd
json_file = "purchase_data.json"
df = pd.read_json(json_file)
df.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


## Player Count

In [26]:
total_players = df['SN'].unique()
total_players_count = total_players.size
player_count_df = pd.DataFrame([{'Total Players': total_players_count}])
# df w/ duplicates removed
removed_duplicates_df = df.drop_duplicates(['SN'], keep='first')
player_count_df

Unnamed: 0,Total Players
0,573


## Purchasing Analysis (Total)

In [27]:
# formatting
money_fmt = "${:,.2f}".format
fmt = '{:.2f}'.format

unique_items = df['Item ID'].unique()
unique_items_count = unique_items.size
average_price = df['Price'].sum()/df['Price'].count()
purchases_count = df['Item ID'].count()
total_revenue = df['Price'].sum()
#purchase_analysis_df = pd.DataFrame([{'Number of Unique Items': unique_items_count,
#                                     'Average Price': money_fmt(average_price) }])

purchase_analysis_df = pd.DataFrame([[unique_items_count,money_fmt(average_price), purchases_count, money_fmt(total_revenue)]],
                                     columns=['Number of Unique Items','Average Price','Number of Purchases','Total Revenue'])
purchase_analysis_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,$2.93,780,"$2,286.33"


## Gender Demographics

In [28]:
# use the built-in normalize in value_counts to get the percentage
percents = removed_duplicates_df['Gender'].value_counts(normalize=True)*100
# total count
total = removed_duplicates_df['Gender'].value_counts()

gender_demographics =total.to_frame()
gender_demographics= gender_demographics.rename(columns={'Gender':'Total Count'})
gender_demographics['Percentage of Player'] = percents.map(fmt)
gender_demographics

Unnamed: 0,Total Count,Percentage of Player
Male,465,81.15
Female,100,17.45
Other / Non-Disclosed,8,1.4



## Purchasing Analysis (Gender)

In [29]:
# group by gender 
group_by_gender = df.groupby(['Gender'])
purchase_analysis_df = pd.DataFrame()
# purchase count
purchase_analysis_df['Purchase Count']=group_by_gender['Item ID'].count()
# total purchase value
purchase_analysis_df['Total Purchase Value'] = group_by_gender['Price'].sum()
# average purchase price
purchase_analysis_df['Average Purchase Price'] = purchase_analysis_df['Total Purchase Value']/purchase_analysis_df['Purchase Count']
# normalization
purchase_analysis_df['Normalized Totals'] = purchase_analysis_df['Total Purchase Value']/gender_demographics['Total Count']

# formatting
purchase_analysis_df['Total Purchase Value'] = purchase_analysis_df['Total Purchase Value'].map(money_fmt)
purchase_analysis_df['Average Purchase Price'] = purchase_analysis_df['Average Purchase Price'].map(money_fmt)
purchase_analysis_df['Normalized Totals'] = purchase_analysis_df['Normalized Totals'].map(money_fmt)

# display
purchase_analysis_df

Unnamed: 0_level_0,Purchase Count,Total Purchase Value,Average Purchase Price,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,136,$382.91,$2.82,$3.83
Male,633,"$1,867.68",$2.95,$4.02
Other / Non-Disclosed,11,$35.74,$3.25,$4.47


## Age Demographics

In [40]:
max_age = removed_duplicates_df['Age'].max()
# create bins and categories
bins = [0,9,14,19,24,29,34,39,max_age]
age_categories = ['<10','10-14','15-19','20-24','25-29','30-34','35-39','40+']
# cerate a df for age demographics
age_demographics_df = pd.DataFrame()
# categorize
categorized_by_age_df = pd.cut(removed_duplicates_df['Age'], bins, labels=age_categories, right=True)

# add total count that is sorted by the index to the df
age_demographics_df['Total Count'] = categorized_by_age_df.value_counts().sort_index()
# percent and formatting
age_demographics_df['Percentage of Players'] = (categorized_by_age_df.value_counts(normalize=True)*100).map(fmt)

# display
age_demographics_df

Unnamed: 0,Total Count,Percentage of Players
<10,19,3.32
10-14,23,4.01
15-19,100,17.45
20-24,259,45.2
25-29,87,15.18
30-34,47,8.2
35-39,27,4.71
40+,11,1.92


## Purchasing Analysis (Age)

In [66]:
# create a df for this portion
purchase_analysis_df = pd.DataFrame()

max_age_in_duplicates = df['Age'].max()
# create new bins for this part
bins_in_duplicates = [0,9,14,19,24,29,34,39,max_age_in_duplicates]
# reuse the age category
age_categories_in_duplicates = age_categories

# categorize 
categorized_by_age_in_duplicates_df = pd.cut(df['Age'],bins=bins_in_duplicates, labels=age_categories_in_duplicates, right=True)

# purchase count
purchase_analysis_df['Purchase Count'] = categorized_by_age_in_duplicates_df.value_counts().sort_index()

# total purchase value by age categories
purchase_analysis_df['Total Purchase Value'] = df.groupby(categorized_by_age_in_duplicates_df)['Price'].sum()

# average purchase price
purchase_analysis_df['Average Purchase Price'] = purchase_analysis_df['Total Purchase Value']/purchase_analysis_df['Purchase Count']

# normalized totals by age demographics
purchase_analysis_df['Normalized Totals'] = purchase_analysis_df['Total Purchase Value']/age_demographics_df['Total Count']

# formatting
purchase_analysis_df['Total Purchase Value'] = purchase_analysis_df['Total Purchase Value'].map(money_fmt)
purchase_analysis_df['Average Purchase Price'] = purchase_analysis_df['Average Purchase Price'].map(money_fmt)
purchase_analysis_df['Normalized Totals'] = purchase_analysis_df['Normalized Totals'].map(money_fmt)


# display
purchase_analysis_df

Unnamed: 0,Purchase Count,Total Purchase Value,Average Purchase Price,Normalized Totals
<10,28,$83.46,$2.98,$4.39
10-14,35,$96.95,$2.77,$4.22
15-19,133,$386.42,$2.91,$3.86
20-24,336,$978.77,$2.91,$3.78
25-29,125,$370.33,$2.96,$4.26
30-34,64,$197.25,$3.08,$4.20
35-39,42,$119.40,$2.84,$4.42
40+,17,$53.75,$3.16,$4.89


## Top Spenders

In [126]:
# create a new df
top_spenders_df = pd.DataFrame()
# group by SN
group_by_SN_df = df.groupby(['SN'])

# total purchase value
total_purchase_value = group_by_SN_df['Price'].sum().sort_values(ascending=False).head(5)
top_spenders_df['Total Purchase Value']  = total_purchase_value

# purchase count
top_total_purchase_sns =df[df['SN'].isin(total_purchase_value.index)]
group_by_sns =top_total_purchase_sns.groupby(['SN'])
purchase_count = group_by_sns['Item ID'].count()
top_spenders_df['Purchase Count'] = purchase_count

# average purchase price
top_spenders_df['Average Purchase Price'] = total_purchase_value/purchase_count

# formatting
top_spenders_df['Average Purchase Price'] = top_spenders_df['Average Purchase Price'].map(money_fmt)
top_spenders_df['Total Purchase Value']  = top_spenders_df['Total Purchase Value'] .map(money_fmt)

# display
top_spenders_df

Unnamed: 0_level_0,Total Purchase Value,Purchase Count,Average Purchase Price
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,$17.06,5,$3.41
Saedue76,$13.56,4,$3.39
Mindimnya67,$12.74,4,$3.18
Haellysu29,$12.73,3,$4.24
Eoda93,$11.58,3,$3.86


## Most Popular Items

In [149]:
group_by_id_and_name = df.groupby(['Item ID','Item Name'])
group_by_id_and_name['Price'].count().sort_values(ascending=False).head(10)

Item ID  Item Name                           
84       Arcane Gem                              11
39       Betrayal, Whisper of Grieving Widows    11
31       Trickster                                9
34       Retribution Axe                          9
175      Woeful Adamantite Claymore               9
13       Serenity                                 9
106      Crying Steel Sickle                      8
44       Bonecarvin Battle Axe                    8
92       Final Critic                             8
65       Conqueror Adamantite Mace                8
Name: Price, dtype: int64

## Most Profitable Items

In [159]:
group_by_id_and_name['Price'].value_counts()

Item ID  Item Name                                     Price
0        Splinter                                      1.82     1
1        Crucifer                                      2.28     4
2        Verdict                                       3.40     1
3        Phantomlight                                  1.79     1
4        Bloodlord's Fetish                            2.28     1
5        Putrid Fan                                    1.32     3
6        Rusty Skull                                   1.20     3
7        Thorn, Satchel of Dark Souls                  4.51     6
8        Purgatory, Gem of Regret                      3.91     6
9        Thorn, Conqueror of the Corrupted             2.04     2
10       Sleepwalker                                   1.73     6
11       Brimstone                                     2.52     7
12       Dawne                                         4.30     5
13       Serenity                                      1.49     9
14       Posses