# Heroes of Pymoli Data Analysis

- OBSERVED TREND 1 : Male players dominate the user base with more than 80% of the total players. 

- OBSERVED TREND 2 : Players in the age group 'Between 20 & 25' are highest consumer group.

- OBSERVED TREND 3 : Players in the age group '40+' spend maximum Dollars per head, but currently account for lowest % share of all consumer age-groups.

In [1]:
import pandas as pd
import numpy as np
import os

purchaseDataFile = os.path.join("raw_data","purchase_data.json")
purchase = pd.read_json(purchaseDataFile)
purchaseData = purchase

### Player Count

In [2]:
totPlayers = purchaseData['SN'].nunique()
totPlayers_df = pd.DataFrame({'Total Players': totPlayers}, index=list(range(1)))
totPlayers_df

Unnamed: 0,Total Players
0,573


### Purchasing Analysis (Total)

In [3]:
noOfUnqItems = purchaseData['Item Name'].nunique()
avgPrice = purchaseData['Price'].sum() / purchaseData['Item Name'].count()
noOfPurchases = purchaseData['Item Name'].count()
totRevenue = purchaseData['Price'].sum()
purAnalysis_df = pd.DataFrame({'Number Of Unique Items': noOfUnqItems,
                   'Average Price': avgPrice,
                   'Number of Purchases': noOfPurchases,
                   'Total Revenue': totRevenue},
                   index=list(range(1)))
purAnalysis_df['Average Price'] = purAnalysis_df['Average Price'].map('${:,.2f}'.format) 
purAnalysis_df['Total Revenue'] = purAnalysis_df['Total Revenue'].map('${:,.2f}'.format) 
purAnalysis_df = purAnalysis_df[['Number Of Unique Items', 'Average Price', 'Number of Purchases', 'Total Revenue']]
purAnalysis_df

Unnamed: 0,Number Of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$2.93,780,"$2,286.33"


### Gender Demographics

In [4]:
genDemographics_df = pd.DataFrame(purchaseData.groupby('Gender').SN.nunique())
genDemographics_df.columns = [['Total Count']]
genDemographics_df.index.name = ''
genDemographics_df['Percentage of Players'] = round(((genDemographics_df['Total Count'] / totPlayers) * 100),2)
genDemographics_df = genDemographics_df[['Percentage of Players','Total Count']]
genDemographics_df.sort_values(by=['Total Count'], ascending=False)

Unnamed: 0,Percentage of Players,Total Count
,,
Male,81.15,465.0
Female,17.45,100.0
Other / Non-Disclosed,1.4,8.0


### Purchasing Analysis (Gender)

In [5]:
pd.DataFrame(purchaseData.groupby('Gender').SN.nunique())
pd.DataFrame(purchaseData.groupby('Gender').Price.sum())
pd.DataFrame(purchaseData.groupby('Gender').Price.mean())

purAnaByGen_df = pd.DataFrame({'Purchase Count': purchaseData.groupby('Gender').SN.nunique(),
                   'Average Purchase Price': purchaseData.groupby('Gender').Price.mean(),
                   'Total Purchase Value': purchaseData.groupby('Gender').Price.sum()})
purAnaByGen_df['Normalized Totals'] = purAnaByGen_df['Total Purchase Value'] / purAnaByGen_df['Purchase Count']
purAnaByGen_df['Average Purchase Price'] = purAnaByGen_df['Average Purchase Price'].map('${:,.2f}'.format) 
purAnaByGen_df['Total Purchase Value'] = purAnaByGen_df['Total Purchase Value'].map('${:,.2f}'.format) 
purAnaByGen_df['Normalized Totals'] = purAnaByGen_df['Normalized Totals'].map('${:,.2f}'.format) 
purAnaByGen_df = purAnaByGen_df[['Purchase Count', 'Average Purchase Price', 'Total Purchase Value', 'Normalized Totals']]
purAnaByGen_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,100,$2.82,$382.91,$3.83
Male,465,$2.95,"$1,867.68",$4.02
Other / Non-Disclosed,8,$3.25,$35.74,$4.47


### Age Demographies

In [6]:
bins = [0,9,14,19,24,29,34,39,(purchaseData['Age'].max() + 1)]
group_names = ['<10','10-14','15-19','20-24','25-29','30-34','35-39','40+']
ageGroup_df = purchaseData
ageGroup_df['AgeGroup'] = pd.cut(ageGroup_df['Age'], bins, labels=group_names)
ageDemographics_df = pd.DataFrame({'Total Count' : ageGroup_df.groupby('AgeGroup').SN.nunique()})
ageDemographics_df = ageDemographics_df.reindex(index=group_names)
ageDemographics_df['Percentage Of Players'] = round(((ageDemographics_df['Total Count'] / totPlayers) * 100),2)
ageDemographics_df = ageDemographics_df[['Percentage Of Players','Total Count']]
ageDemographics_df.index.name = ''
ageDemographics_df

Unnamed: 0,Percentage Of Players,Total Count
,,
<10,3.32,19.0
10-14,4.01,23.0
15-19,17.45,100.0
20-24,45.2,259.0
25-29,15.18,87.0
30-34,8.2,47.0
35-39,4.71,27.0
40+,1.92,11.0


### Purchasing Analysis (Age)

In [7]:
purAnaByAge_df = pd.DataFrame({'Purchase Count' : ageGroup_df.groupby('AgeGroup').SN.nunique(), 
                     'Average Purchase Price': ageGroup_df.groupby('AgeGroup').Price.mean(),
                    'Total Purchase Value': ageGroup_df.groupby('AgeGroup').Price.sum(),
                    'User Count': ageGroup_df.groupby('AgeGroup').SN.count()})
purAnaByAge_df = purAnaByAge_df.reindex(index=group_names)
purAnaByAge_df['Normalized Totals'] = purAnaByAge_df['Total Purchase Value'] / purAnaByAge_df['Purchase Count']
purAnaByAge_df = purAnaByAge_df[['Purchase Count', 'Average Purchase Price', 'Total Purchase Value', 'Normalized Totals']]
purAnaByAge_df['Average Purchase Price'] = purAnaByAge_df['Average Purchase Price'].map('${:,.2f}'.format) 
purAnaByAge_df['Total Purchase Value'] = purAnaByAge_df['Total Purchase Value'].map('${:,.2f}'.format) 
purAnaByAge_df['Normalized Totals'] = purAnaByAge_df['Normalized Totals'].map('${:,.2f}'.format) 
purAnaByAge_df.index.name = ''
purAnaByAge_df

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
,,,,
<10,19.0,$2.98,$83.46,$4.39
10-14,23.0,$2.77,$96.95,$4.22
15-19,100.0,$2.91,$386.42,$3.86
20-24,259.0,$2.91,$978.77,$3.78
25-29,87.0,$2.96,$370.33,$4.26
30-34,47.0,$3.08,$197.25,$4.20
35-39,27.0,$2.84,$119.40,$4.42
40+,11.0,$3.16,$53.75,$4.89


### Top Spenders

In [8]:
topSpenders_df = pd.DataFrame({'Total Purchase Value': purchaseData.groupby('SN').Price.sum(),
                    'Average Purchase Price': purchaseData.groupby('SN').Price.mean(),
                    'Purchase Count': purchaseData.groupby('SN').Price.count()})
topSpenders_df = topSpenders_df[['Purchase Count','Average Purchase Price','Total Purchase Value']]
topSpenders_df = topSpenders_df.sort_values('Total Purchase Value', ascending=False).head()
topSpenders_df['Average Purchase Price'] = topSpenders_df['Average Purchase Price'].map('${:,.2f}'.format) 
topSpenders_df['Total Purchase Value'] = topSpenders_df['Total Purchase Value'].map('${:,.2f}'.format) 
topSpenders_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,5,$3.41,$17.06
Saedue76,4,$3.39,$13.56
Mindimnya67,4,$3.18,$12.74
Haellysu29,3,$4.24,$12.73
Eoda93,3,$3.86,$11.58


### Most Popular Items

In [9]:
items_df = pd.DataFrame({'Purchase Count': purchaseData.groupby(['Item ID','Item Name']).Price.count(),
                    'Item Price': purchaseData.groupby(['Item ID','Item Name']).Price.mean(),
                    'Total Purchase Value': purchaseData.groupby(['Item ID','Item Name']).Price.sum()})
items_df = items_df[['Purchase Count','Item Price','Total Purchase Value']]
popItems_df = items_df.sort_values(['Purchase Count','Total Purchase Value'], ascending=False).head()
popItems_df['Item Price'] = popItems_df['Item Price'].map('${:,.2f}'.format) 
popItems_df['Total Purchase Value'] = popItems_df['Total Purchase Value'].map('${:,.2f}'.format) 
popItems_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
39,"Betrayal, Whisper of Grieving Widows",11,$2.35,$25.85
84,Arcane Gem,11,$2.23,$24.53
34,Retribution Axe,9,$4.14,$37.26
31,Trickster,9,$2.07,$18.63
13,Serenity,9,$1.49,$13.41


### Most Profitable Items

In [10]:
profItems_df = items_df.sort_values(['Total Purchase Value','Purchase Count','Item Price'], ascending=False).head()
profItems_df['Item Price'] = profItems_df['Item Price'].map('${:,.2f}'.format) 
profItems_df['Total Purchase Value'] = profItems_df['Total Purchase Value'].map('${:,.2f}'.format) 
profItems_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,9,$4.14,$37.26
115,Spectral Diamond Doomblade,7,$4.25,$29.75
32,Orenmir,6,$4.95,$29.70
103,Singed Scalpel,6,$4.87,$29.22
107,"Splitter, Foe Of Subtlety",8,$3.61,$28.88
