In [1]:
# import dependencies
import pandas as pd
import os

In [2]:
# name of the purchase-data JSON file, resolved relative to the working directory
pymo_file = 'purchase_data.json'

In [3]:
# read the purchase records into a DataFrame, then preview the first five rows
pymo_raw = pd.read_json(pymo_file)
pymo_raw.head(n=5)

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


In [4]:
# number of distinct players, identified by screen name (SN)
player_count = pymo_raw['SN'].nunique()

# one-row summary table holding the player total
play_count = pd.DataFrame(
    data=[player_count],
    index=[0],
    columns=['Player Count'],
)
play_count

Unnamed: 0,Player Count
0,573


In [5]:
# number of distinct item names (a missing name, if any, counts as one value)
# NOTE(review): this counts names, while the item tables group by Item ID -- confirm which identity is intended
unique_items = pymo_raw['Item Name'].nunique(dropna=False)

In [6]:
# mean purchase price across all rows, rounded to two decimals
avg_pur_price = round(pymo_raw['Price'].mean(), ndigits=2)

In [7]:
# every row of the raw data is one purchase, so the row count is the purchase count
total_purchases = len(pymo_raw.index)

In [8]:
# sum of all purchase prices, rounded to two decimals
total_revenue = round(pymo_raw['Price'].sum(), ndigits=2)

In [9]:
# Purchasing Analysis (Total)
# collect the headline metrics computed in the preceding cells
purchase_data = {
    'Unique Items': unique_items,
    'Avg Price': avg_pur_price,
    'Total Purchases': total_purchases,
    'Total Revenue': total_revenue,
}

# one-row summary table built from the metrics
purch_analysis = pd.DataFrame(purchase_data, index=[0])

# render the monetary columns as dollar strings with two decimals
for money_col in ('Avg Price', 'Total Revenue'):
    purch_analysis[money_col] = purch_analysis[money_col].map("${:.2f}".format)

purch_analysis

Unnamed: 0,Avg Price,Total Purchases,Total Revenue,Unique Items
0,$2.93,780,$2286.33,179


In [10]:
# Gender Demographics
# group raw data by gender
pymo_gdr = pymo_raw.groupby(['Gender'])

# count distinct players (screen names) within each gender group
pymo_gdr_count = pymo_gdr['SN'].nunique()

# share of all players per gender, in percent; scale to percent first and
# round last so the stored values are rounded to two decimals
# (rounding before the multiplication can reintroduce float noise)
pymo_gdr_pct = (pymo_gdr_count / player_count * 100).round(2)

# combine count and percentage into the summary DataFrame
gdr_demo = pd.DataFrame({
    'Player count': pymo_gdr_count,
    'Percentage of Players': pymo_gdr_pct
})
gdr_demo

Unnamed: 0_level_0,Percentage of Players,Player count
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,17.45,100
Male,81.15,465
Other / Non-Disclosed,1.4,8


In [11]:
# Gender Purchase Analysis
# aggregate Price per gender (count, mean, sum) and give the columns report names
purch_analysis_gdr = pymo_gdr['Price'].agg(['count', 'mean', 'sum']).rename(
    columns={
        'count': 'Total Purchases',
        'mean': 'Avg Price',
        'sum': 'Total Revenue',
    }
)

# attach the distinct-player count per gender (aligned on the Gender index)
purch_analysis_gdr['Player Count'] = pymo_gdr_count

# revenue per distinct player in each gender group
normalized_total_gdr = purch_analysis_gdr['Total Revenue'] / purch_analysis_gdr['Player Count']
purch_analysis_gdr['Total Revenue (Normalized)'] = normalized_total_gdr

# render the monetary columns as dollar strings with two decimals
for money_col in ('Avg Price', 'Total Revenue', 'Total Revenue (Normalized)'):
    purch_analysis_gdr[money_col] = purch_analysis_gdr[money_col].map("${:.2f}".format)

# put the columns in presentation order
purch_analysis_gdr = purch_analysis_gdr.loc[:, [
    'Player Count', 'Total Purchases',
    'Avg Price', 'Total Revenue',
    'Total Revenue (Normalized)',
]]

purch_analysis_gdr

Unnamed: 0_level_0,Player Count,Total Purchases,Avg Price,Total Revenue,Total Revenue (Normalized)
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Female,100,136,$2.82,$382.91,$3.83
Male,465,633,$2.95,$1867.68,$4.02
Other / Non-Disclosed,8,11,$3.25,$35.74,$4.47


In [12]:
# Age Demographics
# report the observed age range so the bin edges below can be sanity-checked
min_age = pymo_raw['Age'].min()
max_age = pymo_raw['Age'].max()

print('Youngest Player: ' + str(min_age))
print('Oldest Player: ' + str(max_age))

# bin edges for the age groups; pd.cut builds right-closed intervals by default,
# i.e. (0, 9], (9, 14], ..., (39, inf)
# the last edge is infinite so the '40+' group has no upper cap and no age
# above an arbitrary limit silently falls out of every bin
age_bins = [0, 9, 14, 19, 24, 29, 34, 39, float('inf')]

# labels for the age groups, one per interval
age_cat = ['<10', '10-14', '15-19', '20-24',
           '25-29', '30-34', '35-39', '40+']

# assign every purchase row to its age group
pymo_age_assign = pd.cut(pymo_raw['Age'], age_bins, labels=age_cat)

# add age groups to pymo_raw DataFrame
pymo_raw['Age Groups'] = pymo_age_assign

# group raw data by Age Groups
pymo_age = pymo_raw.groupby('Age Groups')

# count distinct players (screen names) within each age group
pymo_age_count = pymo_age['SN'].nunique()

# share of all players per age group, in percent; scale to percent first and
# round last so the stored values are rounded to two decimals
pymo_age_pct = (pymo_age_count / player_count * 100).round(2)

# combine count and percentage into the summary DataFrame
age_demo = pd.DataFrame({
    'Player Count': pymo_age_count,
    'Percentage of Players': pymo_age_pct
})
age_demo

Youngest Player: 7
Oldest Player: 45


Unnamed: 0_level_0,Percentage of Players,Player Count
Age Groups,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,3.32,19
10-14,4.01,23
15-19,17.45,100
20-24,45.2,259
25-29,15.18,87
30-34,8.2,47
35-39,4.71,27
40+,1.92,11


In [13]:
# Age Purchase Analysis
# aggregate Price per age group (count, mean, sum)
purch_analysis_age = pymo_age['Price'].agg(['count', 'mean', 'sum'])

# give the aggregate columns their report names
purch_analysis_age = purch_analysis_age.rename(columns={
    'count': 'Total Purchases',
    'mean': 'Avg Price',
    'sum': 'Total Revenue',
})

# attach the distinct-player count per age group (aligned on the Age Groups index)
purch_analysis_age['Player Count'] = pymo_age_count

# revenue per distinct player in each age group
normalized_total_age = purch_analysis_age['Total Revenue'] / purch_analysis_age['Player Count']
purch_analysis_age['Total Revenue (Normalized)'] = normalized_total_age

# render the monetary columns as dollar strings with two decimals
for money_col in ('Avg Price', 'Total Revenue', 'Total Revenue (Normalized)'):
    purch_analysis_age[money_col] = purch_analysis_age[money_col].map("${:.2f}".format)

# put the columns in presentation order
purch_analysis_age = purch_analysis_age.loc[:, [
    'Player Count', 'Total Purchases',
    'Avg Price', 'Total Revenue',
    'Total Revenue (Normalized)',
]]

purch_analysis_age

Unnamed: 0_level_0,Player Count,Total Purchases,Avg Price,Total Revenue,Total Revenue (Normalized)
Age Groups,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
<10,19,28,$2.98,$83.46,$4.39
10-14,23,35,$2.77,$96.95,$4.22
15-19,100,133,$2.91,$386.42,$3.86
20-24,259,336,$2.91,$978.77,$3.78
25-29,87,125,$2.96,$370.33,$4.26
30-34,47,64,$3.08,$197.25,$4.20
35-39,27,42,$2.84,$119.40,$4.42
40+,11,17,$3.16,$53.75,$4.89


In [14]:
# Top Spender Purchase Analysis
# group raw data by screen name
pymo_user = pymo_raw.groupby('SN')

# create DataFrame with count, mean and sum of Price by screen name
purch_analysis_user = pymo_user['Price'].agg(['count', 'mean', 'sum'])

# change DataFrame column names
purch_analysis_user = purch_analysis_user.rename(columns={
    'count': 'Total Purchases',
    'mean': 'Avg Price',
    'sum': 'Total Revenue'
})

# sort by Total Revenue in descending order while the values are still numeric
purch_analysis_user = purch_analysis_user.sort_values(by='Total Revenue', ascending=False)

# reset index, keeping the old index (SN) as a regular column
purch_analysis_user = purch_analysis_user.reset_index(drop=False)

# format columns with currency values (after sorting: the result is strings)
purch_analysis_user['Avg Price'] = purch_analysis_user['Avg Price'].map("${:.2f}".format)
purch_analysis_user['Total Revenue'] = purch_analysis_user['Total Revenue'].map("${:.2f}".format)

# take the 5 top spenders and index them by SN; set_index returns a new
# DataFrame, so no slice of purch_analysis_user is mutated in place
purch_analysis_top_spend = purch_analysis_user.head(5).set_index('SN')

purch_analysis_top_spend

Unnamed: 0_level_0,Total Purchases,Avg Price,Total Revenue
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,5,$3.41,$17.06
Saedue76,4,$3.39,$13.56
Mindimnya67,4,$3.18,$12.74
Haellysu29,3,$4.24,$12.73
Eoda93,3,$3.86,$11.58


In [15]:
# Most Popular Item Purchase Analysis
# group raw data by Item ID, Item Name and Price
pymo_item = pymo_raw.groupby(['Item ID', 'Item Name', 'Price'])

# create DataFrame with count and sum of Price for each item group
purch_analysis_item = pymo_item['Price'].agg(['count', 'sum'])

# change DataFrame column names
purch_analysis_item = purch_analysis_item.rename(columns={
    'count': 'Total Purchases',
    'sum': 'Total Revenue'
})

# sort by Total Purchases in descending order while the values are still numeric
purch_analysis_most_pop = purch_analysis_item.sort_values(by='Total Purchases', ascending=False)

# turn the group keys (Item ID, Item Name, Price) back into regular columns
purch_analysis_most_pop = purch_analysis_most_pop.reset_index(drop=False)

# format columns with currency values
purch_analysis_most_pop['Price'] = purch_analysis_most_pop['Price'].map('${:.2f}'.format)
purch_analysis_most_pop['Total Revenue'] = purch_analysis_most_pop['Total Revenue'].map('${:.2f}'.format)

# keep the 5 most purchased items and index them by Item ID; set_index returns
# a new DataFrame, so no slice is mutated in place
purch_analysis_most_pop = purch_analysis_most_pop.head(5).set_index('Item ID')

purch_analysis_most_pop

Unnamed: 0_level_0,Item Name,Price,Total Purchases,Total Revenue
Item ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
39,"Betrayal, Whisper of Grieving Widows",$2.35,11,$25.85
84,Arcane Gem,$2.23,11,$24.53
31,Trickster,$2.07,9,$18.63
175,Woeful Adamantite Claymore,$1.24,9,$11.16
13,Serenity,$1.49,9,$13.41


In [16]:
# Most Profitable Item Purchase Analysis
# sort the per-item aggregates by Total Revenue in descending order
purch_analysis_most_profit = purch_analysis_item.sort_values(by='Total Revenue', ascending=False)

# turn the group keys (Item ID, Item Name, Price) back into regular columns
purch_analysis_most_profit = purch_analysis_most_profit.reset_index(drop=False)

# format columns with currency values (after sorting: the result is strings)
purch_analysis_most_profit['Price'] = purch_analysis_most_profit['Price'].map('${:.2f}'.format)
purch_analysis_most_profit['Total Revenue'] = purch_analysis_most_profit['Total Revenue'].map('${:.2f}'.format)

# keep the 5 highest-revenue items and index them by Item ID; set_index returns
# a new DataFrame, so no slice is mutated in place
purch_analysis_most_profit = purch_analysis_most_profit.head(5).set_index('Item ID')

purch_analysis_most_profit

Unnamed: 0_level_0,Item Name,Price,Total Purchases,Total Revenue
Item ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,$4.14,9,$37.26
115,Spectral Diamond Doomblade,$4.25,7,$29.75
32,Orenmir,$4.95,6,$29.70
103,Singed Scalpel,$4.87,6,$29.22
107,"Splitter, Foe Of Subtlety",$3.61,8,$28.88
