# Heroes Of Pymoli 

### Analysis

*This analysis uses data for an independent gaming company's most recent fantasy game, Heroes of Pymoli. The game is free to play but players are encouraged to purchase optional items that enhance their gaming experience.*

* Of the 576 players who purchased optional items, 84% of them were male. 

* Although males made significantly more purchases (652) compared to females (113), males spent less per person (\$4.07) than females (\$4.47). 

* The largest age group of players was 20 to 24 year olds (44.79% of players), and they also made the most purchases. The second largest age group was 15 to 19 year olds (18.58% of players).

* 20 to 24 year olds contributed the most to the total revenue. Of the \$2,379.77 in revenue, 20 to 24 year olds made purchases totaling \$1,114.06 (46.8% of the revenue).

* Of the 183 items for sale, Oathbreaker, Last Hope of the Breaking Storm was purchased more than any other item (12 sold) and it generated more revenue than any other item (\$50.76 in sales).
_____

In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np
import os

In [2]:
def format_money(amt):
    """Return ``amt`` as a dollar string with thousands separators and two decimals.

    Example: ``format_money(1234.5)`` gives ``'$1,234.50'``.
    """
    return '$' + format(amt, ',.2f')

def format_percent(amt):
    """Return the fraction ``amt`` as a percentage string with two decimals.

    The value is scaled by 100 and followed by a space and a percent sign,
    e.g. ``format_percent(0.5)`` gives ``'50.00 %'``.
    """
    return format(amt * 100, '.2f') + ' %'

In [3]:
# Location of the purchase data, relative to the notebook (update if the file moves)
file_to_load = os.path.join('Resources', 'purchase_data.csv')

# Load the CSV into a pandas DataFrame and preview the first five rows
purchase_df = pd.read_csv(filepath_or_buffer=file_to_load)
purchase_df.head(n=5)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## Player Count

In [4]:
# Count distinct SN values; each distinct SN is treated as one player
num_players = purchase_df.loc[:, 'SN'].nunique()

# Single-row table for display
pd.DataFrame(data={'Total Players': num_players}, index=[0])

Unnamed: 0,Total Players
0,576


## Purchasing Analysis (Total)

In [5]:
# Headline figures across every purchase row
price_s = purchase_df['Price']

items = purchase_df['Item ID'].nunique()
num_purchases = purchase_df['Purchase ID'].count()

# Monetary figures are kept as display strings
ave_price = format_money(price_s.mean())
total_rev = format_money(price_s.sum())

# Single-row summary table
total_summary_df = pd.DataFrame(
    data={
        'Number of Unique Items': items,
        'Average Price': ave_price,
        'Number of Purchases': num_purchases,
        'Total Revenue': total_rev,
    },
    index=[0],
)

total_summary_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,$3.05,780,"$2,379.77"


## Gender Demographics

In [6]:
# One row per player, keyed on SN. De-duplicating on SN alone keeps the row
# count equal to the number of unique players even if a player's Gender or Age
# differs between purchase rows (the first row seen for each SN is kept).
# Age is carried along as a column for use later.
purchasers_df = purchase_df[['Gender', 'SN', 'Age']].drop_duplicates(subset='SN')

# Number of players per gender
gender_s = purchasers_df['Gender'].value_counts()

# Share of players per gender, formatted for display
percent_s = (gender_s / gender_s.sum()).map(format_percent)

gender_demographics_df = pd.DataFrame({'Total Count': gender_s,
                                       'Percentage of Players': percent_s})

gender_demographics_df

Unnamed: 0,Total Count,Percentage of Players
Male,484,84.03 %
Female,81,14.06 %
Other / Non-Disclosed,11,1.91 %



## Purchasing Analysis (Gender)

In [7]:
# Per-gender purchase statistics computed over every purchase row
gender_gb = purchase_df.groupby('Gender')

purchase_ct_s = gender_gb['Purchase ID'].count()

ave_price_s = gender_gb['Price'].mean().map(format_money)

# Keep the numeric totals for the per-person division; format a copy for display
total_value_s = gender_gb['Price'].sum()
total_value_s_formatted = total_value_s.map(format_money)

# Divide by the number of individuals of each gender (not the number of purchases),
# since some individuals made more than one purchase (normalized)
ave_value_per_person_s = (total_value_s / gender_demographics_df['Total Count']).map(format_money)

# Column label is 'Purchase Count' to match the other summary tables in this notebook
pd.DataFrame({'Purchase Count': purchase_ct_s,
              'Average Purchase Price': ave_price_s,
              'Total Purchase Value': total_value_s_formatted,
              'Avg Total Purchase Per Person': ave_value_per_person_s})

Unnamed: 0_level_0,Purchase_Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase Per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,"$1,967.64",$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


## Age Demographics

In [8]:
# Bin edges and the label for each interval between consecutive edges
age_bins = [0, 9, 14, 19, 24, 29, 34, 39, 150]
age_labels = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40+']

# Tag every player with an age bracket
purchasers_df['Age Range'] = pd.cut(purchasers_df['Age'], bins=age_bins, labels=age_labels)

# Players per bracket
purchasers_per_range_s = purchasers_df['Age Range'].value_counts()

# Denominator for the percentages: number of player rows with a Gender value
total_purchasers = purchasers_df['Gender'].count()

perc_players_s = purchasers_per_range_s.div(total_purchasers).map(format_percent)

# sort_index() puts the brackets in category order rather than count order
age_demographics_df = pd.DataFrame({'Total Count': purchasers_per_range_s,
                                    'Percentage of Players': perc_players_s})
age_demographics_df.sort_index()

Unnamed: 0,Total Count,Percentage of Players
<10,17,2.95 %
10-14,22,3.82 %
15-19,107,18.58 %
20-24,258,44.79 %
25-29,77,13.37 %
30-34,52,9.03 %
35-39,31,5.38 %
40+,12,2.08 %


## Purchasing Analysis (Age)

In [9]:
# Tag every purchase row with the buyer's age bracket
purchase_df['Age Range'] = pd.cut(purchase_df['Age'], age_bins, labels=age_labels)

# 'Age Range' is categorical. Pass observed=False explicitly so every bracket
# (including any empty one) stays in the result, and so the outcome does not
# depend on pandas' deprecated implicit default for categorical groupers.
age_range_gb = purchase_df.groupby('Age Range', observed=False)

purchase_ct_s = age_range_gb['Purchase ID'].count()

ave_price_s = age_range_gb['Price'].mean().map(format_money)

# Keep the numeric totals for the per-person division; format a copy for display
total_value_s = age_range_gb['Price'].sum()
total_value_s_formatted = total_value_s.map(format_money)

# Normalized: divide by the number of players in each bracket (not purchases).
# The player counts are sorted by index so both operands are in bracket order.
ave_value_per_person_s = (total_value_s / purchasers_per_range_s.sort_index()).map(format_money)

pd.DataFrame({'Purchase Count': purchase_ct_s,
              'Average Purchase Price': ave_price_s,
              'Total Purchase Value': total_value_s_formatted,
              'Avg Total Purchase Per Person': ave_value_per_person_s})

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase Per Person
Age Range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,23,$3.35,$77.13,$4.54
10-14,28,$2.96,$82.78,$3.76
15-19,136,$3.04,$412.89,$3.86
20-24,365,$3.05,"$1,114.06",$4.32
25-29,101,$2.90,$293.00,$3.81
30-34,73,$2.93,$214.00,$4.12
35-39,41,$3.60,$147.67,$4.76
40+,13,$2.94,$38.24,$3.19


## Top Spenders

In [10]:
# Per-player purchase statistics
sn_gb = purchase_df.groupby('SN')

purchase_ct_s = sn_gb['Purchase ID'].count()

ave_price_s = sn_gb['Price'].mean()
total_price_s = sn_gb['Price'].sum()

top_spenders_df = pd.DataFrame({
                            'Purchase Count': purchase_ct_s,
                            'Average Purchase Price': ave_price_s,
                            'Total Purchase Value': total_price_s
                        })

# Sort while the totals are still numeric; formatting below turns them into strings
top_spenders_df_sorted = top_spenders_df.sort_values('Total Purchase Value', ascending=False)

# Format both money columns with the shared helper so they use the same
# '$#,###.##' style as every other table in the notebook
for money_col in ('Average Purchase Price', 'Total Purchase Value'):
    top_spenders_df_sorted[money_col] = top_spenders_df_sorted[money_col].map(format_money)

top_spenders_df_sorted.head()

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,$3.79,$18.96
Idastidru52,4,$3.86,$15.45
Chamjask73,3,$4.61,$13.83
Iral74,4,$3.40,$13.62
Iskadarya95,3,$4.37,$13.10


## Most Popular Items

In [11]:
# Per-item statistics, grouped on the (Item ID, Item Name) pair
items_df = purchase_df.loc[:, ['Item ID', 'Item Name', 'Price']]
items_gb = items_df.groupby(['Item ID', 'Item Name'])

# don't format these here b/c will sort them differently in the next block
purchase_count_s = items_gb['Price'].count()
item_price_s = items_gb['Price'].mean()
total_value_s = items_gb['Price'].sum()

# Numeric summary; kept unformatted so it can be re-sorted by other columns
items_summary_df = pd.DataFrame({
                                'Purchase Count': purchase_count_s,
                                'Item Price': item_price_s,
                                'Total Purchase Price': total_value_s
                                })

# Most-purchased items first (sort_values returns a new frame, so
# items_summary_df itself stays numeric)
items_summary_df_sorted_count = items_summary_df.sort_values('Purchase Count', ascending=False)

# Format the money columns with the shared helper for consistent '$#,###.##' output
for money_col in ('Item Price', 'Total Purchase Price'):
    items_summary_df_sorted_count[money_col] = items_summary_df_sorted_count[money_col].map(format_money)

items_summary_df_sorted_count.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Price
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
145,Fiery Glass Crusader,9,$4.58,$41.22
108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77
82,Nirvana,9,$4.90,$44.10
19,"Pursuit, Cudgel of Necromancy",8,$1.02,$8.16


## Most Profitable Items

In [12]:
# Highest-grossing items first; sort on the numeric totals before formatting
items_summary_df_sorted_value = items_summary_df.sort_values('Total Purchase Price', ascending=False)

# Format the money columns with the shared helper for consistent '$#,###.##' output
for money_col in ('Item Price', 'Total Purchase Price'):
    items_summary_df_sorted_value[money_col] = items_summary_df_sorted_value[money_col].map(format_money)

items_summary_df_sorted_value.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Price
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22
92,Final Critic,8,$4.88,$39.04
103,Singed Scalpel,8,$4.35,$34.80
