### Heroes Of Pymoli Data Analysis
* Of the 1163 active players, the vast majority are male (84%). There also exists, a smaller, but notable proportion of female players (14%).

* Our peak age demographic falls between 20-24 (44.8%) with secondary groups falling between 15-19 (18.60%) and 25-29 (13.4%).  
-----

### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
file_to_load = r"purchase_data.csv"
# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)

purchase_data.head(10)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
5,5,Yalae81,22,Male,81,Dreamkiss,3.61
6,6,Itheria73,36,Male,169,"Interrogator, Blood Blade of the Queen",2.18
7,7,Iskjaskst81,20,Male,162,Abyssal Shard,2.67
8,8,Undjask33,22,Male,21,Souleater,1.1
9,9,Chanosian48,35,Other / Non-Disclosed,136,Ghastly Adamantite Protector,3.58


## Player Count

In [2]:
#total number of players in dataset
player_count = len(purchase_data["SN"].unique())

## Purchasing Analysis (Total)

In [3]:
#total number of unique items
item_count = len(purchase_data["Item Name"].unique())

#average price of items
avg_price = round(purchase_data['Price'].mean(),2)

#total number of purchases
purchase_count = len(purchase_data['Purchase ID'])

#total revenue
total_rev = round(purchase_data['Price'].sum(),2)

#summary table 
purchasing_df = pd.DataFrame({"Total Players": [player_count],
             "Item Count": [item_count],
             "Average Price": [avg_price],
             "Total Purchases": [purchase_count],
             "Total Revenues": [total_rev],})
purchasing_df

Unnamed: 0,Total Players,Item Count,Average Price,Total Purchases,Total Revenues
0,576,179,3.05,780,2379.77


## Gender Demographics

In [4]:
gender_sort = purchase_data.groupby("Gender")
gender_count = gender_sort.nunique()["SN"]
gender_percent = gender_count.apply(lambda x: (x/player_count)*100)
pd.concat([gender_count,gender_percent],axis = 1)

Unnamed: 0_level_0,SN,SN
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,81,14.0625
Male,484,84.027778
Other / Non-Disclosed,11,1.909722



## Purchasing Analysis (Gender)

In [5]:
purch_ct_gen = pd.DataFrame(gender_sort['Purchase ID'].count())
avg_price_gen = round(pd.DataFrame(gender_sort['Price'].mean()),2)
#average purchase total per person
sum_price = pd.DataFrame(gender_sort['Price'].sum())
#create summary table + format
gender_df = pd.merge(pd.merge(purch_ct_gen,avg_price_gen,on = "Gender"),sum_price,on="Gender",
                     suffixes = ["_Average","_Total"])
gender_df

Unnamed: 0_level_0,Purchase ID,Price_Average,Price_Total
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,113,3.2,361.94
Male,652,3.02,1967.64
Other / Non-Disclosed,15,3.35,50.19


## Age Demographics

In [6]:
# purchase_data.Age.max()
# purchase_data.Age.min()
# #bins should be from 5 to 50 

In [7]:
bins = [5,14,23,32,41,50]
labels = ['5-14','15-23','24-32','33-41','42-50']
age_int = pd.cut(purchase_data.Age,bins, labels = labels)
bins_ct = age_int.value_counts()
bins_pct = round(bins_ct.apply(lambda x: (x)/player_count)*100,2)
#create summary table
age_summary = pd.concat([bins_ct,bins_pct],axis=1,sort=True)
age_summary

Unnamed: 0,Age,Age.1
5-14,51,8.85
15-23,434,75.35
24-32,218,37.85
33-41,72,12.5
42-50,5,0.87


## Purchasing Analysis (Age)

In [8]:
age_sort = purchase_data.assign(Ages = age_int)
purch_ct_age = age_sort.groupby('Ages')['Purchase ID'].count()
avg_price_age = round(age_sort.groupby('Ages')['Price'].mean(),2)
sum_price_age = age_sort.groupby('Ages')['Price'].sum()
purchase_summary = pd.concat([purch_ct_age,avg_price_age,sum_price_age], axis = 1)
purchase_summary

Unnamed: 0_level_0,Purchase ID,Price,Price
Ages,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
5-14,51,3.14,159.91
15-23,434,3.03,1316.73
24-32,218,3.03,660.28
33-41,72,3.16,227.86
42-50,5,3.0,14.99


## Top Spenders

In [9]:
temp = purchase_data[['SN','Price']].groupby('SN').sum().sort_values('Price', ascending = False).head(5)
mask = list(temp.index)
spender_data = purchase_data.set_index('SN').loc[mask]
purch_ct_spend = spender_data.groupby('SN')['Purchase ID'].count()
avg_price_spend = round(spender_data.groupby('SN')['Price'].mean(),2)
purch_sum_spend = spender_data.groupby('SN')['Price'].sum()
#summary table
spender_summary = pd.concat([purch_ct_spend,avg_price_spend,purch_sum_spend],axis=1)
spender_summary

Unnamed: 0_level_0,Purchase ID,Price,Price
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Chamjask73,3,4.61,13.83
Idastidru52,4,3.86,15.45
Iral74,4,3.4,13.62
Iskadarya95,3,4.37,13.1
Lisosia93,5,3.79,18.96


## Most Popular Items

In [10]:
temp2 = purchase_data[['Item ID','Item Name','Price']].groupby('Item Name').sum().sort_values('Price', ascending = False).head(5)
mask2 = list(temp2.index)
item_data = purchase_data.set_index('Item Name').loc[mask2]
purch_ct_item = item_data.groupby('Item Name')['Purchase ID'].count()

#Took the average price for items instead of pulling price because there are two different prices for Final Critic

avg_price_item = round(item_data.groupby('Item Name')['Price'].mean(),2)
purch_sum_item = item_data.groupby('Item Name')['Price'].sum()
#summary table 
pd.concat([purch_ct_item,avg_price_item,purch_sum_item],axis=1)

Unnamed: 0_level_0,Purchase ID,Price,Price
Item Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Fiery Glass Crusader,9,4.58,41.22
Final Critic,13,4.61,59.99
Nirvana,9,4.9,44.1
"Oathbreaker, Last Hope of the Breaking Storm",12,4.23,50.76
Singed Scalpel,8,4.35,34.8
