In [104]:
import pandas as pd

In [105]:
fpath = "Resources/purchase_data.csv"
hop_df = pd.read_csv(fpath,encoding = 'latin1')
hop_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## Player Count

In [106]:
player_list = hop_df[['SN','Gender']].drop_duplicates()
player_count = len(player_list['SN'])
#print(f'Total number of players: {player_count}')
pc_df = pd.DataFrame([{"Total Players":player_count}])
pc_df

Unnamed: 0,Total Players
0,576


## Purchasing Analysis (Total)
* Number Unique Items
* Average Purchase Price
* Total Number of Purchases
* Total Revenue

In [107]:
item_count = hop_df['Item Name'].nunique()
# print(f'Number of Unique Items:{item_count}')

ave_purch = hop_df['Price'].mean()
# print(f'Average Purchase Price: ${ave_purch:.2f}')

tot_purch_count = hop_df['Purchase ID'].nunique()
# print(f'Total Number of Purchases: {tot_purch_count}')

tot_rev = hop_df['Price'].sum()
# print(f'Total Revenue: ${tot_rev:.2f}')

purch_analysis = {"Num of Unique Items":item_count,
                  "Average Price":ave_purch,
                  "Num of Purchases":tot_purch_count,
                  "Total Revenue": tot_rev}
pa_df = pd.DataFrame(purch_analysis,index = [0])
pa_df

Unnamed: 0,Num of Unique Items,Average Price,Num of Purchases,Total Revenue
0,179,3.050987,780,2379.77


### Gender Demographics

* Percentage and Count of Male Players
* Percentage and Count of Female Players
* Percentage and Count of Other / Non-Disclosed

In [175]:
pcount_m = player_list['Gender'].value_counts()['Male']
pcount_f = player_list['Gender'].value_counts()['Female']
pcount_o = player_list['Gender'].value_counts()['Other / Non-Disclosed']

perc_m = round(pcount_m/player_count,4)
perc_f = round(pcount_f/player_count,4)
perc_o = round(pcount_o/player_count,4)

gender_dem = {"Count":[pcount_f,pcount_m,pcount_o],
              "Percentage of Players":[perc_f,perc_m,perc_o]}
gd_df = pd.DataFrame(gender_dem, index = ["Female","Male","Other/Non-Disclosed"])
gd_df["Percentage of Players"] = gd_df["Percentage of Players"].map('{:.2%}'.format)
gd_df

Unnamed: 0,Count,Percentage of Players
Female,81,14.06%
Male,484,84.03%
Other/Non-Disclosed,11,1.91%


### Purchasing Analysis (Gender)

* The below each broken by gender
  * Purchase Count
  * Average Purchase Price
  * Total Purchase Value
  * Average Purchase Total per Person by Gender

In [177]:
grouped_hop_df = hop_df.groupby(['Gender'])

purch_by_gend = grouped_hop_df['Gender'].count()
avepurch = grouped_hop_df['Price'].mean()
purchtotals = grouped_hop_df['Price'].sum()
avepurch_per = (purchtotals/gender_dem['Count'])    

pa_gen = {
    "Purchase Count":purch_by_gend,
    "Average Purchase": avepurch,
    "Total Purchase Value":purchtotals,
    "Avg Total Purchase per Person":avepurch_per
    }

summ_pagen_df = pd.DataFrame(pa_gen,index = ["Female","Male","Other / Non-Disclosed"])
summ_pagen_df = summ_pagen_df.style.format({"Total Purchase Value":"${:,.2f}", "Avg Total Purchase per Person":"${:,.2f}"})
summ_pagen_df


Unnamed: 0,Purchase Count,Average Purchase,Total Purchase Value,Avg Total Purchase per Person
Female,113,3.203009,$361.94,$4.47
Male,652,3.017853,"$1,967.64",$4.07
Other / Non-Disclosed,15,3.346,$50.19,$4.56


### Age Demographics

* The below each broken into bins of 4 years (i.e. &lt;10, 10-14, 15-19, etc.)
  * Purchase Count
  * Average Purchase Price
  * Total Purchase Value
  * Average Purchase Total per Person by Age Group

### Purchasing Analysis (age)

* The below each broken into bins of 4 years (i.e. &lt;10, 10-14, 15-19, etc.)
  * Purchase Count
  * Average Purchase Price
  * Total Purchase Value
  * Average Purchase Total per Person by Age

### Top Spenders

* Identify the the top 5 spenders in the game by total purchase value, then list (in a table):
  * SN
  * Purchase Count
  * Average Purchase Price
  * Total Purchase Value

### Most Popular Items

* Identify the 5 most popular items by purchase count, then list (in a table):
  * Item ID
  * Item Name
  * Purchase Count
  * Item Price
  * Total Purchase Value

### Most Profitable Items

* Identify the 5 most profitable items by total purchase value, then list (in a table):
  * Item ID
  * Item Name
  * Purchase Count
  * Item Price
  * Total Purchase Value