### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [92]:
# Dependencies and Setup
import pandas as pd
import os
from functools import reduce

# File to Load (Remember to Change These)
file_to_load =os.path.join('Resources', 'purchase_data.csv')

# Read Purchasing File and store into Pandas data frame
purchase_df = pd.read_csv(file_to_load)

purchase_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## Player Count

* Display the total number of players


In [93]:
total_num_players=purchase_df['SN'].nunique()
print("Total Number of Players: " + str(total_num_players))

Total Number of Players: 576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [98]:
num_unique_items=purchase_df['Item ID'].nunique()


average_item_price=f"${(purchase_df['Price'].sum())/num_unique_items:,.2f}"

#This assumes that for each purchase there is an SN
total_num_purchases=purchase_df['SN'].count()

total_revenue=purchase_df['Price'].sum()


summary_dict={'unique_items': num_unique_items, 
              'average_price': average_item_price, 
              'total_purchase_num': total_num_purchases, 
              'revenue_total': total_revenue
             }


summary_df=pd.DataFrame(summary_dict, index=['results'])


summary_df



Unnamed: 0,unique_items,average_price,total_purchase_num,revenue_total
results,183,$13.00,780,2379.77


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [None]:
gender_count_series=purchase_df.groupby('Gender')['SN'].nunique()
gender_percent_series=(purchase_df.groupby('Gender')['SN'].nunique())/total_num_players
print(gender_count_series)
print(gender_percent_series)





## Purchasing Analysis (Gender)

In [None]:
# Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender





gender_df = purchase_df.groupby('Gender').agg({'Price': ['count', 'mean', 'sum', 'max']})
gender_df.columns = ['puchase_count_by_gender', 'price_mean_by_gender', 'price_sum_by_gender', 'price_max_by_gender']
gender_df['average_per_person_by_gender']=(gender_df['price_sum_by_gender'])/gender_df['puchase_count_by_gender']



gender_df








In [None]:
player_df = purchase_df.groupby(['Gender', 'SN']).agg({'Price': ['count', 'mean', 'sum', 'max']})
player_df.columns = ['puchase_count_by_person', 'price_mean_by_person', 'price_sum_by_person', 'price_max_by_person']

player_df[]                                                


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [None]:

bins=pd.cut(purchase_df['Age'],[5,10,15,20, 25, 30, 35, 40, 120], labels=['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-40', '40+'], right=False)  


age_df=purchase_df.groupby(bins).agg({'Age': ['mean'], 'SN':['nunique'], 'Price': ['mean', 'min', 'max', 'sum']})


age_df.columns=['mean_age_in_range', 'num_SNs', 'price_average', 'lowest_price', 'highest_price', 'total_price']

age_df['percent_in_age_group']=(age_df['num_SNs']*age_df['price_average'])/total_num_players* 100

age_df







## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [95]:
total_price_sum=age_df['total_price'].sum()

age_df['per_purchase_total_by_age']=(age_df['total_price'])/total_price_sum * 100


age_df


Unnamed: 0_level_0,mean_age_in_range,num_SNs,price_average,lowest_price,highest_price,total_price,percent_in_age_group,per_purchase_total_by_age
Age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
<10,7.869565,17,3.353478,1.29,4.93,77.13,9.897418,3.24107
10-14,11.392857,22,2.956429,1.03,4.94,82.78,11.291915,3.478487
15-19,16.794118,107,3.035956,1.01,4.91,412.89,56.397097,17.349996
20-24,21.838356,258,3.052219,1.0,4.99,1114.06,136.713984,46.813768
25-29,26.0,77,2.90099,1.0,4.94,293.0,38.780597,12.312114
30-34,31.383562,52,2.931507,1.02,4.93,214.0,26.464992,8.992466
35-40,36.707317,31,3.601707,1.6,4.91,147.67,19.384189,6.205222
40+,41.538462,12,2.941538,1.33,4.93,38.24,6.128205,1.606878


## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [96]:


top_spenders_df=purchase_df.groupby('SN').agg({'Item ID':['count'],'Price': ['mean', 'min', 'max', 'sum' ], })

top_spenders_df.columns=['num_purchases', 'average_per_person', 'min_per_person', 'max_per_person', 'total_per_person']
sorted_top_spenders=top_spenders_df.sort_values('total_per_person', ascending=False)
sorted_top_spenders

Unnamed: 0_level_0,num_purchases,average_per_person,min_per_person,max_per_person,total_per_person
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Lisosia93,5,3.792000,2.52,4.80,18.96
Idastidru52,4,3.862500,1.60,4.93,15.45
Chamjask73,3,4.610000,4.23,4.84,13.83
Iral74,4,3.405000,1.63,4.14,13.62
Iskadarya95,3,4.366667,4.03,4.90,13.10
...,...,...,...,...,...
Ililsasya43,1,1.020000,1.02,1.02,1.02
Irilis75,1,1.020000,1.02,1.02,1.02
Aidai61,1,1.010000,1.01,1.01,1.01
Chanirra79,1,1.010000,1.01,1.01,1.01


## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [97]:
most_pop_df=purchase_df.groupby(['Item ID', 'Item Name']).agg({'Item ID':['count'],'Price': ['sum' ] })

most_pop_df.columns=['purchase_count', 'price_total']

sorted_most_pop_df=most_pop_df.sort_values('purchase_count', ascending=False)
sorted_most_pop_df.head()







Unnamed: 0_level_0,Unnamed: 1_level_0,purchase_count,price_total
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",12,50.76
145,Fiery Glass Crusader,9,41.22
108,"Extraction, Quickblade Of Trembling Hands",9,31.77
82,Nirvana,9,44.1
19,"Pursuit, Cudgel of Necromancy",8,8.16


## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

