### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [15]:
# PART 1: DEPENDENCIES & SET UP
# Import Pandas
import pandas as pd
import re

# Load in CSV file
csvfile = "Resources/purchase_data.csv"

# Read purchase_data.csv and store into Pandas dataframe called purchase_data_df
purchase_data_df = pd.read_csv(csvfile)
purchase_data_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## Player Count

* Display the total number of players


In [16]:
# PART 2: ANALYZE ALL PLAYERS 

#Define index & find total number of unique players (num_unique_players)
#Source: https://www.kite.com/python/answers/how-to-count-the-number-of-rows-in-a-pandas-dataframe-in-python
#I don't know why index works
overall_index = purchase_data_df.index
num_unique_players = purchase_data_df['SN'].nunique()

#Display total
print(num_unique_players)

576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, total # of purchases, total revenue


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [24]:
#Count unique items in Index 5
unique_items = purchase_data_df['Item Name'].nunique()

#Find average purchase price
average_price = purchase_data_df['Price'].mean(axis=0, skipna=None, level=None, numeric_only=None)

#Count Total Number of Purchase
total_purchases = len(overall_index)

# Sum price column to get total revenue
total_revenue = purchase_data_df['Price'].sum()

# Store summary analysis in a dataframe called purchase_summary_df
purchase_summary_df = pd.DataFrame(
    {"Number of Unique Items" :[unique_items], "Average Price": [average_price], "Number of Purchases": [total_purchases], "Total Revenue": [total_revenue]})

#Display purchase_summary_df as stylized table
purchase_summary_df.style

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,3.050987,780,2379.77


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [66]:
# PART 3: ANALYZE GENDER DEMOGRAPHICS & PURCHASING HABITS

# Create a group for gender and sub-groups for female, male, and other / non-disclosed
gender_data = purchase_data_df.groupby('Gender')
female_players_df = gender_data.get_group('Female')
male_players_df = gender_data.get_group('Male')
nb_players_df = gender_data.get_group('Other / Non-Disclosed')

# Create indexes for each gender group
female_index = female_players_df.index
male_index = male_players_df.index
nb_index = nb_players_df.index

# PART 3A: Display Basic Demographics
# Count unique individuals for each gender
num_unique_females = female_players_df['SN'].nunique()
num_unique_males = male_players_df['SN'].nunique()
num_unique_nb = nb_players_df['SN'].nunique()

# Calculate percentage of each gender over total individual players
percent_female = round((100* num_unique_females / num_unique_players), 2)
percent_male = round((100* num_unique_males / num_unique_players), 2)
percent_nb = round((100* num_unique_nb / num_unique_players), 2)

print(percent_female)

# Display summary dataframe of count and percent of genders / total
gender_demographic_df = pd.DataFrame(
    { "Gender": ["Female", "Male", "Other / Non-Disclosed"], 
     "Total Count": [num_unique_females, num_unique_males, num_unique_nb], 
     "Percentage of Players": [percent_female, percent_male, percent_nb]})

gender_demographic_df

14.06


Unnamed: 0,Gender,Total Count,Percentage of Players
0,Female,81,14.06
1,Male,484,84.03
2,Other / Non-Disclosed,11,1.91



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [67]:
#PART 3B: Analyze FEMALE players
# Calculate average purchase price for female players
average_price_female = female_players_df['Price'].mean(axis=0, skipna=None, level=None, numeric_only=None)

# Count Total Number of Purchase
total_purchases_female = len(female_index)

# Sum price column to get total revenue
total_rev_female = female_players_df['Price'].sum()

# Average purchase total per person

# for i,row in enumerate(female_index)
#     female_SN = female_players_df(row[2])
#     for female_SN in list(unique_females.keys()):
#         f_percentage = (float(unique_females[female_SN]) / num_unique_players)*100
        
# print(f_percentage)

In [68]:
# PART 3C: Analyze MALE players

#Find average purchase price for male players
average_price_male = male_players_df['Price'].mean(axis=0, skipna=None, level=None, numeric_only=None)

# Count Total Number of Purchase
total_purchases_male = len(male_index)

# Sum price column to get total revenue
total_rev_male = male_players_df['Price'].sum()

# Average purchase total per male person

In [69]:
# PART 3D: Analyze OTHER / NON-SPECIFIED players
#Find average purchase price for male players
average_price_nb = nb_players_df['Price'].mean(axis=0, skipna=None, level=None, numeric_only=None)

# Count Total Number of Purchase
total_purchases_nb = len(nb_index)

# Sum price column to get total revenue
total_rev_nb = nb_players_df['Price'].sum()

# Average purchase total per person


In [70]:
# PART 4E: Create & Display Summary Analysis
# Set data frame to gender_summary_df
gender_summary_df = pd.DataFrame(
    { "Gender": ["Female", "Male", "Other / Non-specified"], 
     "Purchase Count": [total_purchases_female, total_purchases_male, total_purchases_nb], 
     "Average Price": [average_price_female, average_price_male, average_price_nb], 
     "Total Purchase Value" : [total_rev_female, total_rev_male, total_rev_nb]})

gender_summary_df

Unnamed: 0,Gender,Purchase Count,Average Price,Total Purchase Value
0,Female,113,3.203009,361.94
1,Male,652,3.017853,1967.64
2,Other / Non-specified,15,3.346,50.19


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [29]:
# PART 5: AGE DEMOGRAPHICS

# age_demographics_df
print(purchase_data_df["Age"].max())
print(purchase_data_df["Age"].min())

age_bins = [0, 9, 14, 19, 24, 29, 34, 39, 44, 49]

# Create labels for these bins
age_labels = ["0-10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40-44", "45-49"]

# Slice age group and place into age_bins
pd.cut(purchase_data_df["Age"], age_bins, labels=age_labels).head()

45
7


0    20-24
1    40-44
2    20-24
3    20-24
4    20-24
Name: Age, dtype: category
Categories (9, object): ['0-10' < '10-14' < '15-19' < '20-24' ... '30-34' < '35-39' < '40-44' < '45-49']

In [30]:
purchase_data_df["Age Group"] = pd.cut(purchase_data_df["Age"], age_bins, labels=age_labels)
purchase_data_df.head()

age_group = purchase_data_df.groupby("Age Group")
print(age_group["SN"].count())

Age Group
0-10      23
10-14     28
15-19    136
20-24    365
25-29    101
30-34     73
35-39     41
40-44     12
45-49      1
Name: SN, dtype: int64


In [31]:
purchase_data_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price,Age Group
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53,20-24
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56,40-44
2,2,Ithergue48,24,Male,92,Final Critic,4.88,20-24
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27,20-24
4,4,Iskosia90,23,Male,131,Fury,1.44,20-24


In [34]:
age_group_count = age_group['SN'].count()
age_group_count

Age Group
0-10      23
10-14     28
15-19    136
20-24    365
25-29    101
30-34     73
35-39     41
40-44     12
45-49      1
Name: SN, dtype: int64

In [41]:
age_group_percentage = (round(age_group_count / num_unique_players)*100)
age_group_percentage

Age Group
0-10       0.0
10-14      0.0
15-19      0.0
20-24    100.0
25-29      0.0
30-34      0.0
35-39      0.0
40-44      0.0
45-49      0.0
Name: SN, dtype: float64

## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [None]:
# age_summary_df

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [None]:
# top_spenders_df

## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [None]:
# popular_items_df

## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

