In [114]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# Location of the purchase records, given relative to the working directory
purchase_csv = "purchase_data.csv"

# Load the purchase records into a pandas DataFrame
purchase_df = pd.read_csv(filepath_or_buffer=purchase_csv)

## Player Count

In [35]:
# A player is identified by the 'SN' column, so the number of players is the
# number of distinct 'SN' values (dropna=False keeps the count identical to
# len(unique()), which would also count a missing value as one entry).
player_count = purchase_df["SN"].nunique(dropna=False)

# One-row summary frame holding the player count; displayed as the cell output
player_count_df = pd.DataFrame(data=[[player_count]], columns=["Total Players"])
player_count_df

Unnamed: 0,Total Players
0,576


## Purchasing Analysis (Total)

In [36]:
# Headline purchasing metrics, each computed over every row of purchase_df:
# unique items, average price, number of purchases and total revenue.

# Unique items: distinct values of 'Item ID' (a missing value would count once,
# exactly as with len(unique()))
item_count = purchase_df["Item ID"].nunique(dropna=False)

# Number of purchases: one row per purchase, so the length of 'Purchase ID'
purchase_count = purchase_df["Purchase ID"].size

# Both price metrics come from the same 'Price' column
price_column = purchase_df["Price"]

# Average price: mean of 'Price'
avg_price = price_column.mean()

# Total revenue: sum of 'Price'
total_revenue = price_column.sum()


In [37]:
# Collect the four purchasing metrics into a single-row summary frame.
# The dict's key order fixes the column order of the displayed table.
purchase_summary_row = {
    "Number of Unique Items": item_count,
    "Average Price": avg_price,
    "Number of Purchases": purchase_count,
    "Total Revenue": total_revenue,
}
purchase_analysis_df = pd.DataFrame([purchase_summary_row])
purchase_analysis_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,3.050987,780,2379.77


## Gender Demographics

In [57]:
# Count and percentage of 'Male', 'Female' and 'Other / Non-Disclosed' players.
# Players are de-duplicated on 'SN' because a player can have several purchase
# rows; percentages are taken against player_count (all unique players).
#
# Rows are selected with a boolean mask instead of
# groupby(['Gender']).get_group(('Male')). ('Male') is a plain string, not a
# tuple, and newer pandas releases deprecate passing a bare string to
# get_group when the grouper is a one-element list. The mask also avoids
# rebuilding the same groupby three times. Note: a label that is absent from
# the data now yields a count of 0 rather than raising KeyError.

#Male Count and Percentage
total_male = purchase_df[purchase_df["Gender"] == "Male"]
unique_male_count = total_male["SN"].nunique(dropna=False)
male_percent = round((unique_male_count / player_count) * 100, 2)

#Female Count and Percentage
total_female = purchase_df[purchase_df["Gender"] == "Female"]
unique_female_count = total_female["SN"].nunique(dropna=False)
female_percent = round((unique_female_count / player_count) * 100, 2)

#Other Count and Percentage
total_other = purchase_df[purchase_df["Gender"] == "Other / Non-Disclosed"]
unique_other_count = total_other["SN"].nunique(dropna=False)
other_percent = round((unique_other_count / player_count) * 100, 2)


In [63]:
# Assemble the per-gender counts and percentages into one frame, one row per
# gender, then promote the 'Gender' column to the index for display.
gender_rows = [
    (unique_male_count, male_percent, "Male"),
    (unique_female_count, female_percent, "Female"),
    (unique_other_count, other_percent, "Other / Non-Disclosed"),
]
gender_demographics_df = pd.DataFrame(
    gender_rows,
    columns=["Total Count", "Percentage of Players", "Gender"],
)

# set_index returns a new frame; gender_demographics_df itself is left unchanged
gender_indexed_demographics_df = gender_demographics_df.set_index("Gender")
gender_indexed_demographics_df


Unnamed: 0_level_0,Total Count,Percentage of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,484,84.03
Female,81,14.06
Other / Non-Disclosed,11,1.91


## Purchasing Analysis (Gender) 

In [90]:
# Per-gender purchasing summary. All three base series share one groupby on
# 'Gender', so they are indexed identically and divide element-wise.
purchases_by_gender = purchase_df.groupby("Gender")

# Distinct players ('SN') in each gender group
unique_gender_count = purchases_by_gender["SN"].nunique()

# Number of purchases (non-null 'Item ID' entries) in each gender group
gender_purchase_count = purchases_by_gender["Item ID"].count()

# Sum of 'Price' in each gender group
gender_total_purchase = purchases_by_gender["Price"].sum()

# Build the summary frame: start from the purchase counts, then append the
# derived columns in display order, each rounded to 2 decimals.
gender_df = gender_purchase_count.to_frame("Purchase Count").assign(
    **{
        # total spent / number of purchases
        "Average Purchase Price": (gender_total_purchase / gender_purchase_count).round(2),
        # total spent
        "Total Purchase Value": gender_total_purchase.round(2),
        # total spent / number of distinct players
        "Avg Total Purchase per Person": (gender_total_purchase / unique_gender_count).round(2),
    }
)
gender_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,3.2,361.94,4.47
Male,652,3.02,1967.64,4.07
Other / Non-Disclosed,15,3.35,50.19,4.56


## Age Demographics

In [121]:
# Bin edges for the age brackets. pd.cut treats them as right-closed intervals
# by default: (0, 9], (9, 14], ..., (39, inf].
# The top edge is open-ended (infinity) rather than a hard-coded maximum age,
# so any age above 39 lands in "40+" instead of silently becoming NaN when the
# data contains someone older than the previous fixed cap of 46.
bins = [0, 9, 14, 19, 24, 29, 34, 39, float("inf")]

# One label per interval, so there is exactly one fewer label than bin edges
group_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]


In [122]:
# Label every purchase row with its age bracket: cut 'Age' at the edges in
# `bins` and name the resulting intervals with `group_names`.
purchase_df["Age Summary"] = pd.cut(x=purchase_df["Age"], bins=bins, labels=group_names)

In [118]:
# Inspect the column labels of purchase_df (displayed as the cell output)
purchase_df.columns

Index(['Purchase ID', 'SN', 'Age', 'Gender', 'Item ID', 'Item Name', 'Price',
       'Age Summary'],
      dtype='object')

In [123]:
# Total counts and percentages of players per age bracket are based on the
# number of distinct 'SN' values, because the data contains duplicate players.

# Distinct players in each "Age Summary" bracket. The column is categorical
# (it is produced by pd.cut), so observed=False is passed explicitly: it keeps
# brackets with no players in the table (count 0) and does not depend on the
# implicit default, which newer pandas releases deprecate and change.
unique_age_count = purchase_df.groupby("Age Summary", observed=False)["SN"].nunique()

# Summary frame indexed by age bracket, with "Total Count" as the first column
age_bin_df = unique_age_count.to_frame("Total Count").sort_index()

# Share of all unique players (player_count) in each bracket, as a percentage
# rounded to 2 decimals
age_bin_df["Percentage of Players"] = round((unique_age_count / player_count) * 100, 2)

age_bin_df

Unnamed: 0_level_0,Total Count,Percentage of Players
Age Summary,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,17,2.95
10-14,22,3.82
15-19,107,18.58
20-24,258,44.79
25-29,77,13.37
30-34,52,9.03
35-39,31,5.38
40+,12,2.08


## Purchasing Analysis (Age)