In [2]:
#Import the libraries required for the analysis (the resource file is loaded in the next cell)
import pandas as pd 
import numpy as np 

In [3]:
#Path to the purchase records, relative to the notebook's working directory
resource = "Resources/purchase_data.csv"

#Read the CSV into the dataframe that every later cell analyses
purchasedata = pd.read_csv(filepath_or_buffer=resource)

#Fetch the first four rows; the trailing semicolon stops the cell from rendering them
purchasedata.head(n=4);

# Player Count

In [4]:
#Count the distinct screen names in the "SN" column; each distinct name is treated as one player
##"nunique()" returns the count directly instead of building a frequency table with "value_counts()" first
total_players = purchasedata["SN"].nunique()

#Wrap the count in a single-row dataframe so that it is displayed as a table
total_players_df = pd.DataFrame({
    "Total Players" : [total_players]
})

#Display the dataframe
total_players_df

Unnamed: 0,Total Players
0,576


# Purchasing Analytics

In [8]:
#Number of distinct items sold: the length of the array of unique "Item ID" values
unique_items = len(pd.unique(purchasedata["Item ID"]))

#Average price: the mean of the "Price" column, rendered as dollars with two decimal places
##Note that "avgprice" holds the formatted string, not the number
avgprice = "${:.2f}".format(purchasedata["Price"].mean())

#Number of purchases: the count of non-null entries in the "Purchase ID" column
num_purchases = purchasedata["Purchase ID"].count()

#Total revenue: the sum of the "Price" column, rendered as dollars with two decimal places
##Note that "revenue" holds the formatted string, not the number
revenue = "${:.2f}".format(purchasedata["Price"].sum())

#Assemble the four summary figures into a single-row dataframe
purchase_analytics_df = pd.DataFrame([{
    "Number of Unique Items" : unique_items,
    "Average Price" : avgprice,
    "Number of Purchases" : num_purchases,
    "Total Revenue" : revenue
}])

#Display the dataframe
purchase_analytics_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,$2379.77


# Gender Demographics 

In [12]:
#Split the purchase records into one dataframe per value found in the "Gender" column
male_data = purchasedata.loc[purchasedata["Gender"] == "Male"]
female_data = purchasedata.loc[purchasedata["Gender"] == "Female"]
other_gender_data = purchasedata.loc[purchasedata["Gender"] == "Other / Non-Disclosed"]

#Number of distinct players per gender, taken from the distinct screen names in "SN"
##"dropna=False" keeps the count equal to the length of the array of unique values
male_count = male_data["SN"].nunique(dropna=False)
female_count = female_data["SN"].nunique(dropna=False)
other_count = other_gender_data["SN"].nunique(dropna=False)

#Share of all players held by each gender, as a percentage string with two decimal places
##"total_players" comes from the Player Count cell above
male_percent = "{:.2f}%".format(male_count / total_players * 100)
female_percent = "{:.2f}%".format(female_count / total_players * 100)
other_percent = "{:.2f}%".format(other_count / total_players * 100)

#Assemble the counts and percentages into a dataframe with one row per gender
gender_demographics = pd.DataFrame(
    {
        "Total Unique Players (Count)": [male_count, female_count, other_count],
        "Percentage of the Total": [male_percent, female_percent, other_percent],
    },
    index=["Male", "Female", "Other/ Non-Disclosed"],
)

#Display the dataframe
gender_demographics

Unnamed: 0,Total Unique Players (Count),Percentage of the Total
Male,484,84.03%
Female,81,14.06%
Other/ Non-Disclosed,11,1.91%


# Purchasing Analytics (Gender)

In [20]:
#Number of purchases made by each gender: the count of non-null "Purchase ID" entries
##"male_data", "female_data" and "other_gender_data" come from the Gender Demographics cell above
male_purchase_count = male_data["Purchase ID"].count()
female_purchase_count = female_data["Purchase ID"].count()
other_purchase_count = other_gender_data["Purchase ID"].count()


#Average purchase price for each gender: the mean of the "Price" column
avg_price_male = male_data["Price"].mean()
avg_price_female = female_data["Price"].mean()
avg_price_other = other_gender_data["Price"].mean()


#Total purchase value for each gender: the sum of the "Price" column
purchase_value_male = male_data["Price"].sum()
purchase_value_female = female_data["Price"].sum()
purchase_value_other = other_gender_data["Price"].sum()


#Average total spend per person: the total purchase value divided by the number of unique players of that gender
##The unique player counts come from the Gender Demographics cell above
avg_per_person_male = purchase_value_male/male_count
avg_per_person_female = purchase_value_female/female_count
avg_per_person_other = purchase_value_other/other_count


#Create a dataframe for the information collected, one row per gender
gender_purchasing_analytics_df = pd.DataFrame({
    "Total Number of Purchases" : [male_purchase_count,female_purchase_count,other_purchase_count],
    "Total Purchase Value": [purchase_value_male,purchase_value_female,purchase_value_other],
    "Average Purchase Price": [avg_price_male,avg_price_female,avg_price_other],
    "Average Total Purchase Price Per Person": [avg_per_person_male,avg_per_person_female,avg_per_person_other]},
    index = ["Male", "Female", "Other/ Non-Disclosed"])

#Name the index on the dataframe itself, before any styling is applied
##Setting it afterwards through the Styler only works because the Styler happens to share the frame's index object
gender_purchasing_analytics_df.index.name = "Gender"

#Format the currency columns for display
##The Styler is kept in its own variable so that the "_df" name still refers to a dataframe holding the numeric values
gender_purchasing_analytics_styled = gender_purchasing_analytics_df.style.format({"Total Purchase Value":"${:,.2f}",
                                  "Average Purchase Price":"${:,.2f}",
                                  "Average Total Purchase Price Per Person":"${:,.2f}"})

#Display the finalised, formatted table
gender_purchasing_analytics_styled

Unnamed: 0_level_0,Total Number of Purchases,Total Purchase Value,Average Purchase Price,Average Total Purchase Price Per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Male,652,"$1,967.64",$3.02,$4.07
Female,113,$361.94,$3.20,$4.47
Other/ Non-Disclosed,15,$50.19,$3.35,$4.56


# Age Demographics 

In [34]:
#Bin edges for the age groups
##"pd.cut" is called below with its default right-closed intervals, so the edges give (0, 9.99], (9.99, 14.99], ... (39.99, 1000]
##An age of exactly 0, or above 1000, falls outside every bin and is left without an age group
bins = [ 0, 9.99, 14.99, 19.99, 24.99, 29.99, 34.99, 39.99, 1000]


#Labels for the age groups, one per interval between consecutive bin edges
age_groups = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]


#Add an "Age Group" column to the original data by sorting the "Age" values into the bins and labels created above
##This modifies "purchasedata" in place; re-running the cell simply overwrites the column with the same values
purchasedata["Age Group"] = pd.cut(purchasedata["Age"], bins, labels=age_groups)

#Group the purchase records by the new "Age Group" column
##"observed=False" is passed explicitly so that every age group is always listed, even one with no players,
##whatever the default of the installed pandas version is
grouped_age = purchasedata.groupby("Age Group", observed=False)

#The total number of players in each age group is the number of distinct screen names in the "SN" column
total_players_age = grouped_age["SN"].nunique()


#Percentage of all players that falls into each age group
##"total_players" comes from the Player Count cell above
age_bins_percentage = (total_players_age/total_players)*100

#Create a dataframe from the information collected
age_demographics_df = pd.DataFrame({
    "Total Players" : total_players_age,
    "Percentage of Players": age_bins_percentage
})

#Format the percentage with two decimal places for display
##The Styler is kept in its own variable so that the "_df" name still refers to a dataframe holding the numeric values
age_demographics_styled = age_demographics_df.style.format({"Percentage of Players":"{:,.2f}%"})

#Display the formatted table
age_demographics_styled

Unnamed: 0_level_0,Total Players,Percentage of Players
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,17,2.95%
10-14,22,3.82%
15-19,107,18.58%
20-24,258,44.79%
25-29,77,13.37%
30-34,52,9.03%
35-39,31,5.38%
40+,12,2.08%
