In [1]:
#import dependencies
import os
import csv
import pandas as pd
import numpy as np

In [2]:
#load csv file and create a pandas dataframe from it's data.
PurchaseData_file = "Resources/purchase_data.csv"
PurchaseData_df = pd.read_csv(PurchaseData_file)
# PurchaseData_df.head()

In [3]:
#Display total player count

#calculate amount of unique players by using x = len(DataFrame["column_name"])
UniquePlayersCount = len(PurchaseData_df["SN"].unique())
#print(UniquePlayersCount)

# #create a data frame with the amount of unique players listed in a column called "Total Players"
Data_Table = pd.DataFrame({"Total Players": [UniquePlayersCount]})
Data_Table

Unnamed: 0,Total Players
0,576


In [4]:
# Number of Unique Items
UniqueItems = len(PurchaseData_df["Item Name"].unique())

# Total Number of Purchases
AmountOfPurchases = len(PurchaseData_df["Purchase ID"])

# Total Revenue
TotalRevenue = PurchaseData_df["Price"].sum()

# Average Purchase Price
AveragePurchasePrice = TotalRevenue / AmountOfPurchases

#create Financials Data Frame
Financials_df = pd.DataFrame({
    "Number of Unique Items": [UniqueItems],
    "Average Price": [AveragePurchasePrice],
    "Number of Purchases": [AmountOfPurchases],
    "Total Revenue": [TotalRevenue]
    })

#format Average Price and Total Revenue columns
Financials_df["Average Price"] = Financials_df["Average Price"].map("${:.2f}".format)
Financials_df["Total Revenue"] = Financials_df["Total Revenue"].map("${:,.2f}".format)

#display the Financials summary
Financials_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


In [5]:
#calculate amount of unique players by using x = len(DataFrame["column_name"])
UniquePlayersCount = len(PurchaseData_df["SN"].unique())

#isolate lists of players by gender
MalePlayers = PurchaseData_df.loc[(PurchaseData_df["Gender"] == "Male"), :]
FemalePlayers = PurchaseData_df.loc[(PurchaseData_df["Gender"] == "Female"), :]
OtherPlayers = PurchaseData_df.loc[(PurchaseData_df["Gender"] == "Other / Non-Disclosed"), :]

#isolate lists of unique players by gender
UniqueMalePlayers = MalePlayers["SN"].unique()
UniqueFemalePlayers = FemalePlayers["SN"].unique()
UniqueOtherPlayers = OtherPlayers["SN"].unique()

#populate a dictionary of gender demographic data  
gender_demographic_data = [[len(UniqueMalePlayers), ((int(len(UniqueMalePlayers)) / int(UniquePlayersCount)) * 100)],
        [len(UniqueFemalePlayers), ((int(len(UniqueFemalePlayers)) / int(UniquePlayersCount)) * 100)], 
        [len(UniqueOtherPlayers), ((int(len(UniqueOtherPlayers)) / int(UniquePlayersCount)) * 100)]]

#create a data frame using the gender demographic data dictionary 
GenderDemographics_df = pd.DataFrame(gender_demographic_data, columns = ['Total Count', 'Percentage of Players'], index = ['Male', 'Female', 'Other / Non-Disclosed'])

#set percentage format
GenderDemographics_df["Percentage of Players"] = GenderDemographics_df["Percentage of Players"].map("{:.2f}%".format)

#display the Gender demographic summary
GenderDemographics_df

Unnamed: 0,Total Count,Percentage of Players
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


In [6]:
#Find sum of all purchases made by each gender category.
Male_df = PurchaseData_df.loc[PurchaseData_df["Gender"] == "Male", :]
MaleSum = Male_df["Price"].sum()
Female_df = PurchaseData_df.loc[PurchaseData_df["Gender"] == "Female", :]
FemaleSum = Female_df["Price"].sum()
Other_df = PurchaseData_df.loc[PurchaseData_df["Gender"] == "Other / Non-Disclosed", :]
OtherSum = Other_df["Price"].sum()

#find the average purchase price of purchases made by each gender category.
MaleAverage = MaleSum / len(MalePlayers)
FemaleAverage = FemaleSum / len(FemalePlayers)
OtherAverage = OtherSum / len(OtherPlayers)

#find the average total purchase price per unique person of each gender category
UniqueMaleAverage = MaleSum / len(UniqueMalePlayers)
UniqueFemaleAverage = FemaleSum / len(UniqueFemalePlayers)
UniqueOtherAverage = OtherSum / len(UniqueOtherPlayers)

#create gender analysis data dictionary based on values above
gender_analysis_data = [['Female', len(FemalePlayers), FemaleAverage, FemaleSum, UniqueFemaleAverage],
                        ['Male', len(MalePlayers), MaleAverage, MaleSum, UniqueMaleAverage],
                        ['Other / Non-Disclosed', len(OtherPlayers), OtherAverage, OtherSum, UniqueOtherAverage]]

# create gender analysis data frame
GenderAnalysis_df = pd.DataFrame(gender_analysis_data, columns = ['Gender', 'Purchase Count', "Average Purchase Price", 'Total Purchase Value', 'Average Total Purchase Per Person'])

#format columns to correct number formats
GenderAnalysis_df["Total Purchase Value"] = GenderAnalysis_df["Total Purchase Value"].map("${:.2f}".format)
GenderAnalysis_df["Average Purchase Price"] = GenderAnalysis_df["Average Purchase Price"].map("${:.2f}".format)
GenderAnalysis_df["Average Total Purchase Per Person"] = GenderAnalysis_df["Average Total Purchase Per Person"].map("${:.2f}".format)

#group by Gender
grouped_GenderAnalysis_df = GenderAnalysis_df.set_index("Gender")

#display Purchasing Analysis (Gender)
grouped_GenderAnalysis_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Average Total Purchase Per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,$1967.64,$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


In [77]:
#create bins and names for the bins
bins = [-1, 9, 14, 19, 24, 29, 34, 39,  max(PurchaseData_df["Age"])]
groupnames = [">10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

#add the age ranges column to the PurchaseData data frame. 
PurchaseData_df["Age Ranges"] = pd.cut(PurchaseData_df["Age"], bins, labels=groupnames)

#calculate and assign Under 10 data
Under10Purchases_df = PurchaseData_df.loc[PurchaseData_df["Age Ranges"] == ">10", :]
Under10PurchaseSum = Under10Purchases_df["Price"].sum()
Under10PurchaseCount = Under10Purchases_df["Purchase ID"].count()

#calculate and assign 10-14 data
TenToFourteenPurchases_df = PurchaseData_df.loc[PurchaseData_df["Age Ranges"] == "10-14", :]
TenToFourteenPurchaseSum = TenToFourteenPurchases_df["Price"].sum()
TenToFourteenPurchaseCount = TenToFourteenPurchases_df["Price"].count()

#calculate and assign 15-19 data
FifteenToNineteenPurchases_df = PurchaseData_df.loc[PurchaseData_df["Age Ranges"] == "15-19", :]
FifteenToNineteenPurchaseSum = FifteenToNineteenPurchases_df["Price"].sum()
FifteenToNineteenPurchaseCount = FifteenToNineteenPurchases_df["Price"].count()

#calculate and assign 20-24 data
TwentyToTwentyFourPurchases_df = PurchaseData_df.loc[PurchaseData_df["Age Ranges"] == "20-24", :]
TwentyToTwentyFourPurchaseSum = TwentyToTwentyFourPurchases_df["Price"].sum()
TwentyToTwentyFourPurchaseCount = TwentyToTwentyFourPurchases_df["Price"].count()

#calculate and assign 25-29 data
TwentyFiveToTwentyNinePurchases_df = PurchaseData_df.loc[PurchaseData_df["Age Ranges"] == "25-29", :]
TwentyFiveToTwentyNinePurchaseSum = TwentyFiveToTwentyNinePurchases_df["Price"].sum()
TwentyFiveToTwentyNinePurchaseCount = TwentyFiveToTwentyNinePurchases_df["Price"].count()

#calculate and assign 30-34 data
ThirtyToThirtyFourPurchases_df = PurchaseData_df.loc[PurchaseData_df["Age Ranges"] == "30-34", :]
ThirtyToThirtyFourPurchaseSum = ThirtyToThirtyFourPurchases_df["Price"].sum()
ThirtyToThirtyFourPurchaseCount = ThirtyToThirtyFourPurchases_df["Price"].count()

#calculate and assign 34-39 data
ThirtyFiveToThirtyNinePurchases_df = PurchaseData_df.loc[PurchaseData_df["Age Ranges"] == "35-39", :]
ThirtyFiveToThirtyNinePurchaseSum = ThirtyFiveToThirtyNinePurchases_df["Price"].sum()
ThirtyFiveToThirtyNinePurchaseCount = ThirtyFiveToThirtyNinePurchases_df["Price"].count()

#calculate and assign 40+ data
FortyPlusPurchases_df = PurchaseData_df.loc[PurchaseData_df["Age Ranges"] == "40+", :]
FortyPlusPurchaseSum = FortyPlusPurchases_df["Price"].sum()
FortyPlusPurchaseCount = FortyPlusPurchases_df["Price"].count()

# FifteenToNineteenPurchases_df
print(FortyPlusPurchaseCount)

# Data = [[">10", Under10PurchaseCount, 0.00, Under10PurchaseSum, 0.00],
#         ["10-14", TenToFourteenPurchaseCount, 0.00, TenToFourteenPurchaseSum, 0.00],
#         ["15-19", FifteenToNineteenPurchaseCount, 0.00, FifteenToNineteenPurchaseSum, 0.00],
#         ["20-24", TwentyToTwentyFourPurchaseCount, 0.00, TwentyToTwentyFourPurchaseSum, 0.00],
#         ["25-29", TwentyFiveToTwentyNinePurchaseCount, 0.00, TwentyFiveToTwentyNinePurchaseSum, 0.00],
#         ["30-34", ThirtyToThirtyFourPurchaseCount, 0.00, ThirtyToThirtyFourPurchaseSum, 0.00],
#         ["35-39", ThirtyFiveToThirtyNinePurchaseCount, 0.00, ThirtyFiveToThirtyNinePurchaseSum, 0.00],
#         ["40+", FortyPlusPurchaseCount, 0.00, FortyPlusPurchaseSum, 0.00]
#        ]

# # create gender analysis data frame
# PurchasingAnalysis_df = pd.DataFrame(Data, columns = ["Age Ranges", 'Purchase Count', "Average Purchase Price", 'Total Purchase Value', 'Average Total Purchase Per Person'])
# PurchasingAnalysis_df

# #format columns to correct number formats
# PurchasingAnalysis_df["Total Purchase Value"] = PurchasingAnalysis_df["Total Purchase Value"].map("${:.2f}".format)
# PurchasingAnalysis_df["Average Purchase Price"] = PurchasingAnalysis_df["Average Purchase Price"].map("${:.2f}".format)
# PurchasingAnalysis_df["Average Total Purchase Per Person"] = PurchasingAnalysis_df["Average Total Purchase Per Person"].map("${:.2f}".format)

# PurchasingAnalysis_df


13
