In [1]:
#import dependencies
import os
import csv
import pandas as pd
import numpy as np

In [2]:
# Read the purchase records from the CSV file into a pandas DataFrame.
# The path is relative to the notebook's working directory.
PurchaseData_file = "Resources/purchase_data.csv"
PurchaseData_df = pd.read_csv(filepath_or_buffer=PurchaseData_file)

In [3]:
# Total player count.
#
# A player can appear on several purchase rows, so count distinct screen
# names ("SN") rather than rows. dropna=False keeps the count identical to
# len(Series.unique()), which treats a missing value as one distinct entry.
UniquePlayersCount = PurchaseData_df["SN"].nunique(dropna=False)

# One-row summary frame holding the count under "Total Players".
Data_Table = pd.DataFrame(data={"Total Players": [UniquePlayersCount]})
Data_Table

Unnamed: 0,Total Players
0,576


In [4]:
# Purchasing analysis (total): headline figures over every purchase row.

# Distinct item names sold (a missing name would count as one entry).
UniqueItems = PurchaseData_df["Item Name"].nunique(dropna=False)

# Every row is one purchase, so the purchase count is the column length.
AmountOfPurchases = PurchaseData_df["Purchase ID"].size

# Revenue is the sum of all prices; the average price follows from it.
TotalRevenue = PurchaseData_df["Price"].sum()
AveragePurchasePrice = TotalRevenue / AmountOfPurchases

# Assemble the raw numbers into a single-row frame...
financial_summary = {
    "Number of Unique Items": [UniqueItems],
    "Average Price": [AveragePurchasePrice],
    "Number of Purchases": [AmountOfPurchases],
    "Total Revenue": [TotalRevenue],
}
Financials_df = pd.DataFrame(financial_summary)

# ...then render the two money columns as currency strings. assign() replaces
# the existing columns in place, so the column order is unchanged.
Financials_df = Financials_df.assign(**{
    "Average Price": Financials_df["Average Price"].map("${:.2f}".format),
    "Total Revenue": Financials_df["Total Revenue"].map("${:,.2f}".format),
})

# Show the financial summary.
Financials_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


In [30]:
# Gender demographics: distinct players per gender and their share of all players.

# Denominator: distinct screen names across the whole data set
# (equivalent to len(Series.unique()), missing values included).
UniquePlayersCount = PurchaseData_df["SN"].nunique(dropna=False)

# Distinct screen names within each gender.
GenderGroup = PurchaseData_df.groupby("Gender")
GenderCounts = GenderGroup["SN"].nunique()

# Share of the player base, expressed as a percentage.
GenderPercentages = GenderCounts.div(UniquePlayersCount).mul(100)

# Build the table, render the percentage column as text, and list the
# largest group first.
GenderTable = (
    pd.DataFrame({"Total Count": GenderCounts,
                  "Percentage of Players": GenderPercentages})
    .assign(**{
        "Percentage of Players":
            lambda frame: frame["Percentage of Players"].map("{:.2f}%".format)
    })
    .sort_values("Total Count", ascending=False)
)

# Show the table.
GenderTable

# #isolate lists of players by gender
# MalePlayers = PurchaseData_df.loc[(PurchaseData_df["Gender"] == "Male"), :]
# FemalePlayers = PurchaseData_df.loc[(PurchaseData_df["Gender"] == "Female"), :]
# OtherPlayers = PurchaseData_df.loc[(PurchaseData_df["Gender"] == "Other / Non-Disclosed"), :]

# #isolate lists of unique players by gender
# UniqueMalePlayers = MalePlayers["SN"].unique()
# UniqueFemalePlayers = FemalePlayers["SN"].unique()
# UniqueOtherPlayers = OtherPlayers["SN"].unique()

# #populate a dictionary of gender demographic data  
# gender_demographic_data = [[len(UniqueMalePlayers), ((int(len(UniqueMalePlayers)) / int(UniquePlayersCount)) * 100)],
#         [len(UniqueFemalePlayers), ((int(len(UniqueFemalePlayers)) / int(UniquePlayersCount)) * 100)], 
#         [len(UniqueOtherPlayers), ((int(len(UniqueOtherPlayers)) / int(UniquePlayersCount)) * 100)]]

# #create a data frame using the gender demographic data dictionary 
# GenderDemographics_df = pd.DataFrame(gender_demographic_data, columns = ['Total Count', 'Percentage of Players'], index = ['Male', 'Female', 'Other / Non-Disclosed'])

# #set percentage format
# GenderDemographics_df["Percentage of Players"] = GenderDemographics_df["Percentage of Players"].map("{:.2f}%".format)

# #display the Gender demographic summary
# GenderDemographics_df

Unnamed: 0_level_0,Total Count,Percentage of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


In [None]:
# Purchasing analysis by gender.
#
# Everything this cell needs is computed here from PurchaseData_df. The
# earlier version read MalePlayers / UniqueMalePlayers (and the Female/Other
# equivalents), which only exist in commented-out code, so the cell raised
# NameError on a fresh kernel. A single groupby also covers whatever gender
# values are present instead of three hard-coded ones.
purchases_by_gender = PurchaseData_df.groupby("Gender")

# Number of purchase rows, total spent, and distinct buyers for each gender.
gender_purchase_count = purchases_by_gender.size()
gender_purchase_total = purchases_by_gender["Price"].sum()
gender_unique_buyers = purchases_by_gender["SN"].nunique()

# Assemble the summary with "Gender" as a regular column. Groups come back
# sorted by key (Female, Male, Other / Non-Disclosed).
GenderAnalysis_df = pd.DataFrame({
    "Purchase Count": gender_purchase_count,
    # average price of a single purchase
    "Average Purchase Price": gender_purchase_total / gender_purchase_count,
    "Total Purchase Value": gender_purchase_total,
    # average amount spent by each distinct player
    "Average Total Purchase Per Person": gender_purchase_total / gender_unique_buyers,
}).reset_index()

# Render the money columns as currency strings. The thousands separator
# matches the formatting used by the age purchasing table.
for money_column in ("Total Purchase Value",
                     "Average Purchase Price",
                     "Average Total Purchase Per Person"):
    GenderAnalysis_df[money_column] = GenderAnalysis_df[money_column].map("${:,.2f}".format)

# Index the summary by gender for display.
grouped_GenderAnalysis_df = GenderAnalysis_df.set_index("Gender")

# Display Purchasing Analysis (Gender).
grouped_GenderAnalysis_df

In [19]:
# Age demographics: distinct players per age bracket and their share of all players.

# Bin edges are right-inclusive: (-1, 9], (9, 14], ..., (39, inf).
# The open-ended last edge keeps the bins strictly increasing regardless of
# the oldest age in the data; using max(Age) would break pd.cut when nobody
# is older than 39.
bins = [-1, 9, 14, 19, 24, 29, 34, 39, np.inf]
# The first bracket holds ages 0-9, i.e. "<10" (the earlier ">10" label was inverted).
groupnames = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

# Tag every purchase row with its age bracket. This adds a column to
# PurchaseData_df; the purchasing-analysis-by-age cell groups on it.
PurchaseData_df["Age Ranges"] = pd.cut(PurchaseData_df["Age"], bins, labels=groupnames)

# observed=False keeps every bracket in the result, even an empty one, and
# states explicitly what would otherwise depend on the pandas version's
# default for categorical group keys.
groupedbyage = PurchaseData_df.groupby("Age Ranges", observed=False)

# Distinct screen names per bracket and their share of the player base.
# UniquePlayersCount comes from the player-count cell earlier in the notebook.
AgeCounts = groupedbyage["SN"].nunique()
AgePercentage = (AgeCounts / UniquePlayersCount) * 100

agetable = pd.DataFrame({"Total Count": AgeCounts,
                         "Percentage of Players": AgePercentage})

# Render the percentage column as text for display.
agetable["Percentage of Players"] = agetable["Percentage of Players"].map("{:.2f}%".format)

agetable


Unnamed: 0_level_0,Total Count,Percentage of Players
Age Ranges,Unnamed: 1_level_1,Unnamed: 2_level_1
>10,17,2.95%
10-14,22,3.82%
15-19,107,18.58%
20-24,258,44.79%
25-29,77,13.37%
30-34,52,9.03%
35-39,31,5.38%
40+,12,2.08%


In [None]:
# Purchasing analysis by age bracket.
#
# Uses groupedbyage and AgeCounts from the age-demographics cell. The result
# is built as its own frame rather than by adding formatted columns to
# agetable, so the demographics table shown above stays as displayed and
# this cell can be re-run without side effects.
AgePurchases = groupedbyage["SN"]

# Purchase rows and total spent per bracket.
PurchaseCounts = AgePurchases.count()
TotalPurchases = groupedbyage["Price"].sum()

# Average price of one purchase, and average spent per distinct player.
AveragePrice = TotalPurchases / PurchaseCounts
PerPerson = TotalPurchases / AgeCounts

# Money columns are rendered as currency strings for display. The column
# header "Purchase Count" matches the gender purchasing table.
newtable = pd.DataFrame({
    "Purchase Count": PurchaseCounts,
    "Average Purchase Price": AveragePrice.map("${:,.2f}".format),
    "Total Purchase Value": TotalPurchases.map("${:,.2f}".format),
    "Average Total Purchase Per Person": PerPerson.map("${:,.2f}".format),
})

newtable

# #calculate and assign Under 10 data
# Under10Purchases_df = PurchaseData_df.loc[PurchaseData_df["Age Ranges"] == ">10", :]
# UniqueUnder10Players = len(Under10Purchases_df["SN"].unique())
# Under10PurchaseSum = Under10Purchases_df["Price"].sum()
# Under10PurchaseCount = Under10Purchases_df["Purchase ID"].count()
# Under10AvgPrice = Under10PurchaseSum / Under10PurchaseCount
# Under10AvgTotal = Under10PurchaseSum / UniqueUnder10Players

# #calculate and assign 10-14 data
# TenToFourteenPurchases_df = PurchaseData_df.loc[PurchaseData_df["Age Ranges"] == "10-14", :]
# UniqueTenToFourteenPlayers = len(TenToFourteenPurchases_df["SN"].unique())
# TenToFourteenPurchaseSum = TenToFourteenPurchases_df["Price"].sum()
# TenToFourteenPurchaseCount = TenToFourteenPurchases_df["Price"].count()
# TenToFourteenAvgPrice = TenToFourteenPurchaseSum / TenToFourteenPurchaseCount
# TenToFourteenAvgTotal = TenToFourteenPurchaseSum / UniqueTenToFourteenPlayers

# #calculate and assign 15-19 data
# FifteenToNineteenPurchases_df = PurchaseData_df.loc[PurchaseData_df["Age Ranges"] == "15-19", :]
# UniqueFifteenToNineteenPlayers = len(FifteenToNineteenPurchases_df["SN"].unique())
# FifteenToNineteenPurchaseSum = FifteenToNineteenPurchases_df["Price"].sum()
# FifteenToNineteenPurchaseCount = FifteenToNineteenPurchases_df["Price"].count()
# FifteenToNineteenAvgPrice = FifteenToNineteenPurchaseSum / FifteenToNineteenPurchaseCount
# FifteenToNineteenAvgTotal = FifteenToNineteenPurchaseSum / UniqueFifteenToNineteenPlayers

# #calculate and assign 20-24 data
# TwentyToTwentyFourPurchases_df = PurchaseData_df.loc[PurchaseData_df["Age Ranges"] == "20-24", :]
# UniqueTwentyToTwentyFourPlayers = len(TwentyToTwentyFourPurchases_df["SN"].unique())
# TwentyToTwentyFourPurchaseSum = TwentyToTwentyFourPurchases_df["Price"].sum()
# TwentyToTwentyFourPurchaseCount = TwentyToTwentyFourPurchases_df["Price"].count()
# TwentyToTwentyFourAvgPrice = TwentyToTwentyFourPurchaseSum / TwentyToTwentyFourPurchaseCount
# TwentyToTwentyFourAvgTotal = TwentyToTwentyFourPurchaseSum / UniqueTwentyToTwentyFourPlayers

# #calculate and assign 25-29 data
# TwentyFiveToTwentyNinePurchases_df = PurchaseData_df.loc[PurchaseData_df["Age Ranges"] == "25-29", :]
# UniqueTwentyFiveToTwentyNinePlayers = len(TwentyFiveToTwentyNinePurchases_df["SN"].unique())
# TwentyFiveToTwentyNinePurchaseSum = TwentyFiveToTwentyNinePurchases_df["Price"].sum()
# TwentyFiveToTwentyNinePurchaseCount = TwentyFiveToTwentyNinePurchases_df["Price"].count()
# TwentyFiveToTwentyNineAvgPrice = TwentyFiveToTwentyNinePurchaseSum / TwentyFiveToTwentyNinePurchaseCount
# TwentyFiveToTwentyNineAvgTotal = TwentyFiveToTwentyNinePurchaseSum / UniqueTwentyFiveToTwentyNinePlayers

# #calculate and assign 30-34 data
# ThirtyToThirtyFourPurchases_df = PurchaseData_df.loc[PurchaseData_df["Age Ranges"] == "30-34", :]
# UniqueThirtyToThirtyFourPlayers = len(ThirtyToThirtyFourPurchases_df["SN"].unique())
# ThirtyToThirtyFourPurchaseSum = ThirtyToThirtyFourPurchases_df["Price"].sum()
# ThirtyToThirtyFourPurchaseCount = ThirtyToThirtyFourPurchases_df["Price"].count()
# ThirtyToThirtyFourAvgPrice = ThirtyToThirtyFourPurchaseSum / ThirtyToThirtyFourPurchaseCount
# ThirtyToThirtyFourAvgTotal = ThirtyToThirtyFourPurchaseSum / UniqueThirtyToThirtyFourPlayers

# #calculate and assign 35-39 data
# ThirtyFiveToThirtyNinePurchases_df = PurchaseData_df.loc[PurchaseData_df["Age Ranges"] == "35-39", :]
# UniqueThirtyFiveToThirtyNinePlayers = len(ThirtyFiveToThirtyNinePurchases_df["SN"].unique())
# ThirtyFiveToThirtyNinePurchaseSum = ThirtyFiveToThirtyNinePurchases_df["Price"].sum()
# ThirtyFiveToThirtyNinePurchaseCount = ThirtyFiveToThirtyNinePurchases_df["Price"].count()
# ThirtyFiveToThirtyNineAvgPrice = ThirtyFiveToThirtyNinePurchaseSum / ThirtyFiveToThirtyNinePurchaseCount
# ThirtyFiveToThirtyNineAvgTotal = ThirtyFiveToThirtyNinePurchaseSum / UniqueThirtyFiveToThirtyNinePlayers

# #calculate and assign 40+ data
# FortyPlusPurchases_df = PurchaseData_df.loc[PurchaseData_df["Age Ranges"] == "40+", :]
# UniqueFortyPlusPlayers = len(FortyPlusPurchases_df["SN"].unique())
# FortyPlusPurchaseSum = FortyPlusPurchases_df["Price"].sum()
# FortyPlusPurchaseCount = FortyPlusPurchases_df["Price"].count()
# FortyPlusAvgPrice = FortyPlusPurchaseSum / FortyPlusPurchaseCount
# FortyPlusAvgTotal = FortyPlusPurchaseSum / UniqueFortyPlusPlayers

# Data = [[">10", Under10PurchaseCount, Under10AvgPrice, Under10PurchaseSum, Under10AvgTotal],
#         ["10-14", TenToFourteenPurchaseCount, TenToFourteenAvgPrice, TenToFourteenPurchaseSum, TenToFourteenAvgTotal],
#         ["15-19", FifteenToNineteenPurchaseCount, FifteenToNineteenAvgPrice, FifteenToNineteenPurchaseSum, FifteenToNineteenAvgTotal],
#         ["20-24", TwentyToTwentyFourPurchaseCount, TwentyToTwentyFourAvgPrice, TwentyToTwentyFourPurchaseSum, TwentyToTwentyFourAvgTotal],
#         ["25-29", TwentyFiveToTwentyNinePurchaseCount, TwentyFiveToTwentyNineAvgPrice, TwentyFiveToTwentyNinePurchaseSum, TwentyFiveToTwentyNineAvgTotal],
#         ["30-34", ThirtyToThirtyFourPurchaseCount, ThirtyToThirtyFourAvgPrice, ThirtyToThirtyFourPurchaseSum, ThirtyToThirtyFourAvgTotal],
#         ["35-39", ThirtyFiveToThirtyNinePurchaseCount, ThirtyFiveToThirtyNineAvgPrice, ThirtyFiveToThirtyNinePurchaseSum, ThirtyFiveToThirtyNineAvgTotal],
#         ["40+", FortyPlusPurchaseCount, FortyPlusAvgPrice, FortyPlusPurchaseSum, FortyPlusAvgTotal]
#        ]

# # create age purchasing analysis data frame
# PurchasingAnalysis_df = pd.DataFrame(Data, columns = ["Age Ranges", 'Purchase Count', "Average Purchase Price", 'Total Purchase Value', 'Average Total Purchase Per Person'])
# PurchasingAnalysis_df

#format columns to correct number formats
# PurchaseData_df["Total Purchase Value"] = agetable["Total Purchase Value"].map("${:.2f}".format)
# PurchaseData_df["Average Purchase Price"] = agetable["Average Purchase Price"].map("${:.2f}".format)
# PurchaseData_df["Average Total Purchase Per Person"] = agetable["Average Total Purchase Per Person"].map("${:.2f}".format)

# grouped_PurchasingAnalysis_df = PurchasingAnalysis_df.set_index("Age Ranges")
# grouped_PurchasingAnalysis_df
