In [1]:
# main data pull
import pandas as pd
import numpy as np

maincsv = "Resources/purchase_data.csv"
alldata = pd.read_csv(maincsv)
alldata.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


# Player Count

In [9]:
players = alldata["SN"].unique()
playercount = len(players)
PlayerTable = pd.DataFrame([{"Player Count": playercount}])
PlayerTable

Unnamed: 0,Player Count
0,576


# Purchasing Analysis (Total)

In [25]:
# Unique Items
items = alldata["Item ID"].unique()
itemcount = len(items)

# Total Purchases
purchases = len(alldata["Purchase ID"])

# Total Revenue
totalrev = alldata["Price"].sum()
totalrev = float(totalrev)

# Average Purchase Price
avgprice = totalrev / purchases
avgprice = float(avgprice)

# Formatting/Data Framing
totalrev = '${:2f}'.format(totalrev)
avgprice ='${:2f}'.format(avgprice)

Summary = pd.DataFrame([{"Number of Unique Items": itemcount, 
                                             "Average Purchase Price": avgprice, 
                                            "Number of Purchases": purchases, 
                                             "Total Revenue": totalrev}])

Summary = Summary[["Number of Unique Items", 
                                   "Number of Purchases", 
                                   "Average Purchase Price", 
                                   "Total Revenue"]]

Summary

Unnamed: 0,Number of Unique Items,Number of Purchases,Average Purchase Price,Total Revenue
0,183,780,$3.050987,$2379.770000


# Gender Demographics

In [27]:
# Data Pulls
males = alldata.loc[alldata["Gender"] == 'Male']
malecount = len(males["SN"].unique())

females = alldata.loc[alldata["Gender"] == 'Female']
femalecount = len(females["SN"].unique())

others = alldata.loc[alldata["Gender"] == 'Other / Non-Disclosed']
othercount = len(others["SN"].unique())

malePCT = malecount / playercount * 100
femalePCT = femalecount / playercount * 100
othersPCT = othercount / playercount * 100

GenderDemo = pd.DataFrame([{"Gender": "Male", 
                                                 "Total Count": malecount, 
                                                 "Percentage of Players": malePCT}, 
                                                {"Gender": "Female", 
                                                 "Total Count": femalecount, 
                                                 "Percentage of Players": femalePCT}, 
                                                {"Gender": "Other / Non-Disclosed", 
                                                 "Total Count": othercount, 
                                                 "Percentage of Players": othersPCT}])

GenderDemo = GenderDemo [["Gender", "Total Count", "Percentage of Players"]]

GenderDemo["Percentage of Players"] = GenderDemo["Percentage of Players"].map("{:.2f}%".format)
GenderDemo

Unnamed: 0,Gender,Total Count,Percentage of Players
0,Male,484,84.03%
1,Female,81,14.06%
2,Other / Non-Disclosed,11,1.91%


# Puchase Analysis (Gender)

In [31]:
malepurchases = len(males["Purchase ID"])
malevalue = males["Price"].sum()
malevalue = float(malevalue)

femalepurchases = len(females["Purchase ID"])
femalevalue = females["Price"].sum()
femalevalue = float(femalevalue)

otherpurchases = len(others["Purchase ID"])
othervalue = others["Price"].sum()
othervalue = float(othervalue)

maleavgprice = malevalue / malepurchases
femaleavgprice = femalevalue / femalepurchases
otheravgprice = othervalue / otherpurchases

maleavgpurchase = malevalue / malecount
femaleavgpurchase = femalevalue / femalecount
otheravgpurchase = othervalue / othercount

GenderAnalysis = pd.DataFrame([{"Gender": "Male", 
                                        "Purchase Count": malepurchases, 
                                        "Avg Purchase Price": maleavgprice,
                                       "Total Purchase Value": malevalue, 
                                        "Avg Total Purchase per Person": maleavgpurchase}, 
                                       {"Gender": "Female", "Purchase Count": femalepurchases, 
                                        "Avg Purchase Price": femaleavgprice,
                                       "Total Purchase Value": femalevalue, 
                                        "Avg Total Purchase per Person": femaleavgpurchase}, 
                                       {"Gender": "Other / Non-Disclosed", 
                                        "Purchase Count": otherpurchases, 
                                        "Avg Purchase Price": otheravgprice,
                                       "Total Purchase Value": othervalue, 
                                        "Avg Total Purchase per Person": otheravgpurchase
                                       }])

GenderAnalysis = GenderAnalysis[["Gender", "Purchase Count", "Avg Purchase Price",  "Total Purchase Value", "Avg Total Purchase per Person"]]

GenderAnalysis["Avg Purchase Price"] = GenderAnalysis["Avg Purchase Price"].map("${:.2f}".format)
GenderAnalysis["Total Purchase Value"] = GenderAnalysis["Total Purchase Value"].map("${:.2f}".format)
GenderAnalysis["Avg Total Purchase per Person"] = GenderAnalysis["Avg Total Purchase per Person"].map("${:.2f}".format)

GenderAnalysis

Unnamed: 0,Gender,Purchase Count,Avg Purchase Price,Total Purchase Value,Avg Total Purchase per Person
0,Male,652,$3.02,$1967.64,$4.07
1,Female,113,$3.20,$361.94,$4.47
2,Other / Non-Disclosed,15,$3.35,$50.19,$4.56


# Age Demographics 

In [35]:
agebins = [0, 9, 14, 19, 24, 29, 34, 39, 200]
agenames = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

ages = alldata.loc[:, ["Age", "SN", "Price"]]

ages["Age"] = pd.cut(ages["Age"], agebins, labels=agenames)

agepurchaseslist = []
agecountlist = []
agevalues = []

for i in range(len(agenames)):

    agepurchaseslist.append(ages.loc[ages["Age"] == agenames[i], :])
    agecountlist.append(len(agepurchaseslist[i]["SN"].unique()))
    agevalues.append(float(agepurchaseslist[i]["Price"].sum()))
    agepurchaseslist[i] = len(agepurchaseslist[i])

AgeDemos = pd.DataFrame({"Age": agenames, "Players": agecountlist})
AgeDemos

Unnamed: 0,Age,Players
0,<10,17
1,10-14,22
2,15-19,107
3,20-24,258
4,25-29,77
5,30-34,52
6,35-39,31
7,40+,12
