In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)
purchase_data.head()


Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [2]:
# Player Count

print(f"Total Number of Players:{purchase_data['SN'].nunique()}")

Total Number of Players:576


In [3]:
#Purchasing Analysis (Total)

pur = {"Number of Unique Items":[purchase_data["Item ID"].nunique()], 
          "Average Price":[purchase_data["Price"].mean()],
          "Number of Purchases":[purchase_data["Purchase ID"].count()],
          "Total Revenue":[purchase_data["Price"].sum()]}
pur_df = pd.DataFrame(data=pur)

# Formatting of Numbers
pur_df["Average Price"] = pur_df["Average Price"].map("${0:,.2f}".format)
pur_df["Total Revenue"] = pur_df["Total Revenue"].map("${0:,.2f}".format)


pur_df.apply
print(pur_df)

   Number of Unique Items Average Price  Number of Purchases Total Revenue
0                     179         $3.05                  780     $2,379.77


In [4]:
gender_data = purchase_data[["Gender","SN"]].drop_duplicates()
gender_data 
#np.unique(gender_data)

Unnamed: 0,Gender,SN
0,Male,Lisim78
1,Male,Lisovynya38
2,Male,Ithergue48
3,Male,Chamassasya86
4,Male,Iskosia90
...,...,...
773,Male,Hala31
774,Male,Jiskjask80
775,Female,Aethedru70
777,Male,Yathecal72


In [5]:
purchase_data["Gender %"] = purchase_data["Gender"].sum()

In [6]:
#purchase_data.value_counts("Gender")

In [7]:
#Gender Demographics
gender_count = gender_data.groupby("Gender").count()
gender_percent = (gender_data.groupby("Gender").count()/gender_data.count())*100
del gender_percent["Gender"]
disp_gender = pd.merge(gender_count,gender_percent,on="Gender")
disp_gender.columns = ["Total Count", "Percentage of Players"]
disp_gender["Percentage of Players"] = disp_gender ["Percentage of Players"].map("{0:,.2f}%".format)
#Display
#Percentage and Count of Male Players
#Percentage and Count of Female Players
#Percentage and Count of Other / Non-Disclosed
disp_gender

Unnamed: 0_level_0,Total Count,Percentage of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,81,14.06%
Male,484,84.03%
Other / Non-Disclosed,11,1.91%


In [8]:
#Purchasing Analysis (Gender)
#Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender
pur = {
    "Number of Unique Items":[purchase_data["Item ID"].nunique()], 
          "Average Price":[purchase_data["Price"].mean()],
          "Number of Purchases":[purchase_data["Purchase ID"].count()],
          "Total Revenue":[purchase_data["Price"].sum()]}
#Create a summary data frame to hold the results
pur_df = pd.DataFrame(data=pur)

# Formatting of Numbers
pur_df["Average Price"] = pur_df["Average Price"].map("${0:,.2f}".format)
pur_df["Total Revenue"] = pur_df["Total Revenue"].map("${0:,.2f}".format)


pur_df.apply
#Display the summary data frame
print(pur_df)

   Number of Unique Items Average Price  Number of Purchases Total Revenue
0                     179         $3.05                  780     $2,379.77


In [10]:
#Age Demographics
bins = [0,9.99,14.99,19.99,24.99,29.99,34.99,39.99,44.99,49.99]
group_names = ["<10","10-14", "15-19", "20-24", "25-29","30-34","35-39","40-44","45-49"]
purchase_data["Age Group"] = pd.cut(purchase_data["Age"],bins,labels=group_names,include_lowest=True)

#Purchase Count
re_data = purchase_data.groupby("Age Group")["Purchase ID"].count()

#Average Purchase Price
re_data_avgPrice = purchase_data.groupby("Age Group")["Price"].mean()

#Total Purchase Value
re_data_total = purchase_data.groupby("Age Group")["Price"].sum()
mer = pd.merge(re_data,re_data_avgPrice,on=["Age Group"])
mer2 = pd.merge(mer,re_data_total,on=["Age Group"])
mer2



#Average Purchase Total per Person by Age Group
re_data_total_sn = purchase_data.groupby(["SN","Age Group"])["Price"].mean() 

re_data_total_sn.head()

SN            Age Group
Adairialis76  <10           NaN
              10-14         NaN
              15-19        2.28
              20-24         NaN
              25-29         NaN
Name: Price, dtype: float64