In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the purchase records; the path is relative to the notebook's working directory.
csv_path = 'Resources/purchase_data.csv'
data_df = pd.read_csv(filepath_or_buffer=csv_path)

In [21]:
# Number of Players
# Each distinct value in the "SN" column is counted once, however many
# purchase rows it appears on.
num_players = data_df["SN"].nunique()

# One-row summary frame, built from a single record.
players_df = pd.DataFrame([{"Number of Players": num_players}])
players_df

Unnamed: 0,Number of Players
0,576


In [45]:
# Purchasing Analysis (Total)
# Store-wide totals: distinct item names, mean price, purchase count and revenue.
purchasing_data = {
    "Number of Unique Items": [data_df["Item Name"].nunique()],
    "Average Price": [round(data_df["Price"].mean(), 2)],
    "Number of Purchases": [data_df["Purchase ID"].count()],
    "Total Revenue": [data_df["Price"].sum()],
}

# Build the one-row summary and render both money columns as "$x.xx" strings
# in the same chained expression.
purchasing_df = pd.DataFrame(purchasing_data).assign(
    **{
        "Average Price": lambda frame: frame["Average Price"].map("${:.2f}".format),
        "Total Revenue": lambda frame: frame["Total Revenue"].map("${:.2f}".format),
    }
)
purchasing_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,$2379.77


In [46]:
# Gender Demographics
# Distinct-player counts per gender, and each count as a share of
# `num_players` (computed in the "Number of Players" cell).
genders = ("Male", "Female", "Other / Non-Disclosed")

# Distinct "SN" values among the rows recorded under each gender label.
gender_counts = [
    data_df.loc[data_df["Gender"] == label, "SN"].nunique()
    for label in genders
]

gender_data = {
    "Gender": list(genders),
    "Total Count": gender_counts,
    "Percentage of Players": [100 * n / num_players for n in gender_counts],
}

gender_df = pd.DataFrame(gender_data).set_index("Gender")
# Render the share as text with two decimals and a trailing percent sign.
gender_df["Percentage of Players"] = gender_df["Percentage of Players"].map("{:.2f}%".format)
gender_df

Unnamed: 0_level_0,Total Count,Percentage of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


In [None]:
# Purchasing Analysis (Gender)

def gender_analysis(gender, purchases=None):
    """Summarise the purchases recorded under one gender label.

    Parameters
    ----------
    gender : str
        Value compared for exact equality against the "Gender" column.
    purchases : pandas.DataFrame, optional
        Purchase records with "Gender", "Purchase ID", "Price" and "SN"
        columns. When omitted, the notebook-level ``data_df`` is used.

    Returns
    -------
    dict
        "Purchase Count" (int), "Average Purchase Price",
        "Total Purchase Value" and "Avg Total Purchase per Person"
        (floats rounded to 2 decimals). The per-person average is NaN when
        no rows match ``gender``.
    """
    if purchases is None:
        purchases = data_df

    df = purchases[purchases['Gender'] == gender]
    purchase_count = int(df['Purchase ID'].count())
    avg_price = float(round(df['Price'].mean(), 2))
    total_value = float(round(df['Price'].sum(), 2))

    # A player with several purchases is counted once in the denominator.
    player_count = df['SN'].nunique()
    if player_count:
        avg_per_player = round(total_value / player_count, 2)
    else:
        # No matching players: report NaN instead of dividing by zero.
        avg_per_player = float('nan')

    # Hand the metrics back to the caller rather than discarding them.
    return {
        "Purchase Count": purchase_count,
        "Average Purchase Price": avg_price,
        "Total Purchase Value": total_value,
        "Avg Total Purchase per Person": avg_per_player,
    }

In [None]:
# Age Demographics

# Each entry is (lower bound, upper bound, display label). age_analysis treats
# the range as half-open: lower <= Age < upper.
age_bins = (
    (0, 10, "< 10"),
    (10, 15, "10 - 15"),
    (15, 20, "15 - 20"),
    (20, 30, "20 - 30"),  # label previously read "20 - 50", contradicting the bounds
    (30, 40, "30 - 40"),
    (40, 1000, "> 40")
)

def age_analysis(bin, purchases=None):
    """Summarise the purchases made by players whose age falls in one bin.

    Parameters
    ----------
    bin : sequence
        ``bin[0]`` is the inclusive lower age bound and ``bin[1]`` the
        exclusive upper bound; any further elements (such as a label) are
        ignored here. The name shadows the builtin ``bin`` and is kept only
        so existing keyword callers keep working.
    purchases : pandas.DataFrame, optional
        Purchase records with "Age", "Purchase ID", "Price" and "SN"
        columns. When omitted, the notebook-level ``data_df`` is used.

    Returns
    -------
    dict
        "Purchase Count" (int), "Average Purchase Price",
        "Total Purchase Value" and "Avg Total Purchase per Person"
        (floats rounded to 2 decimals). The per-person average is NaN when
        no rows fall inside the bin.
    """
    if purchases is None:
        purchases = data_df

    lower, upper = bin[0], bin[1]
    df = purchases[(purchases['Age'] >= lower) & (purchases['Age'] < upper)]
    purchase_count = int(df['Purchase ID'].count())
    avg_price = float(round(df['Price'].mean(), 2))
    total_value = float(round(df['Price'].sum(), 2))

    # A player with several purchases is counted once in the denominator.
    player_count = df['SN'].nunique()
    if player_count:
        avg_per_player = round(total_value / player_count, 2)
    else:
        # Empty bin: report NaN instead of dividing by zero.
        avg_per_player = float('nan')

    # Hand the metrics back to the caller rather than discarding them.
    return {
        "Purchase Count": purchase_count,
        "Average Purchase Price": avg_price,
        "Total Purchase Value": total_value,
        "Avg Total Purchase per Person": avg_per_player,
    }

In [None]:
# Preview only the first rows instead of rendering the whole purchase table.
data_df.head()