In [97]:
import pandas as pd
import numpy as np

In [98]:
#Read csv
csv_path = 'purchase_data.csv'
purchase_df = pd.read_csv(csv_path, encoding="utf-8")
#print(purchase_df.columns.values)
# purchase_df1 = pd.DataFrame(purchase_df)
purchase_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [99]:
purchase_df[['Price', 'Gender']]

Unnamed: 0,Price,Gender
0,3.53,Male
1,1.56,Male
2,4.88,Male
3,3.27,Male
4,1.44,Male
...,...,...
775,3.54,Female
776,1.63,Male
777,3.46,Male
778,4.19,Male


In [100]:
#Player Count
pd.Index(list('Total'))
total_players = purchase_df['Purchase ID'].count()

summary_total = {
    "Total Players": [total_players]
}
total = pd.DataFrame(summary_total)
total


Unnamed: 0,Total Players
0,780


In [101]:
#Purchasing Analysis

uniq_items = (purchase_df["Item Name"])
uniq_total = len(set(uniq_items))
total_rev = (purchase_df["Price"].sum())
average = (total_rev / total_players)
num_purch = (total_players)
purch_summary = {
    "Number of Purchases": [num_purch],
    "Total Unique Items": [uniq_total],
    "Total Revenue": [total_rev],
    "Average Price": [average]
}
summary_df = pd.DataFrame(purch_summary)
summary_df.style.format({"Average Price": "${:.2f}", "Total Revenue": "${:.2f}"})

Unnamed: 0,Number of Purchases,Total Unique Items,Total Revenue,Average Price
0,780,179,$2379.77,$3.05


In [102]:
#Gender Demographics

male = purchase_df['Gender'].value_counts()['Male']
perc_male = (male / total_players) * 100
female = purchase_df['Gender'].value_counts()['Female']
perc_female = (female / total_players) * 100
other = total_players - (male + female)
perc_other = (other / total_players) * 100

gender_summary = ({"Gender": ["Male", "Female", "Other"],
    "Count": [male, female, other],
    "Percentage": [perc_male, perc_female, perc_other]
})
summary_gender = pd.DataFrame(gender_summary)
summary_gender.style.format({"Percentage": "{:.2f}%"})

Unnamed: 0,Gender,Count,Percentage
0,Male,652,83.59%
1,Female,113,14.49%
2,Other,15,1.92%


In [103]:
# Purchasing Analysis (Gender)


male_purchases = purchase_df.loc[purchase_df['Gender'] == 'Male', :]
avg_purch_male = male_purchases['Price'].mean()
total_male = male_purchases['Price'].sum()
avg_per_male = total_male / male

female_purchases = purchase_df.loc[purchase_df['Gender'] == 'Female', :]
avg_purch_female = female_purchases['Price'].mean()
total_female = female_purchases['Price'].sum()
avg_per_female = total_female / female

other_purchases = purchase_df.loc[purchase_df['Gender'] == 'Other / Non-Disclosed', :]
avg_purch_other = other_purchases['Price'].mean()
total_other = other_purchases['Price'].sum()
avg_per_other = total_other / other

gender_summary = ({"Gender": ["Male", "Female", "Other"],
    "Purchase Count": [male,female,other],
    "Avg Purchase Price": [avg_purch_male,avg_purch_female,avg_purch_other],
    "Total Purchase Value": [total_male,total_female,total_other],
    "Avg Purchase per Person": [avg_per_male,avg_per_female,avg_per_other]
})
summary_gender = pd.DataFrame(gender_summary)

summary_gender.style.format({"Avg Purchase Price": "${:2f}", "Avg Purchase per Person": "${:2f}", "Total Purchase Value": "${:2f}"})

Unnamed: 0,Gender,Purchase Count,Avg Purchase Price,Total Purchase Value,Avg Purchase per Person
0,Male,652,$3.017853,$1967.640000,$3.017853
1,Female,113,$3.203009,$361.940000,$3.203009
2,Other,15,$3.346000,$50.190000,$3.346000


In [104]:
#Age Demographics

ages = [0, 10, 15, 20, 25, 30, 35, 40, 50]
group_names = ["< 10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"] 
purchase_df["Ages"] = pd.cut(purchase_df["Age"], ages, labels=group_names, right=False)
age_group = purchase_df.groupby("Ages")
num_people = age_group['SN'].count()
perc_per_age = age_group['Price'].mean()
avg = (perc_per_age / total_players) *100
summary_Demo = pd.DataFrame({"Percentage of People": perc_per_age, "Number of People": num_people})
summary_Demo.style.format({"Percentage of People": "{:2f}%"})

Unnamed: 0_level_0,Percentage of People,Number of People
Ages,Unnamed: 1_level_1,Unnamed: 2_level_1
< 10,3.353478%,23
10-14,2.956429%,28
15-19,3.035956%,136
20-24,3.052219%,365
25-29,2.900990%,101
30-34,2.931507%,73
35-39,3.601707%,41
40+,2.941538%,13


In [167]:
#Purchase Analysis Age

total_purchase = purchase_df.groupby('Ages')['Price'].sum()
total_count = purchase_df.groupby('Ages')['Item Name'].count()
purchase_avg = purchase_df.groupby('Ages')['Price'].mean()

data = pd.DataFrame({
    "Purchase Count": total_count,
    "Average Purchase Price": purchase_avg,
    "Total Purchase Value": total_purchase, 
    "Average Total Per Person": average
})
data.style.format({"Average Purchase Price": "${:2f}", "Total Purchase Value": "${:2f}", "Average Total Per Person": "${:2f}"})



Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Average Total Per Person
Ages,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
< 10,23,$3.353478,$77.130000,$3.050987
10-14,28,$2.956429,$82.780000,$3.050987
15-19,136,$3.035956,$412.890000,$3.050987
20-24,365,$3.052219,$1114.060000,$3.050987
25-29,101,$2.900990,$293.000000,$3.050987
30-34,73,$2.931507,$214.000000,$3.050987
35-39,41,$3.601707,$147.670000,$3.050987
40+,13,$2.941538,$38.240000,$3.050987


In [174]:

total_purchase_one = purchase_df.groupby('SN')['Price'].max()
total_count_one = purchase_df.groupby('SN')['Price'].count()
purchase_avg_one = purchase_df.groupby('SN')['Price'].mean()

sn = pd.DataFrame({
      "SN": ["SN"],
      "Purchase Count": total_count_one,
      "Average Purchase Price": purchase_avg_one,
      "Total Purchase Value": total_purchase_one
})
sn

ValueError: array length 1 does not match index length 576