In [240]:
import pandas as pd
import numpy as np 
from collections import Counter

In [241]:
file = 'Resources/purchase_data.csv'
purchase_data = pd.read_csv(file)

In [242]:
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## Player Count

In [243]:
gender_count = list(purchase_data["Gender"].unique())
gender_count

['Male', 'Other / Non-Disclosed', 'Female']

In [244]:
purchase_df= pd.DataFrame(purchase_data)

In [245]:
#Total Purchases
total = purchase_data["SN"].count()


In [246]:
# Unique Player Count Purchases
player_count = len(purchase_df["SN"].unique())


In [247]:
players = pd.DataFrame({"Total Players": [player_count]
                              })
players

Unnamed: 0,Total Players
0,576


## Purchasing Analysis (Total)

In [248]:
item_count = len(purchase_df["Item ID"].unique())
avg_item_price = purchase_df["Price"].mean()
total_items_purchased = purchase_data["SN"].count()
sum_item_price = purchase_df["Price"].sum()

In [249]:
purchase_analysis = pd.DataFrame({"Number of Unique Items": [item_count],
                                  "Average Price": [avg_item_price],
                                  "Total Purchases": [total_items_purchased],
                                  "Total Revenue": [sum_item_price]
                              })
purchase_analysis

Unnamed: 0,Number of Unique Items,Average Price,Total Purchases,Total Revenue
0,183,3.050987,780,2379.77


In [250]:
# Setting Purchase Data Frame to Purchase ID 
purchase_id = purchase_df.set_index("Purchase ID") 

## Purchasing Analysis (Gender)

In [251]:
grouped_gender= purchase_id.groupby(["Gender"])

In [252]:
gender_counts = purchase_id["Gender"].value_counts()
gender_avg = grouped_gender["Price"].mean()
gender_sum = grouped_gender["Price"].sum()
norm_total = gender_sum / gender_counts

In [253]:
summary_table = pd.DataFrame({"Purchase Count": gender_counts,
                              "Average Purchase Price": gender_avg,
                                    "Purchase Total": gender_sum,
                               "Average Purchase Total Per Person": norm_total})
summary_table.head()

Unnamed: 0,Purchase Count,Average Purchase Price,Purchase Total,Average Purchase Total Per Person
Female,113,3.203009,361.94,3.203009
Male,652,3.017853,1967.64,3.017853
Other / Non-Disclosed,15,3.346,50.19,3.346


## Gender Demographics

In [254]:
# Get Total Males
males = purchase_id.loc[purchase_id["Gender"] == "Male", :]
total_males = males["SN"].count()

In [255]:
# Get Male Percentage
Percentage_Males = total_males/total *100

In [256]:
#Femal Total
females = purchase_id.loc[purchase_id["Gender"] == "Female", :]
total_females = females["SN"].count()

In [257]:
#Femal Percent
Percentage_females = total_females/total * 100

In [258]:
#Other Total
other = purchase_id.loc[purchase_id["Gender"] == "Other / Non-Disclosed", :]
total_other = other["SN"].count()

In [259]:
#Other Percent
Percentage_other = total_other/total * 100

In [260]:
gender_demo = pd.DataFrame({"Percentage of Players": [Percentage_Males, Percentage_females, Percentage_other],
                              "Total Count": [total_males, total_females, total_other]
                                    },index=['male','female','other'])
gender_demo.head()

Unnamed: 0,Percentage of Players,Total Count
male,83.589744,652
female,14.487179,113
other,1.923077,15


## Age Demographics

In [261]:
age_bins = [0, 9.90, 14.90, 19.90, 24.90, 29.90, 34.90, 39.90, 99999]
group_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

In [262]:
pd.cut(purchase_id["Age"], age_bins, labels=group_names).head()

Purchase ID
0    20-24
1      40+
2    20-24
3    20-24
4    20-24
Name: Age, dtype: category
Categories (8, object): [<10 < 10-14 < 15-19 < 20-24 < 25-29 < 30-34 < 35-39 < 40+]

In [263]:
#Creating Age Groups
purchase_id["Age Group"] = pd.cut(purchase_id["Age"], age_bins, labels=group_names)
purchase_id.head()

Unnamed: 0_level_0,SN,Age,Gender,Item ID,Item Name,Price,Age Group
Purchase ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53,20-24
1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56,40+
2,Ithergue48,24,Male,92,Final Critic,4.88,20-24
3,Chamassasya86,24,Male,100,Blindscythe,3.27,20-24
4,Iskosia90,23,Male,131,Fury,1.44,20-24


In [264]:
age_grouped= purchase_id.groupby(["Age Group"])

In [265]:
age_group_count= purchase_id["Age Group"].value_counts()
age_group_percent = age_group_count/player_count *100

In [266]:
age_table = pd.DataFrame({"Percentage of Players": age_group_percent,
                            "Total Counts": age_group_count
                         }, index =['<10', '10-14','15-19','20-24','25-29','30-34','35-39','40+'])
age_table

Unnamed: 0,Percentage of Players,Total Counts
<10,3.993056,23
10-14,4.861111,28
15-19,23.611111,136
20-24,63.368056,365
25-29,17.534722,101
30-34,12.673611,73
35-39,7.118056,41
40+,2.256944,13


## Purchasing Analysis (Age)

In [280]:
age_avg = age_grouped["Price"].mean()
age_sum = age_grouped["Price"].sum()
age_total_avg = age_sum / age_group_count

In [281]:
age_analysis = pd.DataFrame({"Total Counts": age_group_count,
                             "Average Purchase Price":age_avg,
                             " Total Purchase Value": age_sum,
                             "Average Purchase Total per Person": age_total_avg
                                 })
age_analysis

Unnamed: 0,Total Counts,Average Purchase Price,Total Purchase Value,Average Purchase Total per Person
10-14,28,2.956429,82.78,2.956429
15-19,136,3.035956,412.89,3.035956
20-24,365,3.052219,1114.06,3.052219
25-29,101,2.90099,293.0,2.90099
30-34,73,2.931507,214.0,2.931507
35-39,41,3.601707,147.67,3.601707
40+,13,2.941538,38.24,2.941538
<10,23,3.353478,77.13,3.353478


## Top Spenders

In [282]:
purchase_count = purchase_data["SN"].value_counts()
purchase_count.head()

Lisosia93      5
Idastidru52    4
Iral74         4
Hada39         3
Raesty92       3
Name: SN, dtype: int64

In [283]:
SN = purchase_df.set_index("SN")
SN.head()

Unnamed: 0_level_0,Purchase ID,Age,Gender,Item ID,Item Name,Price
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Lisim78,0,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
Lisovynya38,1,40,Male,143,Frenzied Scimitar,1.56
Ithergue48,2,24,Male,92,Final Critic,4.88
Chamassasya86,3,24,Male,100,Blindscythe,3.27
Iskosia90,4,23,Male,131,Fury,1.44


In [284]:
top_spenders = purchase_df[["SN", "Price"]].groupby(by= "SN").sum().sort_values(by="Price", ascending= False).head()

In [285]:
top_spenders = {"Price" : ['count','sum','mean']}
spending_SN = purchase_df.groupby('SN').agg(top_spenders)["Price"].sort_values(by="count", ascending= False).head()
spending_SN = spending_SN.rename(columns={'count': 'Purchase Count', 'sum': 'Total Purchase Value', 'mean': 'Average Purchase Price'})                             

In [286]:
spending_SN.columns

Index(['Purchase Count', 'Total Purchase Value', 'Average Purchase Price'], dtype='object')

In [287]:
spending_SN

Unnamed: 0_level_0,Purchase Count,Total Purchase Value,Average Purchase Price
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,18.96,3.792
Iral74,4,13.62,3.405
Idastidru52,4,15.45,3.8625
Asur53,3,7.44,2.48
Inguron55,3,11.11,3.703333


## Most Popular Item

In [288]:
item_id= purchase_df.set_index("Item ID")
item_id.head()

Unnamed: 0_level_0,Purchase ID,SN,Age,Gender,Item Name,Price
Item ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
108,0,Lisim78,20,Male,"Extraction, Quickblade Of Trembling Hands",3.53
143,1,Lisovynya38,40,Male,Frenzied Scimitar,1.56
92,2,Ithergue48,24,Male,Final Critic,4.88
100,3,Chamassasya86,24,Male,Blindscythe,3.27
131,4,Iskosia90,23,Male,Fury,1.44


In [289]:
item_grouped= item_id.groupby(["Item ID"])

In [290]:
item_count = purchase_data["Item ID"].value_counts()
item_name = purchase_data["Item Name"].unique()
item_price = item_avg = item_grouped["Price"].mean()
item_total = item_price * item_count

In [291]:
item_analysis = pd.DataFrame({ "Purchase Count":item_count,
                                "Item Price": item_price,
                              "Total Purchase Value": item_total
                                  })
item_analysis_sorted = item_analysis.sort_values(["Purchase Count"], ascending=False)
item_analysis_sorted.head()

Unnamed: 0,Purchase Count,Item Price,Total Purchase Value
178,12,4.23,50.76
145,9,4.58,41.22
108,9,3.53,31.77
82,9,4.9,44.1
19,8,1.02,8.16


## Most Profitable Item

In [292]:
most_profitable_item = item_analysis.sort_values(["Total Purchase Value"], ascending=False)
most_profitable_item.head()

Unnamed: 0,Purchase Count,Item Price,Total Purchase Value
178,12,4.23,50.76
82,9,4.9,44.1
145,9,4.58,41.22
92,8,4.88,39.04
103,8,4.35,34.8
