In [171]:
# Dependencies and setup
import pandas as pd
import numpy as np 

# File to load
file_to_load = "Resources/purchase_data.csv"

# Read purchasing file and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)

In [172]:
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [173]:
# Looking for missing values
purchase_data.count()

Purchase ID    780
SN             780
Age            780
Gender         780
Item ID        780
Item Name      780
Price          780
dtype: int64

In [174]:
# Describing data
purchase_data.describe()

Unnamed: 0,Purchase ID,Age,Item ID,Price
count,780.0,780.0,780.0,780.0
mean,389.5,22.714103,92.114103,3.050987
std,225.310896,6.659444,52.775943,1.169549
min,0.0,7.0,0.0,1.0
25%,194.75,20.0,48.0,1.98
50%,389.5,22.0,93.0,3.15
75%,584.25,25.0,139.0,4.08
max,779.0,45.0,183.0,4.99


In [175]:
# Finding unique values
unique_SN = purchase_data.groupby('SN')['SN'].nunique()
unique_SN.head()

SN
Adairialis76    1
Adastirin33     1
Aeda94          1
Aela59          1
Aelaria33       1
Name: SN, dtype: int64

In [176]:
# Total players
total_players = unique_SN.count()
total_players

576

In [177]:
# Number of unique items
unique_items = purchase_data.groupby('Item Name')['Item Name'].nunique()
total_items = unique_items.count()
total_items

179

In [178]:
# Average price
average_price = purchase_data['Price'].mean()
average_price

3.050987179487176

In [179]:
# Number of purchases
total_purchase = purchase_data['Purchase ID'].count()
total_purchase

780

In [180]:
# Total revenue
total_revenue = purchase_data['Price'].sum()
total_revenue

2379.77

In [181]:
total_purchasing_analysis = pd.DataFrame({'Number of Unique Items': [total_items],
                                          'Average Price': [average_price],
                                          'Number of Purchases': [total_purchase],
                                          'Total Revenue': [total_revenue]})
total_purchasing_analysis['Average Price'] = total_purchasing_analysis['Average Price'].map('${:,.2f}'.format)
total_purchasing_analysis['Total Revenue'] = total_purchasing_analysis['Total Revenue'].map('${:,.2f}'.format)
total_purchasing_analysis

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


In [182]:
# Gender count
gender_count = purchase_data.groupby('Gender')['SN'].nunique()
gender_count

Gender
Female                    81
Male                     484
Other / Non-Disclosed     11
Name: SN, dtype: int64

In [183]:
# Gender percentage
gender_percentage = gender_count/total_players * 100
gender_percentage

Gender
Female                   14.062500
Male                     84.027778
Other / Non-Disclosed     1.909722
Name: SN, dtype: float64

In [184]:
gender_demographics = pd.DataFrame({'Gender Count': gender_count,
                                    'Gender Percentage': gender_percentage})
gender_demographics = gender_demographics.round(2)
gender_demographics

Unnamed: 0_level_0,Gender Count,Gender Percentage
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,81,14.06
Male,484,84.03
Other / Non-Disclosed,11,1.91


In [185]:
# Purchase count
purchase_count = purchase_data.groupby('Gender')['Purchase ID'].nunique()
purchase_count

Gender
Female                   113
Male                     652
Other / Non-Disclosed     15
Name: Purchase ID, dtype: int64

In [186]:
# Average purchase price
average_purchase = purchase_data.groupby('Gender')['Price'].mean()
average_purchase

Gender
Female                   3.203009
Male                     3.017853
Other / Non-Disclosed    3.346000
Name: Price, dtype: float64

In [187]:
# Total purchase value
total_gender_purchase = purchase_data.groupby('Gender')['Price'].sum()
total_gender_purchase

Gender
Female                    361.94
Male                     1967.64
Other / Non-Disclosed      50.19
Name: Price, dtype: float64

In [188]:
# Average total purchase per person
average_total_purchase = total_gender_purchase/gender_count
average_total_purchase

Gender
Female                   4.468395
Male                     4.065372
Other / Non-Disclosed    4.562727
dtype: float64

In [189]:
gender_purchasing_analysis = pd.DataFrame({'Purchase Count': purchase_count,
                                           'Average Purchase Price': average_purchase,
                                           'Total Purchase Value': total_gender_purchase,
                                           'Average Total Purchase per Person': average_total_purchase})
gender_purchasing_analysis['Average Purchase Price'] = gender_purchasing_analysis['Average Purchase Price'].map('${:,.2f}'.format)
gender_purchasing_analysis['Average Total Purchase per Person'] = gender_purchasing_analysis['Average Total Purchase per Person'].map('${:,.2f}'.format)
gender_purchasing_analysis

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Average Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,361.94,$4.47
Male,652,$3.02,1967.64,$4.07
Other / Non-Disclosed,15,$3.35,50.19,$4.56


In [190]:
# Creating bins
bins = [0, 10, 15, 20, 25, 30, 35, 40, 45]
age_ranges = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

In [191]:
# Cutting purchase data
purchase_data['Age Ranges'] = pd.cut(purchase_data["Age"], bins, labels=age_ranges)
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price,Age Ranges
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53,15-19
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56,35-39
2,2,Ithergue48,24,Male,92,Final Critic,4.88,20-24
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27,20-24
4,4,Iskosia90,23,Male,131,Fury,1.44,20-24


In [192]:
# Total count
age_demographics_count = purchase_data['Age Ranges'].value_counts()
age_demographics_count

20-24    325
15-19    200
25-29     77
10-14     54
30-34     52
35-39     33
<10       32
40+        7
Name: Age Ranges, dtype: int64

In [193]:
# Percentage of players
age_demographics_percentage = age_demographics_count/total_purchase * 100
age_demographics_percentage

20-24    41.666667
15-19    25.641026
25-29     9.871795
10-14     6.923077
30-34     6.666667
35-39     4.230769
<10       4.102564
40+       0.897436
Name: Age Ranges, dtype: float64

In [194]:
age_demographics_analysis = pd.DataFrame({'Total Count': age_demographics_count,
                                          'Percentage of Players': age_demographics_percentage})
age_demographics_analysis = age_demographics_analysis.round(2)
age_demographics_analysis

Unnamed: 0,Total Count,Percentage of Players
20-24,325,41.67
15-19,200,25.64
25-29,77,9.87
10-14,54,6.92
30-34,52,6.67
35-39,33,4.23
<10,32,4.1
40+,7,0.9


In [195]:
# Purchase count
age_purchase_count = purchase_data.groupby('Age Ranges')['Item Name'].count()
age_purchase_count

Age Ranges
<10       32
10-14     54
15-19    200
20-24    325
25-29     77
30-34     52
35-39     33
40+        7
Name: Item Name, dtype: int64

In [196]:
# Average purchase price
average_purchase_price = purchase_data.groupby('Age Ranges')['Price'].mean()
average_purchase_price.round(2)

Age Ranges
<10      3.40
10-14    2.90
15-19    3.11
20-24    3.02
25-29    2.88
30-34    2.99
35-39    3.40
40+      3.08
Name: Price, dtype: float64

In [197]:
# Total purchase value
total_purchase_value = purchase_data.groupby('Age Ranges')['Price'].sum()
total_purchase_value

Age Ranges
<10      108.96
10-14    156.60
15-19    621.56
20-24    981.64
25-29    221.42
30-34    155.71
35-39    112.35
40+       21.53
Name: Price, dtype: float64

In [198]:
# Average total purchase per person
average_total_purchase_per_person = total_purchase_value/total_players
average_total_purchase_per_person.round(2)

Age Ranges
<10      0.19
10-14    0.27
15-19    1.08
20-24    1.70
25-29    0.38
30-34    0.27
35-39    0.20
40+      0.04
Name: Price, dtype: float64

In [199]:
age_purchasing_analysis = pd.DataFrame({'Purchase Count': age_purchase_count,
                                        'Average Purchase Price': average_purchase_price,
                                        'Total Purchase Value': total_purchase_value,
                                        'Average Total Purchase per Person': average_total_purchase_per_person})
age_purchasing_analysis['Average Purchase Price'] = age_purchasing_analysis['Average Purchase Price'].map('${:,.2f}'.format)
age_purchasing_analysis['Total Purchase Value'] = age_purchasing_analysis['Total Purchase Value'].map('${:,.2f}'.format)
age_purchasing_analysis['Average Total Purchase per Person'] = age_purchasing_analysis['Average Total Purchase per Person'].map('${:,.2f}'.format)
age_purchasing_analysis

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Average Total Purchase per Person
Age Ranges,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,32,$3.40,$108.96,$0.19
10-14,54,$2.90,$156.60,$0.27
15-19,200,$3.11,$621.56,$1.08
20-24,325,$3.02,$981.64,$1.70
25-29,77,$2.88,$221.42,$0.38
30-34,52,$2.99,$155.71,$0.27
35-39,33,$3.40,$112.35,$0.20
40+,7,$3.08,$21.53,$0.04


In [200]:
# Purchase count
spenders_purchase_count = purchase_data.groupby('SN').count()['Price']
spenders_purchase_count.head()

SN
Adairialis76    1
Adastirin33     1
Aeda94          1
Aela59          1
Aelaria33       1
Name: Price, dtype: int64

In [201]:
# Average purchase price
spenders_average_purchase_price = purchase_data.groupby('SN').mean()['Price']
spenders_average_purchase_price.round(2).head()

SN
Adairialis76    2.28
Adastirin33     4.48
Aeda94          4.91
Aela59          4.32
Aelaria33       1.79
Name: Price, dtype: float64

In [202]:
# Total purchase value
spenders_total_purchase_value = purchase_data.groupby('SN').sum()['Price']
spenders_total_purchase_value.head()

SN
Adairialis76    2.28
Adastirin33     4.48
Aeda94          4.91
Aela59          4.32
Aelaria33       1.79
Name: Price, dtype: float64

In [203]:
top_spenders_analysis = pd.DataFrame({"Purchase Count": spenders_purchase_count,
                                      "Average Purchase Price": spenders_average_purchase_price,
                                      "Total Purchase Value": spenders_total_purchase_value})
top_spenders_analysis.round(2).head()

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Adairialis76,1,2.28,2.28
Adastirin33,1,4.48,4.48
Aeda94,1,4.91,4.91
Aela59,1,4.32,4.32
Aelaria33,1,1.79,1.79


In [204]:
top_five_spenders = top_spenders_analysis.sort_values('Total Purchase Value', ascending=False)
top_five_spenders.round(2).head()

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,3.79,18.96
Idastidru52,4,3.86,15.45
Chamjask73,3,4.61,13.83
Iral74,4,3.4,13.62
Iskadarya95,3,4.37,13.1


In [205]:
# Most popular items purchase count
items_purchase_count = purchase_data.groupby(['Item ID', 'Item Name']).count()['Price']
items_purchase_count.head()

Item ID  Item Name         
0        Splinter              4
1        Crucifer              3
2        Verdict               6
3        Phantomlight          6
4        Bloodlord's Fetish    5
Name: Price, dtype: int64

In [206]:
# Most popular items price
items_price = purchase_data.groupby(['Item ID', 'Item Name']).mean()['Price']
items_price.head()

Item ID  Item Name         
0        Splinter              1.28
1        Crucifer              3.26
2        Verdict               2.48
3        Phantomlight          2.49
4        Bloodlord's Fetish    1.70
Name: Price, dtype: float64

In [207]:
# Most popular items purchase value
items_purchase_value = purchase_data.groupby(['Item ID', 'Item Name']).sum()['Price']
items_purchase_value.head()

Item ID  Item Name         
0        Splinter               5.12
1        Crucifer               9.78
2        Verdict               14.88
3        Phantomlight          14.94
4        Bloodlord's Fetish     8.50
Name: Price, dtype: float64

In [208]:
most_popular_items_analysis = pd.DataFrame({"Purchase Count": items_purchase_count,
                                            "Item Price": items_price,
                                            "Total Purchase Value": items_purchase_value})
most_popular_items_analysis['Item Price'] = most_popular_items_analysis['Item Price'].map('${:,.2f}'.format)
most_popular_items_analysis['Total Purchase Value'] = most_popular_items_analysis['Total Purchase Value'].map('${:,.2f}'.format)
most_popular_items_analysis.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Splinter,4,$1.28,$5.12
1,Crucifer,3,$3.26,$9.78
2,Verdict,6,$2.48,$14.88
3,Phantomlight,6,$2.49,$14.94
4,Bloodlord's Fetish,5,$1.70,$8.50


In [209]:
# Most popular items
most_popular_items = most_popular_items_analysis.sort_values('Purchase Count', ascending=False)
most_popular_items.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
145,Fiery Glass Crusader,9,$4.58,$41.22
108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77
82,Nirvana,9,$4.90,$44.10
19,"Pursuit, Cudgel of Necromancy",8,$1.02,$8.16


In [210]:
# Most profitable items
most_profitable_items = most_popular_items_analysis.sort_values('Total Purchase Value', ascending=False)
most_profitable_items.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
63,Stormfury Mace,2,$4.99,$9.98
29,"Chaos, Ender of the End",5,$1.98,$9.90
173,Stormfury Longsword,2,$4.93,$9.86
1,Crucifer,3,$3.26,$9.78
38,"The Void, Vengeance of Dark Magic",4,$2.37,$9.48
