In [2]:
import pandas as pd
import numpy as np
import json

In [3]:
purchase_file = "purchase_data.json"

In [4]:
purchase_file_df = pd.read_json(purchase_file)

In [5]:
purchase_file_df.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


In [6]:
purchase_file_df.shape

(780, 6)

In [7]:
# Check to see if there are any rows missing data 
purchase_file_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 780 entries, 0 to 779
Data columns (total 6 columns):
Age          780 non-null int64
Gender       780 non-null object
Item ID      780 non-null int64
Item Name    780 non-null object
Price        780 non-null float64
SN           780 non-null object
dtypes: float64(1), int64(2), object(3)
memory usage: 36.6+ KB


In [8]:
# Remove the rows with missing data
purchase_file_df = purchase_file_df.dropna(how='any')

In [9]:
# Count the total number of unique players
player_count=purchase_file_df["SN"].nunique()
player_count

573

In [10]:
# Calculate the total number of unique items in "Item ID" or "Item Name"
item_id=purchase_file_df["Item ID"].nunique()
item_id

183

In [11]:
# Calculate the average purchase price
purchase_file_df["Price"].mean()

2.931192307692303

In [12]:
# Calculate the total number of purchases greater than 0... 
purchase_file_df["Price"].count()

780

In [13]:
# Calculate the total number of revenue
purchase_file_df["Price"].sum()

2286.3299999999963

In [14]:
# Count of group gender category by number of unique players
gender_data=purchase_file_df.groupby('Gender')['SN'].nunique()
print(gender_data)

Gender
Female                   100
Male                     465
Other / Non-Disclosed      8
Name: SN, dtype: int64


In [15]:
# Percentage of group gender category
percent_gender=purchase_file_df.groupby('Gender')['SN'].nunique()/player_count*100
print(percent_gender)

Gender
Female                   17.452007
Male                     81.151832
Other / Non-Disclosed     1.396161
Name: SN, dtype: float64


In [16]:
# Count of gender purchases
gender_purchase=purchase_file_df.groupby('Gender')['Item ID'].count()
print(gender_purchase)

Gender
Female                   136
Male                     633
Other / Non-Disclosed     11
Name: Item ID, dtype: int64


In [17]:
# Average purchase price broken down by gender
gender_average=purchase_file_df.groupby('Gender')['Price'].mean()
print(gender_average)

Gender
Female                   2.815515
Male                     2.950521
Other / Non-Disclosed    3.249091
Name: Price, dtype: float64


In [18]:
# Total purchase value broken down by gender
gender_total=purchase_file_df.groupby('Gender')['Price'].sum()
print(gender_total)

Gender
Female                    382.91
Male                     1867.68
Other / Non-Disclosed      35.74
Name: Price, dtype: float64


In [19]:
# Average Age 
avg_age=purchase_file_df["Age"].mean()
avg_age

22.72948717948718

In [20]:
# Median Age
median_age=purchase_file_df["Age"].median()
median_age

22.0

In [21]:
# Min age
min_age=purchase_file_df["Age"].min()
min_age

7

In [22]:
# Max age
max_age=purchase_file_df["Age"].max()
max_age

45

In [23]:
# Create bins for age demographics
# Bins are 0 to 18, 18 to 34, 34 to 50 
bins = [0, 18, 34, 50]

# Create names for the bins
age_groups = ['GenZ', 'Millennials', 'GenX']

In [24]:
pd.cut(purchase_file_df["Age"], bins, labels=age_groups)

0             GenX
1      Millennials
2      Millennials
3      Millennials
4      Millennials
5      Millennials
6      Millennials
7      Millennials
8      Millennials
9      Millennials
10     Millennials
11     Millennials
12     Millennials
13     Millennials
14            GenX
15     Millennials
16     Millennials
17     Millennials
18     Millennials
19     Millennials
20     Millennials
21            GenZ
22            GenZ
23     Millennials
24            GenZ
25     Millennials
26     Millennials
27     Millennials
28            GenZ
29            GenZ
          ...     
750    Millennials
751    Millennials
752           GenZ
753    Millennials
754    Millennials
755    Millennials
756    Millennials
757           GenX
758    Millennials
759    Millennials
760    Millennials
761    Millennials
762           GenX
763    Millennials
764    Millennials
765           GenZ
766    Millennials
767    Millennials
768    Millennials
769    Millennials
770    Millennials
771    Mille

In [25]:
purchase_file_df["Age Summary"] = pd.cut(purchase_file_df["Age"], bins, labels=age_groups)
purchase_file_df

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN,Age Summary
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34,GenX
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46,Millennials
2,34,Male,174,Primitive Blade,2.46,Assastnya25,Millennials
3,21,Male,92,Final Critic,1.36,Pheusrical25,Millennials
4,23,Male,63,Stormfury Mace,1.27,Aela59,Millennials
5,20,Male,10,Sleepwalker,1.73,Tanimnya91,Millennials
6,20,Male,153,Mercenary Sabre,4.57,Undjaskla97,Millennials
7,29,Female,169,"Interrogator, Blood Blade of the Queen",3.32,Iathenudil29,Millennials
8,25,Male,118,"Ghost Reaver, Longsword of Magic",2.77,Sondenasta63,Millennials
9,31,Male,99,"Expiration, Warscythe Of Lost Worlds",4.53,Hilaerin92,Millennials


In [26]:
# Total number of purchases broken down by age group  
purchase_file_df.groupby("Age Summary")["Price"].count()

Age Summary
GenZ           174
Millennials    547
GenX            59
Name: Price, dtype: int64

In [27]:
# Average purchase price broken down by age group
purchase_file_df.groupby("Age Summary")["Price"].mean()

Age Summary
GenZ           2.872011
Millennials    2.949634
GenX           2.934746
Name: Price, dtype: float64

In [28]:
# Total purchase amount broken down by age group
purchase_file_df.groupby("Age Summary")["Price"].sum()

Age Summary
GenZ            499.73
Millennials    1613.45
GenX            173.15
Name: Price, dtype: float64

In [29]:
# Number of purchases by player
player_purchase_df=purchase_file_df.groupby("SN")["Price"].count().sort_values(ascending=False)
df1=pd.DataFrame(player_purchase_df)
df1.rename(columns={'Price':'Number of Purchases'}, inplace=True)
df1.reset_index
df1

Unnamed: 0_level_0,Number of Purchases
SN,Unnamed: 1_level_1
Undirrala66,5
Hailaphos89,4
Mindimnya67,4
Qarwen67,4
Sondastan54,4
Saedue76,4
Chadjask77,3
Frichosiala98,3
Aerithllora36,3
Chanastsda67,3


In [30]:
# Avergage purchase by player
player_avg_df=purchase_file_df.groupby("SN")["Price"].mean().sort_values(ascending=False)
df2=pd.DataFrame(player_avg_df)
df2.rename(columns={'Price':'Average Purchases'}, inplace=True)
df2.reset_index
df2

Unnamed: 0_level_0,Average Purchases
SN,Unnamed: 1_level_1
Tyarithn67,4.950
Palurrian69,4.950
Qiluard68,4.950
Frichaststa61,4.950
Syathe73,4.890
Assossa43,4.890
Iasur80,4.870
Hiasur92,4.870
Ilophos58,4.870
Idaria87,4.870


In [31]:
new_df=df1.join(df2)
new_df

Unnamed: 0_level_0,Number of Purchases,Average Purchases
SN,Unnamed: 1_level_1,Unnamed: 2_level_1
Undirrala66,5,3.412000
Hailaphos89,4,1.467500
Mindimnya67,4,3.185000
Qarwen67,4,2.492500
Sondastan54,4,2.560000
Saedue76,4,3.390000
Chadjask77,3,3.336667
Frichosiala98,3,2.980000
Aerithllora36,3,3.483333
Chanastsda67,3,2.986667


In [32]:
# Total purchase by player
player_total_df=purchase_file_df.groupby("SN")["Price"].sum().sort_values(ascending=False)
df3=pd.DataFrame(player_total_df)
df3.rename(columns={'Price':'Total Purchase Amount'}, inplace=True)
df3.head()

Unnamed: 0_level_0,Total Purchase Amount
SN,Unnamed: 1_level_1
Undirrala66,17.06
Saedue76,13.56
Mindimnya67,12.74
Haellysu29,12.73
Eoda93,11.58


In [33]:
#Identify top 5 spenders based on total purchase amount
combined_df=new_df.join(df3)
combined_df.sort_values("Total Purchase Amount", ascending=False)

Unnamed: 0_level_0,Number of Purchases,Average Purchases,Total Purchase Amount
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,5,3.412000,17.06
Saedue76,4,3.390000,13.56
Mindimnya67,4,3.185000,12.74
Haellysu29,3,4.243333,12.73
Eoda93,3,3.860000,11.58
Isursti83,3,3.683333,11.05
Isurria36,3,3.670000,11.01
Eusri70,3,3.516667,10.55
Aerithllora36,3,3.483333,10.45
Yasriphos60,3,3.466667,10.40


In [53]:
# Identify 5 most popular items by purchase count
popular_item=purchase_file_df.groupby("Item Name")["Price"].count().sort_values(ascending=False)
df4=pd.DataFrame(popular_item)
df4.reset_index
df4.rename(columns={'Price':'Purchase Count'}, inplace=True)
df4.head()

Unnamed: 0_level_0,Purchase Count
Item Name,Unnamed: 1_level_1
Final Critic,14
"Betrayal, Whisper of Grieving Widows",11
Arcane Gem,11
Stormcaller,10
Woeful Adamantite Claymore,9


In [68]:
item_name=purchase_file_df.groupby("Item Name")["Item ID"].value_counts()
df5=pd.DataFrame(item_name)
df5.reset_index
df5.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Item ID
Item Name,Item ID,Unnamed: 2_level_1
Abyssal Shard,162,3
"Aetherius, Boon of the Blessed",137,4
Agatha,120,5
Alpha,130,7
"Alpha, Oath of Zeal",79,7


In [59]:
# Identify Total Purchase Amount of popular items
item_price=purchase_file_df.groupby("Item Name")["Price"].sum().sort_values(ascending=False)
df5=pd.DataFrame(item_price)
df5.reset_index
df5.rename(columns={'Price':'Total Purchase Amount'}, inplace=True)
df5.head()

Unnamed: 0_level_0,Total Purchase Amount
Item Name,Unnamed: 1_level_1
Final Critic,38.6
Retribution Axe,37.26
Stormcaller,34.65
Spectral Diamond Doomblade,29.75
Orenmir,29.7
