In [1]:
#Importing some modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

In [2]:
#Defining path and file to load
path=os.path.join("Resources","purchase_data.csv" )

In [3]:
# Read Purchasing File and store into Pandas data frame
purchase=pd.read_csv(path, sep=",")

In [4]:
# looking the data first
purchase.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [None]:
#looking for shape
purchase.shape
print(f'The number of rows is:{purchase.shape[0]} and number of columns is: {purchase.shape[1]}')

In [None]:
#looking for dtypes and null value
purchase.info()

In [None]:
# is any null value
purchase.isna()

In [None]:
#Describing the dataframe
purchase.describe()

In [None]:
#columns name
purchase.columns

In [None]:
# index range
purchase.index

In [None]:
#Display the total number of players
total_players=purchase.SN.nunique()
pd.DataFrame({'Total Players':[total_players]})

In [None]:
#Purchasing Analysis (Total)
unique=purchase['Item Name'].nunique()
avg_purchase=purchase.Price.mean()
total_no_purchase=purchase['Item ID'].count()
total_revenue=purchase.Price.sum()
total=pd.DataFrame({'Number of Unique Items':[unique],
             "Average Purchase Price":[avg_purchase],
             "Total Number of Purchases":[total_no_purchase],
             "Total Revenue":[total_revenue]})
total['Total Revenue']=total['Total Revenue'].map('${:.2f}'.format)
total['Average Purchase Price']=total['Average Purchase Price'].map('${:.2f}'.format)
total

In [None]:
#Gender Demographics
 # total count
totl_count=purchase.Gender.count()  
# converting to dataframe
gender=purchase.Gender.value_counts().to_frame()  
# adding perct change column
gender['Percentage Change']=gender['Gender']/totl_count  
gender

In [14]:
#Purchasing Analysis (Gender)
#short cut way to convert groupby to dataframe using reset_index
purchase.groupby('Gender')['Price'].agg(['count', 'mean', 'sum']).reset_index()

#other way
purchase_group=purchase.groupby('Gender')['Price'].agg(['count', 'mean', 'sum'])
purchase_df=pd.DataFrame(purchase_group)
purchase_df['Avg Total Purchase per Person']=purchase_df['sum']/purchase_df['count']
purchase_df.rename(columns={'count':'Purchase Count',
                           "mean":"Average Purchase Price",
                           'sum':'Total Purchase Value'})

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,3.203009,361.94,3.203009
Male,652,3.017853,1967.64,3.017853
Other / Non-Disclosed,15,3.346,50.19,3.346


In [7]:
purchase.Age.sum()

17717

In [8]:
#Age Demographics
bins=[0,10,15,20,25,30,35,40,100]
levels=['<10','10-14','15-19','20-24','25-29','30-34', '35-39','40+']
purchase['Bins']=pd.cut(purchase['Age'], bins, labels=levels)
bins_df=purchase.groupby('Bins')["Age"].count().reset_index()
bins_df.set_index("Bins", inplace=True)
bins_df["Percentage of Players"]=bins_df.Age/bins_df.Age.count()
bins_df.rename(columns={'Age': "Total Count"})

Unnamed: 0_level_0,Total Count,Percentage of Players
Bins,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,32,4.0
10-14,54,6.75
15-19,200,25.0
20-24,325,40.625
25-29,77,9.625
30-34,52,6.5
35-39,33,4.125
40+,7,0.875


In [13]:
#Purchasing Analysis (Age)
bins_purch=purchase.groupby('Bins')["Price"].agg(['count', 'mean', 'sum'])
purch_df=pd.DataFrame(bins_purch)
purch_df['Avg Total Purchase per Person']=purch_df['sum']/purch_df['count']
purch_df.rename(columns={'count':'Purchase Count',
                           "mean":"Average Purchase Price",
                           'sum':'Total Purchase Value'})

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Bins,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,32,3.405,108.96,3.405
10-14,54,2.9,156.6,2.9
15-19,200,3.1078,621.56,3.1078
20-24,325,3.020431,981.64,3.020431
25-29,77,2.875584,221.42,2.875584
30-34,52,2.994423,155.71,2.994423
35-39,33,3.404545,112.35,3.404545
40+,7,3.075714,21.53,3.075714


In [18]:
#Top Spenders
spender_groupby=purchase.groupby("SN")['Price'].agg(['count', 'mean', 'sum'])
top_spender=pd.DataFrame(spender_groupby)
top_spender.rename(columns={'count':'Purchase Count',
                           "mean":"Average Purchase Price",
                           'sum':'Total Purchase Value'})

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Adairialis76,1,2.280000,2.28
Adastirin33,1,4.480000,4.48
Aeda94,1,4.910000,4.91
Aela59,1,4.320000,4.32
Aelaria33,1,1.790000,1.79
...,...,...,...
Yathecal82,3,2.073333,6.22
Yathedeu43,2,3.010000,6.02
Yoishirrala98,1,4.580000,4.58
Zhisrisu83,2,3.945000,7.89


In [None]:
# Most Popular Items

In [None]:
#Most Profitable Items