In [1]:
#Importing some modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

In [2]:
# Location of the purchase data, relative to the notebook's working directory
data_dir, data_file = "Resources", "purchase_data.csv"
path = os.path.join(data_dir, data_file)

In [3]:
# Load the comma-separated purchase records into a pandas DataFrame
purchase = pd.read_csv(filepath_or_buffer=path, sep=",")

In [4]:
# Preview the first rows to confirm the file parsed into the expected columns
purchase.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [None]:
# Report the dimensions of the loaded frame (rows, columns)
n_rows, n_cols = purchase.shape
print(f'The number of rows is:{n_rows} and number of columns is: {n_cols}')

In [None]:
# Print a summary of the frame: each column's dtype and its non-null count
purchase.info()

In [None]:
# Count the missing values in each column; all zeros means the data has no nulls.
# (Displaying purchase.isna() itself only shows a truncated True/False grid,
# which cannot answer the question at a glance.)
purchase.isna().sum()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the frame's columns
purchase.describe()

In [None]:
# Column labels of the loaded frame
purchase.columns

In [None]:
# Row index of the loaded frame
purchase.index

In [5]:
# Total number of players: each distinct screen name ("SN") counts as one player
total_players = purchase["SN"].nunique()
pd.DataFrame([[total_players]], columns=["Total Players"])

Unnamed: 0,Total Players
0,576


In [None]:
#Purchasing Analysis (Total)
# Items are keyed by "Item ID" (the item tables later in this notebook group on it),
# so count distinct IDs; counting distinct names would undercount whenever two
# different IDs share the same name.
unique=purchase['Item ID'].nunique()
avg_purchase=purchase['Price'].mean()
# Every row is one purchase, so the number of non-null item IDs is the purchase count
total_no_purchase=purchase['Item ID'].count()
total_revenue=purchase['Price'].sum()
total=pd.DataFrame({'Number of Unique Items':[unique],
             "Average Purchase Price":[avg_purchase],
             "Total Number of Purchases":[total_no_purchase],
             "Total Revenue":[total_revenue]})
# Dollar formatting is for display only; the raw numbers stay in the variables above
for money_col in ['Average Purchase Price', 'Total Revenue']:
    total[money_col]=total[money_col].map('${:.2f}'.format)
total

In [None]:
#Gender Demographics
# Demographics describe players, not purchases: keep one row per screen name so a
# player with several purchases is counted once
players=purchase.drop_duplicates(subset='SN')
# total count of players with a recorded gender
totl_count=players['Gender'].count()
# Name the counts column explicitly: value_counts() calls it 'Gender' on pandas < 2.0
# but 'count' on pandas >= 2.0, so looking it up as 'Gender' raises KeyError there
gender=players['Gender'].value_counts().rename('Total Count').to_frame()
# share of all players that each gender represents (a fraction between 0 and 1)
gender['Percentage of Players']=gender['Total Count']/totl_count
gender

In [None]:
#Purchasing Analysis (Gender)
by_gender=purchase.groupby('Gender')
# Per-gender purchase statistics: number of purchases, mean price, total spend
purchase_group=by_gender['Price'].agg(['count', 'mean', 'sum'])
# Distinct players per gender. Spend per person must divide by this, not by the
# purchase count: sum/count is just the mean price and would duplicate that column.
players_per_gender=by_gender['SN'].nunique()
# rename() returns a new frame, so purchase_group keeps its raw numeric columns
purchase_df=purchase_group.rename(columns={'count':'Purchase Count',
                                           "mean":"Average Purchase Price",
                                           'sum':'Total Purchase Value'})
# Both operands are indexed by Gender, so the division aligns row by row
purchase_df['Avg Total Purchase per Person']=(
                      purchase_df['Total Purchase Value']/players_per_gender)
# Dollar formatting turns the values into strings, so it is done last, for display only
for money_col in ['Average Purchase Price',
                  'Total Purchase Value',
                  'Avg Total Purchase per Person']:
    purchase_df[money_col]=purchase_df[money_col].map('${:.2f}'.format)
purchase_df

In [None]:
# Sum of the Age column over all purchase rows.
# NOTE(review): no other cell in view uses this value; it looks like leftover
# exploration -- confirm and consider removing.
purchase.Age.sum()

In [None]:
#Age Demographics
# Bin edges are used left-closed: [0,10), [10,15), ... [40,inf). With pd.cut's default
# right-closed bins an age of exactly 10, 15, 20, ... would land under the label of
# the bracket below it (e.g. age 20 labelled '15-19').
bins=[0,10,15,20,25,30,35,40,float("inf")]
levels=['<10','10-14','15-19','20-24','25-29','30-34', '35-39','40+']
# Stored on `purchase` because the per-age purchasing analysis groups on this column
purchase['Bins']=pd.cut(purchase['Age'], bins, labels=levels, right=False)
# Demographics describe players, not purchases: keep one row per screen name
players=purchase.drop_duplicates(subset='SN')
# observed=False keeps every age bracket in the table, even one with no players
bins_df=players.groupby('Bins', observed=False)['SN'].count().to_frame('Total Count')
# Divide by the total number of players; .count() on this column would only give
# the number of brackets (8), not the number of players
bins_df["Percentage of Players"]=bins_df['Total Count']/bins_df['Total Count'].sum()
bins_df

In [None]:
#Purchasing Analysis (Age)
# Relies on the 'Bins' age-bracket column added to `purchase` by the Age Demographics cell.
# observed=False keeps every bracket in the result, even one without purchases.
by_age=purchase.groupby('Bins', observed=False)
# Per-bracket purchase statistics: number of purchases, mean price, total spend
bins_purch=by_age["Price"].agg(['count', 'mean', 'sum'])
# Distinct players per bracket. Spend per person must divide by this, not by the
# purchase count: sum/count is just the mean price and would duplicate that column.
players_per_bin=by_age['SN'].nunique()
# rename() returns a new frame, so bins_purch keeps its raw numeric columns
purch_df=bins_purch.rename(columns={'count':'Purchase Count',
                                    "mean":"Average Purchase Price",
                                    'sum':'Total Purchase Value'})
# Both operands are indexed by the age bracket, so the division aligns row by row
purch_df['Avg Total Purchase per Person']=(
                      purch_df['Total Purchase Value']/players_per_bin)
# Dollar formatting turns the values into strings, so it is done last, for display only
for money_col in ['Average Purchase Price',
                  'Total Purchase Value',
                  'Avg Total Purchase per Person']:
    purch_df[money_col]=purch_df[money_col].map('${:.2f}'.format)
purch_df

In [None]:
#Top Spenders
# Per-player purchase statistics: number of purchases, mean price, total spent
spender_groupby = purchase.groupby("SN")['Price'].agg(['count', 'mean', 'sum'])
spender_labels = {'count': 'Purchase Count',
                  "mean": "Average Purchase Price",
                  'sum': 'Total Purchase Value'}
# Rank on the numeric totals first; the dollar formatting below turns them into strings
top_spender = (
    spender_groupby
    .rename(columns=spender_labels)
    .sort_values(by=['Total Purchase Value'], ascending=False)
)
for money_col in ('Average Purchase Price', 'Total Purchase Value'):
    top_spender[money_col] = top_spender[money_col].map('${:.2f}'.format)

top_spender

In [None]:
# Most Popular Items
# Per-item purchase statistics, keyed by (Item ID, Item Name): times bought,
# mean price paid, and total revenue
items_groupby = purchase.groupby(['Item ID', 'Item Name'])['Price'].agg(['count', 'mean', 'sum'])
item_labels = {'count': 'Purchase Count',
               "mean": "Item Price",
               'sum': 'Total Purchase Value'}
# Most frequently purchased items first
popular_items = (
    items_groupby
    .rename(columns=item_labels)
    .sort_values(by=['Purchase Count'], ascending=False)
)

popular_items

In [None]:
#Most Profitable Items
# Build a separate frame instead of sorting/formatting popular_items in place.
# In-place formatting replaces the numeric columns with strings, so running this
# cell a second time would fail ('${:.2f}'.format rejects a str) and the frame
# displayed by the Most Popular Items cell would no longer match its contents.
# sort_values() returns a new frame ranked by total revenue, highest first.
profitable_items=popular_items.sort_values(by=['Total Purchase Value'], ascending=False)
# Dollar formatting comes after the sort so the ranking uses the numeric values
for money_col in ['Item Price', 'Total Purchase Value']:
    profitable_items[money_col]=profitable_items[money_col].map('${:.2f}'.format)
profitable_items