In [1]:
# import modules
import pandas as pd
import os

In [2]:
# Load the purchase records from the CSV in the Resources folder into a DataFrame.
# os.path.join keeps the relative path portable across operating systems.
path_to_csv = os.path.join("Resources", "purchase_data.csv")
purchase_df = pd.read_csv(filepath_or_buffer=path_to_csv)

# Preview the first five rows as the cell output.
purchase_df.head(5)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [3]:
# Glancing at column names: .columns returns the Index of column labels,
# i.e. the names used to select columns from purchase_df.
purchase_df.columns

Index(['Purchase ID', 'SN', 'Age', 'Gender', 'Item ID', 'Item Name', 'Price'], dtype='object')

In [4]:
# Checking if data is clean or not: count() reports the number of non-null
# entries per column, so a column whose count is lower than the others
# contains missing values.
purchase_df.count()

Purchase ID    780
SN             780
Age            780
Gender         780
Item ID        780
Item Name      780
Price          780
dtype: int64

In [5]:
# Inspect the dtype pandas inferred for each column when the CSV was read
# (no explicit dtypes were passed to read_csv).
purchase_df.dtypes

Purchase ID      int64
SN              object
Age              int64
Gender          object
Item ID          int64
Item Name       object
Price          float64
dtype: object

In [6]:
# Player Count
# The same SN can appear on several purchase rows, so count distinct SN
# values rather than rows.
total_players = purchase_df["SN"].nunique()

# One-row summary table used as the cell output.
pd.DataFrame([total_players], columns=["Total Players"])

Unnamed: 0,Total Players
0,576


In [7]:
# Need to confirm with Paul: print the number of distinct values in
# "Item Name" and then in "Item ID" so the two counts can be compared.
for column_name in ("Item Name", "Item ID"):
    print(purchase_df[column_name].nunique())

179
183


In [8]:
# Purchasing Analysis
# Headline figures over every purchase row in purchase_df.
total_number_of_unique_items = purchase_df["Item ID"].nunique()
average_purchase_price = purchase_df["Price"].mean()
total_number_of_purchases = purchase_df["Item ID"].count()  # non-null Item ID rows
total_revenue = purchase_df["Price"].sum()

# Single-row summary table; every value is wrapped in a one-element list.
purchasing_analysis_df = pd.DataFrame(
    {
        "Number of Unique Items": [total_number_of_unique_items],
        "Average Price": [average_purchase_price],
        "Number of Purchases": [total_number_of_purchases],
        "Total Revenue": [total_revenue],
    }
)

# Convert the money columns to formatted strings for readability.
# Note: after this step those columns hold text, not numbers.
display_formats = {"Average Price": "${:.2f}", "Total Revenue": "${:,.2f}"}
for column_name, template in display_formats.items():
    purchasing_analysis_df[column_name] = purchasing_analysis_df[column_name].map(template.format)
purchasing_analysis_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,$3.05,780,"$2,379.77"


In [17]:
# Gender Demographics

# Count players per gender after dropping duplicate players from the data set.
# Reasoning: a player may have bought items more than once and therefore
# may appear multiple times in transactions.
#
# The counts Series is named explicitly before it becomes a DataFrame.
# Relying on the default name breaks across pandas versions: before 2.0 the
# value_counts() result is named after the source column ("Gender"), from
# 2.0 on it is named "count", so rename(columns={"Gender": ...}) silently
# does nothing and the "Total Counts" lookup raises KeyError.
gender_df = (
    purchase_df.drop_duplicates("SN")["Gender"]
    .value_counts()
    .rename("Total Counts")
    .to_frame()
)
# pandas >= 2.0 labels the index "Gender"; clear it so the table renders the
# same on every version.
gender_df.index.name = None

# Share of all unique players (total_players is computed in the Player Count cell).
gender_df["Percentage of Players"] = gender_df["Total Counts"] / total_players * 100

# Convert the percentage to a formatted string for readability.
gender_df["Percentage of Players"] = gender_df["Percentage of Players"].map('{:.2f}%'.format)
gender_df

Unnamed: 0,Total Counts,Percentage of Players
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


In [53]:
# Boolean row masks selecting the purchases made by each gender value.
isFemale = purchase_df["Gender"] == "Female"
isMale = purchase_df["Gender"] == "Male"
isOther = purchase_df["Gender"] == "Other / Non-Disclosed"
gender_masks = (isFemale, isMale, isOther)

# Print, in Female / Male / Other order within each group:
#   1. purchase count (non-null Item ID values)
#   2. average purchase price
#   3. total purchase value
summaries = (
    ("Item ID", pd.Series.count),
    ("Price", pd.Series.mean),
    ("Price", pd.Series.sum),
)
for column_name, summarize in summaries:
    for mask in gender_masks:
        # .loc selects rows and column in one step instead of chained indexing.
        print(summarize(purchase_df.loc[mask, column_name]))

# TODO: average purchase total per person by gender is still an open question.

113
652
15
3.203008849557519
3.0178527607361953
3.3460000000000005
361.94
1967.64
50.19
