In [1]:
# import modules
import pandas as pd
import os

# Lift pandas' display limits so DataFrames are rendered in full
# (no truncated rows or columns).
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

In [2]:
# Load the purchase records from Resources/purchase_data.csv into a DataFrame.
path_to_csv = os.path.join('Resources', 'purchase_data.csv')
purchase_df = pd.read_csv(filepath_or_buffer=path_to_csv)

# Preview the first five rows to confirm the file parsed as expected.
purchase_df.head(5)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [3]:
# List the column labels available in the purchase data
purchase_df.columns

Index(['Purchase ID', 'SN', 'Age', 'Gender', 'Item ID', 'Item Name', 'Price'], dtype='object')

In [4]:
# Count the non-null entries in every column; if all columns report the same
# number, no column has missing values.
purchase_df.notna().sum()

Purchase ID    780
SN             780
Age            780
Gender         780
Item ID        780
Item Name      780
Price          780
dtype: int64

In [5]:
# Inspect the dtype pandas assigned to each column
purchase_df.dtypes

Purchase ID      int64
SN              object
Age              int64
Gender          object
Item ID          int64
Item Name       object
Price          float64
dtype: object

## Player Count

In [6]:
# Player count: the number of distinct screen names (SN) in the purchase data.
# A player who bought several times is counted once.
total_players = purchase_df["SN"].nunique()

# Show the figure as a one-row summary table.
pd.Series([total_players], name="Total Players").to_frame()

Unnamed: 0,Total Players
0,576


## Purchasing Analysis

In [7]:
# Compare the number of distinct item names with the number of distinct item IDs.
# More IDs than names means at least one item name is listed under several IDs,
# which points to a data-cleaning issue.
for item_column in ("Item Name", "Item ID"):
    print(purchase_df[item_column].nunique())

179
183


In [8]:
# Group by Item Name and Item ID so every (name, ID) pairing forms its own group;
# an item name recorded under two or more IDs then appears in several groups.
group_df = purchase_df.groupby(["Item Name", "Item ID"])
# Display one representative row per (Item Name, Item ID) group
group_df.first()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase ID,SN,Age,Gender,Price
Item Name,Item ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Abyssal Shard,162,7,Iskjaskst81,20,Male,2.67
"Aetherius, Boon of the Blessed",137,214,Iaralsuir44,23,Male,3.39
Agatha,120,130,Maridisya31,25,Male,3.08
Alpha,130,673,Idacal95,30,Male,2.07
"Alpha, Oath of Zeal",79,352,Ilarin91,22,Male,4.05
"Alpha, Reach of Ending Hope",47,664,Chamistast30,31,Male,3.58
Amnesia,22,50,Saelaephos52,21,Male,2.18
Apocalyptic Battlescythe,93,413,Siana77,12,Male,1.97
Arcane Gem,84,55,Phaelap26,25,Female,3.79
Avenger,102,46,Yasrisu92,27,Male,3.44


In [92]:
# Collect every Item ID recorded for each Item Name, then keep only the names
# that own more than one ID. Accumulating per name (instead of comparing
# neighbouring keys) does not depend on the key order and keeps *all* IDs when
# an item has three or more of them.
# Result: duplicate_dict maps item name -> list of its IDs (ascending),
# to be processed by the cleaning step later.
key_list = list(group_df.groups.keys())  # (Item Name, Item ID) tuples

ids_by_name = {}
for item_name, item_id in sorted(key_list):
    ids_by_name.setdefault(item_name, []).append(item_id)

duplicate_dict = {
    item_name: item_ids
    for item_name, item_ids in ids_by_name.items()
    if len(item_ids) > 1
}

for item_name, item_ids in duplicate_dict.items():
    joined_ids = " & ".join(str(item_id) for item_id in item_ids)
    print(f'"{item_name}" has IDs: {joined_ids}')

print(duplicate_dict)

"Crucifer" has IDs: 1 & 23
"Final Critic" has IDs: 92 & 101
"Persuasion" has IDs: 132 & 141
"Stormcaller" has IDs: 30 & 180
{'Crucifer': [1, 23], 'Final Critic': [92, 101], 'Persuasion': [132, 141], 'Stormcaller': [30, 180]}


In [80]:
# Clean the purchase data by giving every item that is listed under several
# Item IDs one fresh, shared ID.
#
# - Every ID listed for a duplicate item is remapped, however many there are.
# - New IDs start at 200 (as before) unless the data already contains IDs that
#   high, in which case they start just above the largest existing ID so a new
#   ID can never collide with a real one.
# - The remap is applied in a single pass to a copy, so purchase_df itself is
#   left untouched and re-running this cell always gives the same result.
FIRST_NEW_ID = 200

highest_existing_ID = purchase_df["Item ID"].max()
if pd.isna(highest_existing_ID):
    # Empty data set: nothing to collide with.
    new_ID = FIRST_NEW_ID
else:
    new_ID = max(FIRST_NEW_ID, int(highest_existing_ID) + 1)

# old Item ID -> replacement Item ID
id_remap = {}
for idx_list in duplicate_dict.values():
    for old_ID in idx_list:
        id_remap[old_ID] = new_ID
    new_ID = new_ID + 1

clean_purchase_df = purchase_df.copy()
# IDs that are not in the remap are kept as they are.
clean_purchase_df["Item ID"] = clean_purchase_df["Item ID"].map(
    lambda item_id: id_remap.get(item_id, item_id)
)

## Purchasing Analysis (Continued)

In [91]:
# Headline purchasing metrics, all computed from the cleaned data so the
# figures in the table share one source.
total_number_of_unique_items = clean_purchase_df["Item ID"].nunique()
average_purchase_price = clean_purchase_df["Price"].mean()
total_number_of_purchases = clean_purchase_df["Item ID"].count()
total_revenue = clean_purchase_df["Price"].sum()

# One-row summary table; every value is wrapped in a list so each column
# is built the same way.
purchasing_analysis_df = pd.DataFrame({
    "Number of Unique Items": [total_number_of_unique_items],
    "Average Price": [average_purchase_price],
    "Number of Purchases": [total_number_of_purchases],
    "Total Revenue": [total_revenue],
})

# Using mapping to change display formatting for readability
# (note: this turns the two money columns into strings)
purchasing_analysis_df["Average Price"] = purchasing_analysis_df["Average Price"].map('${:.2f}'.format)
purchasing_analysis_df["Total Revenue"] = purchasing_analysis_df["Total Revenue"].map('${:,.2f}'.format)
purchasing_analysis_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


## Gender Demographics

In [95]:
# Count players per gender.
# Duplicate screen names are dropped first: a player may have bought items more
# than once and would otherwise be counted once per transaction.
gender_counts = purchase_df.drop_duplicates("SN")["Gender"].value_counts()

# Name the counts explicitly instead of renaming a "Gender" column afterwards:
# the name value_counts() gives its result differs between pandas versions
# ("Gender" before 2.0, "count" from 2.0 on), so a column rename keyed on
# "Gender" silently does nothing on newer pandas and the next line then fails.
# rename_axis(None) keeps the index header blank on every version.
gender_df = gender_counts.rename("Total Counts").rename_axis(None).to_frame()
gender_df["Percentage of Players"] = gender_df["Total Counts"] / total_players * 100

# Using mapping to change display formatting for readability
gender_df["Percentage of Players"] = gender_df["Percentage of Players"].map('{:.2f}%'.format)
gender_df

Unnamed: 0,Total Counts,Percentage of Players
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


In [96]:
# Boolean row masks selecting the purchases made by each gender group
isFemale = purchase_df["Gender"].eq("Female")
isMale = purchase_df["Gender"].eq("Male")
isOther = purchase_df["Gender"].eq("Other / Non-Disclosed")

# Fixed reporting order: Female, Male, Other / Non-Disclosed
gender_masks = (isFemale, isMale, isOther)

# Number of purchases per gender
for gender_mask in gender_masks:
    print(purchase_df.loc[gender_mask, "Item ID"].count())

# Average purchase price per gender
for gender_mask in gender_masks:
    print(purchase_df.loc[gender_mask, "Price"].mean())

# Total purchase value per gender
for gender_mask in gender_masks:
    print(purchase_df.loc[gender_mask, "Price"].sum())

# TODO: still open - average purchase total per person by gender

113
652
15
3.203008849557519
3.0178527607361953
3.3460000000000005
361.94
1967.64
50.19
