In [2]:
#Import Dependencies
import pandas as pd

In [3]:
# Relative path of the purchase-data CSV that the next cell reads with pd.read_csv
file = "Resources/purchase_data.csv"

In [4]:
# Read the purchase records from `file`, decoding the CSV as ISO-8859-1
data = pd.read_csv(filepath_or_buffer=file, encoding="ISO-8859-1")
# Display the first five rows as a quick check of what was loaded
data.head(n=5)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [5]:
# Display the dtype pandas inferred for every column so they can be checked by eye
data.dtypes

Purchase ID      int64
SN              object
Age              int64
Gender          object
Item ID          int64
Item Name       object
Price          float64
dtype: object

In [6]:
# Column labels of the purchase data, kept in one list so later cells can
# select the full set of columns by name
data_col = [
    "Purchase ID",
    "SN",
    "Age",
    "Gender",
    "Item ID",
    "Item Name",
    "Price",
]

# Explore the Data

## Game Analysis

In [34]:
# Count the distinct values in the "Item ID" column and report the result
games = data["Item ID"].nunique()
print(f"Number of Games for Sale: {games}")

Number of Games for Sale: 179


In [36]:
# Build one row per distinct (Item ID, Item Name, Price) combination; an item
# recorded at more than one price therefore appears on several rows. After
# .count() the remaining columns hold the number of non-null entries per group.
games_df = (
    data
    .groupby(by=["Item ID", "Item Name", "Price"], as_index=False)
    .count()
)
# Preview the first ten groups
games_df.head(n=10)

Unnamed: 0,Item ID,Item Name,Price,Purchase ID,SN,Age,Gender
0,0,Splinter,1.28,4,4,4,4
1,1,Crucifer,1.99,1,1,1,1
2,1,Crucifer,3.26,3,3,3,3
3,2,Verdict,2.48,6,6,6,6
4,3,Phantomlight,2.49,6,6,6,6
5,4,Bloodlord's Fetish,1.7,5,5,5,5
6,5,Putrid Fan,4.08,4,4,4,4
7,6,Rusty Skull,3.7,2,2,2,2
8,7,"Thorn, Satchel of Dark Souls",1.33,7,7,7,7
9,8,"Purgatory, Gem of Regret",3.93,3,3,3,3


In [48]:
# Mean, highest and lowest price taken over the rows of games_df, i.e. over
# distinct (Item ID, Item Name, Price) combinations rather than over purchases
g_avg, g_max, g_min = (
    games_df["Price"].mean(),
    games_df["Price"].max(),
    games_df["Price"].min(),
)

In [51]:
# One-row summary table of the item price statistics
game_sum_table = pd.DataFrame(
    {
        "Average Price": [g_avg],
        "Highest Price": [g_max],
        "Lowest Price": [g_min],
    }
)

# Render every column as a dollar amount with two decimals; the values become
# strings, so this table is for display only
game_sum_table = game_sum_table.assign(
    **{
        label: game_sum_table[label].map("${:.2f}".format)
        for label in game_sum_table.columns
    }
)
game_sum_table

Unnamed: 0,Average Price,Highest Price,Lowest Price
0,$3.04,$4.99,$1.00


## Player Summary Statistics


In [7]:
# Count the distinct values in the "SN" column, treating each one as a player
players = data["SN"].nunique()
print(f"Total number of players: {players}")

Total number of players: 576


In [37]:
# Collapse the purchase records to one row per distinct (SN, Age, Gender)
# combination, used below for gender and age counts. Because of .count(),
# every remaining column (including "Price") holds the number of non-null
# entries in that group, not the original values.
player_df = (
    data
    .groupby(by=["SN", "Age", "Gender"], as_index=False)
    .count()
)

In [9]:
# Heading and rule for the tally displayed below
print("Summary of Player Gender", "----------------------------", sep="\n")
# Tally the rows of player_df (one per SN/Age/Gender group) by gender
player_df["Gender"].value_counts()

Summary of Player Gender
----------------------------


Male                     484
Female                    81
Other / Non-Disclosed     11
Name: Gender, dtype: int64

In [10]:
# Heading and rule for the statistics displayed below
print("Summary Statistics of Player Age", "--------------------------------", sep="\n")
# Descriptive statistics of the "Age" column of player_df, rounded to two decimals
player_df["Age"].describe().round(2)

Summary Statistics of Player Age
--------------------------------


count    576.00
mean      22.74
std        6.84
min        7.00
25%       19.00
50%       22.00
75%       25.00
max       45.00
Name: Age, dtype: float64

## Gender Demographics
* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed


In [10]:
# Number of player_df rows with a non-null gender; denominator for the shares below
total = player_df["Gender"].count()

# Rows of player_df recorded as "Male", and how many there are
male = player_df[player_df["Gender"].eq("Male")]
male_count = male["Gender"].count()

# Rows of player_df recorded as "Female", and how many there are
female = player_df[player_df["Gender"].eq("Female")]
female_count = female["Gender"].count()

# Rows of player_df recorded as "Other / Non-Disclosed", and how many there are
other = player_df[player_df["Gender"].eq("Other / Non-Disclosed")]
other_count = other["Gender"].count()

In [11]:
# Fraction of player rows falling in each gender group
male_per, female_per, other_per = (
    male_count / total,
    female_count / total,
    other_count / total,
)

# Collect the fractions in a table, then render them as percentages with two
# decimals (the column becomes strings, so it is for display only)
player_per_df = pd.DataFrame(
    {
        "Gender": ["Male", "Female", "Other / Non-Disclosed"],
        "Percentage": [male_per, female_per, other_per],
    }
)
player_per_df["Percentage"] = player_per_df["Percentage"].map("{:,.2%}".format)
player_per_df

Unnamed: 0,Gender,Percentage
0,Male,84.03%
1,Female,14.06%
2,Other / Non-Disclosed,1.91%


## Purchasing Analysis (Gender)

In [24]:
# Overall figures computed over ALL purchase records (no gender filter is
# applied here, despite the variable names): total amount spent, mean price
# rounded to two decimals, and number of purchases.
gender_sum = data["Price"].sum()
gender_avg = round(data["Price"].mean(), 2)
gender_total = data["Purchase ID"].count()

In [17]:
# Split the purchase records into one frame per gender value, keeping the
# columns listed in data_col
m_df = data.loc[data["Gender"].eq("Male"), data_col]
f_df = data.loc[data["Gender"].eq("Female"), data_col]
o_df = data.loc[data["Gender"].eq("Other / Non-Disclosed"), data_col]

In [23]:
# Male purchase statistics, all derived from the male purchase records in m_df
m_total = m_df["Purchase ID"].count()  # number of purchases
m_sum = m_df["Price"].sum()            # total amount spent
# Average number of purchases per player: count the purchases of each SN, then
# average those counts. This is computed from m_df directly rather than from
# the `male` frame, whose "Price" column holds purchase counts (not prices)
# and which is defined in a different section of the notebook.
m_avg_no = m_df.groupby("SN")["Purchase ID"].count().mean()
m_avg = m_df["Price"].mean()           # average price per purchase

In [24]:
# Female purchase statistics, all derived from the female purchase records in f_df
f_total = f_df["Purchase ID"].count()  # number of purchases
f_sum = f_df["Price"].sum()            # total amount spent
# Average number of purchases per player: count the purchases of each SN, then
# average those counts. This is computed from f_df directly rather than from
# the `female` frame, whose "Price" column holds purchase counts (not prices)
# and which is defined in a different section of the notebook.
f_avg_no = f_df.groupby("SN")["Purchase ID"].count().mean()
f_avg = f_df["Price"].mean()           # average price per purchase

In [25]:
# Purchase statistics for "Other / Non-Disclosed", all derived from the records in o_df
o_total = o_df["Purchase ID"].count()  # number of purchases
o_sum = o_df["Price"].sum()            # total amount spent
# Average number of purchases per player: count the purchases of each SN, then
# average those counts. This is computed from o_df directly rather than from
# the `other` frame, whose "Price" column holds purchase counts (not prices)
# and which is defined in a different section of the notebook.
o_avg_no = o_df.groupby("SN")["Purchase ID"].count().mean()
o_avg = o_df["Price"].mean()           # average price per purchase

In [27]:
# Per-gender purchasing summary: one row per gender value
gender_sum_table = pd.DataFrame(
    {
        "Gender": ["Male", "Female", "Other / Non-Disclosed"],
        "Total Purchases": [m_total, f_total, o_total],
        "Avg No. of Purchases": [m_avg_no, f_avg_no, o_avg_no],
        "Total Spent": [m_sum, f_sum, o_sum],
        "Average Spent": [m_avg, f_avg, o_avg],
    }
)

# Format the money columns as dollars and the purchase average as a plain
# two-decimal number; the formatted columns become strings (display only)
gender_sum_table = gender_sum_table.assign(
    **{
        "Total Spent": gender_sum_table["Total Spent"].map("${:.2f}".format),
        "Average Spent": gender_sum_table["Average Spent"].map("${:.2f}".format),
        "Avg No. of Purchases": gender_sum_table["Avg No. of Purchases"].map("{:.2f}".format),
    }
)
gender_sum_table

Unnamed: 0,Gender,Total Purchases,Avg No. of Purchases,Total Spent,Average Spent
0,Male,652,1.35,$1967.64,$3.02
1,Female,113,1.4,$361.94,$3.20
2,Other / Non-Disclosed,15,1.36,$50.19,$3.35
