In [3]:
#Import Dependencies
import pandas as pd

In [4]:
#Import file 
file = "Resources/purchase_data.csv"

In [5]:
data = pd.read_csv(file, encoding="ISO-8859-1")
data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [6]:
#Create bins for age groups
age_bins = [0,11, 21, 31, 41, 51]
bin_name = ["under 10", "11 to 20", "21 to 30", "31 to 40", " over 40"]
data["Age Summary"] = pd.cut(data["Age"], age_bins, labels=bin_name, include_lowest=True)
data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price,Age Summary
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53,11 to 20
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56,31 to 40
2,2,Ithergue48,24,Male,92,Final Critic,4.88,21 to 30
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27,21 to 30
4,4,Iskosia90,23,Male,131,Fury,1.44,21 to 30


In [7]:
# Create a list of all columns for referencing
data_col = ['Purchase ID', 'SN', 'Age', 'Gender', 'Item ID', 'Item Name', 'Price', 'Age Summary']

# Explore the Data

# Game Analysis

In [8]:
games = data["Item ID"].nunique()
print("Number of Games for Sale: " + str(games))

Number of Games for Sale: 179


In [41]:
#Create a new data frame for determining game statistics
games_df = data.groupby(["Item ID", "Item Name", "Price"], as_index = False).count()
games_df.head(10)

Unnamed: 0,Item ID,Item Name,Price,Purchase ID,SN,Age,Gender,Age Summary
0,0,Splinter,1.28,4,4,4,4,4
1,1,Crucifer,1.99,1,1,1,1,1
2,1,Crucifer,3.26,3,3,3,3,3
3,2,Verdict,2.48,6,6,6,6,6
4,3,Phantomlight,2.49,6,6,6,6,6
5,4,Bloodlord's Fetish,1.7,5,5,5,5,5
6,5,Putrid Fan,4.08,4,4,4,4,4
7,6,Rusty Skull,3.7,2,2,2,2,2
8,7,"Thorn, Satchel of Dark Souls",1.33,7,7,7,7,7
9,8,"Purgatory, Gem of Regret",3.93,3,3,3,3,3


In [10]:
#Summary statistics for games
g_avg = games_df["Price"].mean()
g_max = games_df["Price"].max()
g_min = games_df["Price"].min()

In [11]:
#Create Summary Table
game_sum_table = pd.DataFrame({"Average Price": [g_avg],
                                "Highest Price": [g_max],
                                 "Lowest Price": [g_min]})
                            
game_sum_table["Average Price"] = game_sum_table["Average Price"].map("${:.2f}".format)
game_sum_table["Highest Price"] = game_sum_table["Highest Price"].map("${:.2f}".format)
game_sum_table["Lowest Price"] = game_sum_table["Lowest Price"].map("${:.2f}".format)
game_sum_table

Unnamed: 0,Average Price,Highest Price,Lowest Price
0,$3.04,$4.99,$1.00


# Player Summary Statistics


In [12]:
players = data["SN"].nunique()
print("Total number of players: " + str(players))

Total number of players: 576


## Gender Demographics

In [34]:
player_df = data.groupby(["SN", "Age", "Gender"], as_index = False).count()

#Create total number of players variable
total = player_df["Gender"].count()

#Create male variables
male = player_df.loc[player_df["Gender"] == "Male"]
male_count = male["Gender"].count()

#Create female variables
female = player_df.loc[player_df["Gender"] == "Female"]
female_count = female["Gender"].count()

#Create other variables
other = player_df.loc[player_df["Gender"] == "Other / Non-Disclosed"]
other_count = other["Gender"].count()

In [20]:
male_per = male_count / total
female_per = female_count / total
other_per = other_count / total

player_per_df = pd.DataFrame({"Gender": ["Male", "Female", "Other / Non-Disclosed"],
                              "Total": [male_count, female_count, other_count],
                              "Percentage": [male_per, female_per, other_per]})
player_per_df["Percentage"] = player_per_df["Percentage"].map("{:,.2%}".format)
player_per_df

Unnamed: 0,Gender,Total,Percentage
0,Male,484,84.03%
1,Female,81,14.06%
2,Other / Non-Disclosed,11,1.91%


## Purchasing Analysis (Gender)

In [15]:
#Create total gender variable statistics of interest
gender_total = data["Purchase ID"].count()
gender_sum = data["Price"].sum()
gender_avg = round(data["Price"].mean(),2)

In [16]:
#Create complete dfs specific to gender
m_df = data.loc[data["Gender"] == "Male", data_col]
f_df = data.loc[data["Gender"] == "Female", data_col]
o_df = data.loc[data["Gender"] == "Other / Non-Disclosed", data_col]

In [None]:
#Create male variable statistics of interest
m_total = m_df["Purchase ID"].count()
m_sum = m_df["Price"].sum()
m_avg_no = male["Price"].mean()
m_avg = m_df["Price"].mean()

In [None]:
#Create female variable statistics of interest
f_total = f_df["Purchase ID"].count()
f_sum = f_df["Price"].sum()
f_avg_no = female["Price"].mean()
f_avg = f_df["Price"].mean()

In [None]:
#Create other variable statistics of interest
o_total = o_df["Purchase ID"].count()
o_sum = o_df["Price"].sum()
o_avg_no = other["Price"].mean()
o_avg = o_df["Price"].mean()

In [None]:
#Create Summary Table
gender_sum_table = pd.DataFrame({"Gender": ["Male", "Female", "Other / Non-Disclosed"],
                                "Total Purchases": [m_total, f_total, o_total],
                                 "Avg No. of Purchases": [m_avg_no, f_avg_no, o_avg_no],
                                "Total Spent": [m_sum, f_sum, o_sum],
                               "Average Spent": [m_avg, f_avg, o_avg]})
gender_sum_table["Total Spent"] = gender_sum_table["Total Spent"].map("${:.2f}".format)
gender_sum_table["Average Spent"] = gender_sum_table["Average Spent"].map("${:.2f}".format)
gender_sum_table["Avg No. of Purchases"] = gender_sum_table["Avg No. of Purchases"].map("{:.2f}".format)
gender_sum_table

## Age Demographics

In [36]:
df_2 = pd.DataFramecolumns=["""Price", "Age", "Item Name"
age_df.head()

Unnamed: 0,SN,Age_x,Gender_x,Purchase ID_x,Item ID_x,Item Name_x,Price_x,Age Summary_x,Purchase ID_y,Age_y,Gender_y,Item ID_y,Item Name_y,Price_y,Age Summary_y
0,Adairialis76,16,Male,1,1,1,1,1,467,16,Male,123,Twilight's Carver,2.28,11 to 20
1,Adastirin33,35,Female,1,1,1,1,1,142,35,Female,175,Woeful Adamantite Claymore,4.48,31 to 40
2,Aeda94,17,Male,1,1,1,1,1,388,17,Male,128,"Blazeguard, Reach of Eternity",4.91,11 to 20
3,Aela59,21,Male,1,1,1,1,1,28,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",4.32,11 to 20
4,Aelaria33,23,Male,1,1,1,1,1,630,23,Male,171,Scalpel,1.79,21 to 30
