# Data Analysis: Heroes of Pymoli



In [1]:
#Import Pandas Library 
import pandas as pd 

In [2]:
# Set CSV path to import data 
csv_path = "Resources/purchase_data.csv"

# Read the CSV into a Pandas DataFrame
data = pd.read_csv(csv_path)

# Display columns for easy reference 
data.head(0)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price


In [3]:
# Check if there is any null entry in the data
data.isnull().sum()

Purchase ID    0
SN             0
Age            0
Gender         0
Item ID        0
Item Name      0
Price          0
dtype: int64

### Player Count

In [4]:
# Count the total number of players
player_count = len(data["SN"].unique())
player_count

# Display the total number of players in dataframe
total = pd.DataFrame({"Total Players" :[player_count]})
total

Unnamed: 0,Total Players
0,576


 ### Purchasing Analysis  (Total)

In [5]:
# Identify unique items and drop any duplicates 
items = data['Item ID'].drop_duplicates(keep='first')

# Count the total of unique items 
items_count = len(items)

# Calculate the average price 
average_price = round(data["Price"].mean(),2)

# Count the total number of purchases
number_purchases = data["Purchase ID"].count()

# Calculate the total revenue 
total_revenue = data["Price"].sum()

# Display results in dataframe 
purchasing_total = pd.DataFrame({"Number of Unique Items": [items_count],
                            "Average Price": [average_price],
                            "Number of Purchases": [number_purchases],
                            "Total Revenue": [total_revenue]})

# Change format of 'Average Price' and 'Total Revenue' to currency 
purchasing_total [["Average Price","Total Revenue"]] \
= purchasing_total [["Average Price","Total Revenue"]].applymap("${:,.2f}".format)

purchasing_total

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


### Gender Demographics

In [22]:
# Drop any duplicates 
players = data[["Gender", "SN"]].drop_duplicates(keep='first')

# Count the amount of players by gender
gender_count = players["Gender"].value_counts()

# Calculate the percentage of players by gender
gender_percent = (round(gender_count / players["Gender"].count() * 100, 2))

# Display the gender demographics in a table
gender_demo = pd.DataFrame({"Total Count": gender_count,
                          "Percentage of Players" : gender_percent})

# Change the format 'Percentage of Players' to percentage
gender_demo["Percentage of Players"] = gender_demo["Percentage of Players"].apply("{0:.2f}%".format)

# Rename the axis to show data label "Gender"
gender_demo = gender_demo.rename_axis("Gender")

gender_demo.reset_index()

Unnamed: 0,Gender,Total Count,Percentage of Players
0,Male,484,84.03%
1,Female,81,14.06%
2,Other / Non-Disclosed,11,1.91%


###  Purchasing Analysis (Gender)

In [17]:
# Count the amount of purchases by gender
purch_count_gender = data.groupby("Gender")['SN'].count()

# Calculate the average purchase price
avg_price_gender = data.groupby(["Gender"])["Price"].mean()

# Calculate the total purchase value
purch_tot_gender = data.groupby(["Gender"])['Price'].sum()

# Calculate the average purchase total per person by gender
avg_tot_gender = purch_tot_gender / gender_demo['Total Count']

# Display results in DataFrame 
purchasing_gender = pd.DataFrame({"Purchase Count": (purch_count_gender.astype('int')),
                                  "Average Purchase Price" : (avg_price_gender.apply("${0:.2f}".format)),
                                  "Total Purchase Value": (purch_tot_gender.apply("${0:.2f}".format)),
                                  "Avg Total Purchase per Person" : (avg_tot_gender.apply("${0:.2f}".format))})



purchasing_gender.reset_index()

Unnamed: 0,Gender,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
0,Female,113,$3.20,$361.94,$4.47
1,Male,652,$3.02,$1967.64,$4.07
2,Other / Non-Disclosed,15,$3.35,$50.19,$4.56


### Age Demographics

In [8]:
# Drop any duplicates 
players = data[["Age", "SN"]].drop_duplicates()

# Establish bins for ages and age group
bins = [0,9,14,19,24,29,34,39,100]
age_group = ["<10","10-14","15-19","20-24","25-29","30-34","35-39","40+"]

# Categorize the existing players using the age bins. 
players['Age Range'] = pd.cut(players['Age'], bins)

# Calculate the numbers and percentages by age group
age_count = players["Age Range"].value_counts()
age_percent = ((age_count/players["SN"].count()) * 100).round(2)

# Display results in DataFrame 
players = pd.DataFrame({'Age Ranges': age_group, 'Total Count': age_count,'Percentage of Players': age_percent})
players['Percentage of Players'] = players['Percentage of Players'].apply('{:.2f}%'.format)

# Rename the axis to show data label "Age Ranges"

players.style.hide_index()


Age Ranges,Total Count,Percentage of Players
<10,258,44.79%
10-14,107,18.58%
15-19,77,13.37%
20-24,52,9.03%
25-29,31,5.38%
30-34,22,3.82%
35-39,17,2.95%
40+,12,2.08%


### Purchasing Analysis (Age)

In [21]:

# Categorize the players using the age bins. 
data['Age Ranges'] = pd.cut(data['Age'], bins)
age_count = data["Age Ranges"].value_counts()

# Count the amount of purchases by age
purch_count_age = data.groupby("Age Ranges")['SN'].count()

# Calculate the average purchase price
avg_price_age = data.groupby("Age Ranges")['Price'].mean()


# Calculate the total purchase value
purch_tot_age = data.groupby("Age Ranges")['Price'].sum()

# Calculate the average purchase total per person by age
avg_tot_age = purch_tot_age / players['Total Count']

# Display results in DataFrame 
purchasing_age = pd.DataFrame({'Age Ranges': age_group, 
                               'Purchase Count': purch_count_age,
                               'Average Purchase Price': (avg_price_age.apply("${0:.2f}".format)),
                               'Total Purchase Value': (purch_tot_age.apply("${0:.2f}".format)), 
                               'Avg Total Purchase per Person': (avg_tot_age.apply("${0:.2f}".format))})


purchasing_age.style.hide_index()

Age Ranges,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
<10,23,$3.35,$77.13,$4.54
10-14,28,$2.96,$82.78,$3.76
15-19,136,$3.04,$412.89,$3.86
20-24,365,$3.05,$1114.06,$4.32
25-29,101,$2.90,$293.00,$3.81
30-34,73,$2.93,$214.00,$4.12
35-39,41,$3.60,$147.67,$4.76
40+,13,$2.94,$38.24,$3.19


###  Top Spenders