In [2]:
# Dependencies
import pandas as pd

In [3]:
# Store filepath in a variable
pymoli_data = "Resources/purchase_data.csv"

In [4]:
# Read our Data file with the pandas library
pymoli_data_df = pd.read_csv(pymoli_data)

In [6]:
# Show just the header to check that csv imported properly
pymoli_data_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [7]:
# Find the total number of players

player_count = pymoli_data_df["SN"].unique()
player_count = len(player_count)
player_count = {'Total Players':f'{player_count}'}
player_count_df = pd.DataFrame(player_count,index=[0])
player_count_df
#Player Count: 576

Unnamed: 0,Total Players
0,576


In [8]:
# Purchasing Analysis (Total)

# Number of Unique Items
unique_items = len(pymoli_data_df['Item ID'].unique())
unique_items = {'Number of Unique Items':f'{unique_items}'}
unique_items_df = pd.DataFrame(unique_items, index = [0])
unique_items_df


Unnamed: 0,Number of Unique Items
0,179


In [9]:
# Total Number of Purchases
num_of_purchases = (pymoli_data_df['Purchase ID'].count())
num_of_purchases

780

In [10]:
# Total Revenue
revenue = (pymoli_data_df['Price'].sum())
revenue

2379.77

In [11]:
# Average Purchase Price = revenue/number of purchases
avg_purchase_price = revenue/num_of_purchases
avg_purchase_price = {'Average Purchase Price':f'{avg_purchase_price}'}
avg_purchase_price_df = pd.DataFrame(avg_purchase_price, index= [0])
avg_purchase_price_df

Unnamed: 0,Average Purchase Price
0,3.0509871794871795


In [12]:
#Gender Demographics:

# Percentage and Count of Male Players
male_count = len(pymoli_data_df.loc[pymoli_data_df['Gender'] == 'Male']['SN'].unique())

# Percentage and Count of Female Players
female_count = len(pymoli_data_df.loc[pymoli_data_df['Gender']=='Female']['SN'].unique())

# Percentage and Count of Other / Non-Disclosed
other_count = len(pymoli_data_df.loc[pymoli_data_df['Gender']=='Other/Non-Disclosed']['SN'].unique())

sum_of_count = male_count + female_count + other_count

gender_demographics = {'Total Count':[male_count,female_count,other_count],'Percentage of Players':[round(male_count/sum_of_count*100,2),
round(female_count/sum_of_count*100,2),round(other_count/sum_of_count*100,2)]}
gender_demographics_df = pd.DataFrame(gender_demographics,index = ['Male','Female','Other / Non-Disclosed'])
gender_demographics_df


Unnamed: 0,Total Count,Percentage of Players
Male,484,85.66
Female,81,14.34
Other / Non-Disclosed,0,0.0


In [13]:
### Purchasing Analysis (Gender)

#Group data by Gender 
gender_demographics = pymoli_data_df.groupby("Gender")
total_count_gender = gender_demographics.nunique()["SN"]

# The below each broken by gender:

# Purchase Count
purchase_count = gender_demographics["Purchase ID"].count()
purchase_count

# Average Purchase Price
avg_purchase_price_gender = gender_demographics["Price"].mean()
avg_purchase_price_gender

# Total Purchase Value

total_purchase_value = gender_demographics["Price"].sum()
total_purchase_value

# Average Purchase Total per Person by Gender
avg_purchase_total_gender = total_purchase_value/total_count_gender
avg_purchase_total_gender 

# Create data frame 
purchasing_analysis_gender = pd.DataFrame({"Purchase Count": purchase_count, 
                                    "Average Purchase Price": avg_purchase_price_gender,
                                    "Total Purchase Value":total_purchase_value,
                                    "Average Purchase Total per Person": avg_purchase_total_gender})

# Creat an index for "Gender"
purchasing_analysis_gender.index.name = "Gender"

# Format 
purchasing_analysis_gender.style.format({"Average Purchase Value":"${:,.2f}",
                                  "Average Purchase Price":"${:,.2f}",
                                  "Average Purchase Total per Person":"${:,.2f}"})


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Average Purchase Total per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,361.94,$4.47
Male,652,$3.02,1967.64,$4.07
Other / Non-Disclosed,15,$3.35,50.19,$4.56


In [29]:
### Age Demographics

# The below each broken into bins of 4 years (i.e. &lt;10, 10-14, 15-19, etc.)

# Create bins for ages
age_bins = [0, 9.90, 14.90, 19.90, 24.90, 29.90, 34.90, 39.90, 99999]
group_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]


pymoli_data_df["Age Demographics"]= pd.cut(pymoli_data_df["Age"], age_bins, labels=group_names)
demographic_group =pymoli_data_df.groupby("Age Demographics")
pymoli_data_df.head()


# Purchase Count
purchase_count_age = demographic_group["SN"].nunique()
purchase_count_age

# Average Purchase Price
avg_purchase_price_age = demographic_group["Price"].mean()
avg_purchase_price_age

# Total Purchase Value
total_purchase_value = demographic_group["Price"].sum()
total_purchase_value

# Average Purchase Total per Person by Age Group
avg_purchase_per_person_age = total_purchase_value/purchase_count_age
avg_purchase_per_person_age

# Create data frame 
age_demographics = pd.DataFrame({"Purchase Count": purchase_count_age,
                                 "Average Purchase Price": avg_purchase_price_age,
                                 "Total Purchase Value":total_purchase_value,
                                 "Average Purchase Total per Person by Age Group": avg_purchase_per_person_age})

# Format the data frame with no index name in the corner
age_demographics.index.name = None

# Format with currency symbols
age_demographics.style.format({"Average Purchase Price":"${:,.2f}",
                               "Total Purchase Value":"${:,.2f}",
                               "Average Purchase Total per Person by Age Group":"${:,.2f}"})

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Average Purchase Total per Person by Age Group
<10,17,$3.35,$77.13,$4.54
10-14,22,$2.96,$82.78,$3.76
15-19,107,$3.04,$412.89,$3.86
20-24,258,$3.05,"$1,114.06",$4.32
25-29,77,$2.90,$293.00,$3.81
30-34,52,$2.93,$214.00,$4.12
35-39,31,$3.60,$147.67,$4.76
40+,12,$2.94,$38.24,$3.19
