In [2]:
#Import dependencies or modules
import pandas as pd
import numpy as np

In [3]:
#Relative location of the purchase records (one directory above the working directory)
purchase_data = "../PurchaseData.csv"

#Load the CSV into a dataframe, decoding the file as UTF-8
purchase_data_df = pd.read_csv(filepath_or_buffer=purchase_data, encoding="utf-8")

#Preview the first five rows as the cell output
purchase_data_df.head(5)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [4]:
#Player Count
#Display the total number of players
#Each distinct value in column "SN" is counted as one player
#.nunique() returns that count directly, instead of building a full .value_counts() table only to measure its length (both ignore missing values)
total_players = purchase_data_df["SN"].nunique()
print(total_players)

#Wrap the count in a one-row dataframe; the dictionary key becomes the column header
total_players_counted = pd.DataFrame({"Total Number of Players": [total_players]})

total_players_counted

576


Unnamed: 0,Total Number of Players
0,576


In [43]:
#Purchasing Analysis (Total)
#Summarise the whole purchase log: number of distinct items, mean price, number of purchases, and total revenue
#The "Price" column feeds both the mean and the sum, so select it once
price_column = purchase_data_df["Price"]

#Distinct item identifiers
unique_items = len(purchase_data_df["Item ID"].unique())
print(unique_items)

#Mean price over all purchases
average_price = price_column.mean()
print(average_price)

#Number of non-missing purchase identifiers
total_purchases = purchase_data_df["Purchase ID"].count()
print(total_purchases)

#Sum of all prices
total_revenue = price_column.sum()
print(total_revenue)

#Collect the four figures into a one-row dataframe; each dictionary key becomes a column header
summary_columns = {
    "Number of Unique Items": [unique_items],
    "Average Purchase Price": [average_price],
    "Total Number of Purchases": [total_purchases],
    "Total Revenue": [total_revenue],
}
purchase_data_df_summary = pd.DataFrame(summary_columns)

#Render the two money columns as dollar strings with two decimal places
for money_column in ("Average Purchase Price", "Total Revenue"):
    purchase_data_df_summary[money_column] = purchase_data_df_summary[money_column].map("${:.2f}".format)

purchase_data_df_summary

183
3.0509871794871795
780
2379.77


Unnamed: 0,Number of Unique Items,Average Purchase Price,Total Number of Purchases,Total Revenue
0,183,$3.05,780,$2379.77


In [29]:
#Gender Demographics
#Count the distinct (non-missing) values in column "Gender"
gender_count = purchase_data_df["Gender"].nunique()
print(gender_count)

#List the gender categories found in column "Gender"
#sorted() fixes the order: iterating a bare set of strings can yield a different order after every kernel restart
#Missing values are dropped so the list agrees with .nunique() above and with the groupby below
gender_types = purchase_data_df["Gender"]
gender_types_list = sorted(set(gender_types.dropna()))
print(gender_types_list)

#Group the purchase rows by gender; groupby_gender is kept as its own variable for reuse
groupby_gender = purchase_data_df.groupby("Gender")

#Count distinct screen names per gender, so a player with several purchases is counted once
unique_genders = groupby_gender["SN"].nunique()

#Share of all players in each gender; total_players comes from the Player Count cell
percentage_of_players = (unique_genders / total_players) * 100

#Combine counts and percentages into one dataframe indexed by gender
gender_results_df = pd.DataFrame({"Total Count": unique_genders, "Percentage of Players": percentage_of_players})

#Order the rows from largest to smallest group, then format the percentages as strings with two decimals
gender_results_df = gender_results_df.sort_values(["Total Count"], ascending = False)
gender_results_df["Percentage of Players"] = gender_results_df["Percentage of Players"].map("{0:.2f}%".format)

gender_results_df

3
['Female', 'Male', 'Other / Non-Disclosed']


Unnamed: 0_level_0,Total Count,Percentage of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


In [57]:
#Purchasing Analysis (Gender)
#Per-gender purchase count, average purchase price, total purchase value, and average total purchase per person
#Relies on groupby_gender and unique_genders created in the Gender Demographics cell
#The grouped "Price" column feeds both the mean and the sum, so select it once
gender_prices = groupby_gender["Price"]

#Number of purchases made by each gender
purchase_count = groupby_gender["Purchase ID"].count()
print(purchase_count)

#Mean price paid per purchase within each gender
average_purchase_price = gender_prices.mean()
print(average_purchase_price)

#Total amount spent by each gender
total_purchase_value = gender_prices.sum()
print(total_purchase_value)

#Divide each gender's total by its number of distinct players (not by its number of purchases)
average_purchase_per_person = total_purchase_value / unique_genders
print(average_purchase_per_person)

#Combine the four series into one dataframe indexed by gender
gender_columns = {
    "Purchase Count": purchase_count,
    "Average Purchase Price": average_purchase_price,
    "Total Purchase Value": total_purchase_value,
    "Average Total Purchase per Person": average_purchase_per_person,
}
purchase_analysis_gender_df = pd.DataFrame(gender_columns)

#Render the three money columns as dollar strings with two decimal places
for money_column in ("Average Purchase Price", "Total Purchase Value", "Average Total Purchase per Person"):
    purchase_analysis_gender_df[money_column] = purchase_analysis_gender_df[money_column].map("${:.2f}".format)

purchase_analysis_gender_df

Gender
Female                   113
Male                     652
Other / Non-Disclosed     15
Name: Purchase ID, dtype: int64
Gender
Female                   3.203009
Male                     3.017853
Other / Non-Disclosed    3.346000
Name: Price, dtype: float64
Gender
Female                    361.94
Male                     1967.64
Other / Non-Disclosed      50.19
Name: Price, dtype: float64
Gender
Female                   4.468395
Male                     4.065372
Other / Non-Disclosed    4.562727
dtype: float64


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Average Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,$1967.64,$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


In [55]:
#Age Demographics
#List the distinct ages in column "Age" to decide where the bin edges should go
#sorted() makes the ascending order explicit instead of relying on set iteration order
age_ranges = purchase_data_df["Age"]
age_ranges_list = sorted(set(age_ranges.dropna()))
print(age_ranges_list)

#Bin edges and their labels; pd.cut treats each bin as right-inclusive, so the bins are (0, 9], (9, 14], ..., (44, 49]
#NOTE(review): an age of 0 or above 49 falls outside every bin and would be left out of the groups - confirm against the data if it changes
age_bins = [0, 9, 14, 19, 24, 29, 34, 39, 44, 49]
age_groups = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40-44", "45-49"]

#Add the age group of every purchase row to the dataframe as a categorical column
purchase_data_df["Age Groups"] = pd.cut(purchase_data_df["Age"], age_bins, labels=age_groups)

#Group the purchase rows by age group
#observed=False is stated explicitly so that empty age groups still appear (with a count of 0)
#and so that pandas does not warn about the changing default for categorical groupers
groupby_age = purchase_data_df.groupby("Age Groups", observed=False)

#Count distinct screen names per age group, so a player with several purchases is counted once
unique_ages = groupby_age["SN"].nunique()

#Share of all players in each age group; total_players comes from the Player Count cell
percentage_by_ages = (unique_ages / total_players) * 100

#Combine counts and percentages into one dataframe indexed by age group
age_results_df = pd.DataFrame({"Total Count": unique_ages, "Percentage of Players": percentage_by_ages})

#Keep the rows in age-group order (youngest first), then format the percentages as strings with two decimals
age_results_df = age_results_df.sort_values(["Age Groups"], ascending = True)
age_results_df["Percentage of Players"] = age_results_df["Percentage of Players"].map("{0:.2f}%".format)

age_results_df

[7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45]


Unnamed: 0_level_0,Total Count,Percentage of Players
Age Groups,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,17,2.95%
10-14,22,3.82%
15-19,107,18.58%
20-24,258,44.79%
25-29,77,13.37%
30-34,52,9.03%
35-39,31,5.38%
40-44,11,1.91%
45-49,1,0.17%
