In [1]:
# Dependencies and Setup
import pandas as pd

# Location of the purchase log, relative to this notebook (update if the data moves)
file_to_load = "Resources/purchase_data.csv"

# Parse the CSV into a pandas DataFrame and preview its first rows
purchase_data = pd.read_csv(filepath_or_buffer=file_to_load)
purchase_data.head(n=5)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [19]:
# Display the total number of players: each distinct screen name (SN) counts
# as one player, regardless of how many purchases that player made.
totalplayers = purchase_data["SN"].value_counts()  # one entry per distinct SN

# Single-row summary frame holding the player count
player_dict = [{"Total Players": len(totalplayers)}]
totalplayer_df = pd.DataFrame(player_dict)
totalplayer_df
                              

Unnamed: 0,Total Players
0,576


In [39]:
# Purchase Analysis (total)
# Basic calculations over the whole data set: number of unique items,
# average price, number of purchases, and total revenue.
unique_items = purchase_data["Item ID"].nunique()
avg_price = purchase_data["Price"].mean()
number_purchases = purchase_data["Purchase ID"].count()
total_revenue = purchase_data["Price"].sum()

# Create a single-row summary data frame to hold the results
purchase_dict = [{"Number of Unique Items": unique_items,
                  "Average Price": avg_price,
                  "Number of Purchases": number_purchases,
                  "Total Revenue": total_revenue}]
purchase_df = pd.DataFrame(purchase_dict)

# Format the currency columns for display. After this step the two columns
# hold strings, so do any further arithmetic on the variables above instead.
purchase_df["Average Price"] = purchase_df["Average Price"].map("${:.2f}".format)
purchase_df["Total Revenue"] = purchase_df["Total Revenue"].map("${:,.2f}".format)

# Display the summary data frame
purchase_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


In [85]:
# Gender Demographics: count and percentage of players for each gender value
# (Male, Female, Other / Non-Disclosed).

# Keep only Gender and SN and drop repeated rows, so that each (Gender, SN)
# pair is counted once rather than once per purchase.
gender_SN_df = purchase_data.loc[:, ["Gender", "SN"]]
gender_SN_unique_df = gender_SN_df.drop_duplicates()

# Players per gender, and that count as a share of all de-duplicated players
gender_counts = gender_SN_unique_df["Gender"].value_counts()
gender_percents = gender_counts / gender_counts.sum() * 100

# Create the gender data frame (indexed by gender label)
gender_df = pd.DataFrame({
    "Total Count": gender_counts,
    "Percentage": gender_percents
})

# Format the percentage column as display strings with two decimals
gender_df["Percentage"] = gender_df["Percentage"].map("{:.2f}%".format)

# Display the data frame
gender_df


Unnamed: 0,Total Count,Percentage
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


In [109]:
# Purchasing Analysis (Gender)
# Per gender: purchase count, average purchase price, total purchase value,
# and average purchase total per person.

# Group once and aggregate only the columns that are needed. Aggregating the
# whole frame (e.g. .mean() over every column) also touches the text columns
# SN and Item Name, which raises a TypeError on pandas >= 2.0.
gender_groups = purchase_data.groupby("Gender")
grouped_gender_df = gender_groups.sum(numeric_only=True)  # numeric totals only; not used by the summary below
purchase_count = gender_groups["Purchase ID"].count()
avg_purch_price = gender_groups["Price"].mean()
total_purch_value = gender_groups["Price"].sum()

# The per-person average divides by unique players per gender, not by the
# number of purchases. gender_counts is produced by the Gender Demographics
# cell, which must be run first; the division aligns on the gender labels.
avg_total_purch = total_purch_value / gender_counts

# Create a summary data frame to hold the results
summary_df = pd.DataFrame({"Purchase Count": purchase_count,
                           "Average Purchase Price": avg_purch_price,
                           "Total Purchase Value": total_purch_value,
                           "Total Purchase Avg. Per Person": avg_total_purch})

# Format the currency columns as display strings. Total Purchase Value uses a
# thousands separator, matching how Total Revenue is shown in the overall summary.
summary_df["Average Purchase Price"] = summary_df["Average Purchase Price"].map("${:.2f}".format)
summary_df["Total Purchase Value"] = summary_df["Total Purchase Value"].map("${:,.2f}".format)
summary_df["Total Purchase Avg. Per Person"] = summary_df["Total Purchase Avg. Per Person"].map("${:.2f}".format)

# Display the summary data frame
summary_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Total Purchase Avg. Per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,$1967.64,$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


In [None]:
# Age Demographics
# Establish bins for ages
# Categorize the existing players using the age bins. Hint: use pd.cut()
# Calculate the numbers and percentages by age group
# Create a summary data frame to hold the results
# Optional: round the percentage column to two decimal points
# Display Age Demographics Table

In [None]:
# Purchasing Analysis (Age)
# Bin the purchase_data data frame by age
# Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below
# Create a summary data frame to hold the results
# Optional: give the displayed data cleaner formatting
# Display the summary data frame

In [None]:
# Top Spenders
# Run basic calculations to obtain the results in the table below
# Create a summary data frame to hold the results
# Sort the total purchase value column in descending order
# Optional: give the displayed data cleaner formatting
# Display a preview of the summary data frame

In [None]:
# Most Popular Items
# Retrieve the Item ID, Item Name, and Item Price columns
# Group by Item ID and Item Name. Perform calculations to obtain purchase count, item price, and total purchase value
# Create a summary data frame to hold the results
# Sort the purchase count column in descending order
# Optional: give the displayed data cleaner formatting
# Display a preview of the summary data frame

In [None]:
# Most Profitable Items
# Sort the above table by total purchase value in descending order
# Optional: give the displayed data cleaner formatting
# Display a preview of the data frame