In [None]:
# Dependencies and Setup
import pandas as pd

# Location of the purchase log, relative to the notebook's working directory.
# Point this at your own copy of the CSV if it lives somewhere else.
file_to_load = "Resources/purchase_data.csv"

# Load the CSV into a DataFrame; the cells below read from `purchase_data`.
purchase_data = pd.read_csv(filepath_or_buffer=file_to_load)

In [None]:
# Preview the first five rows to confirm the file parsed as expected.
purchase_data.head(n=5)

In [None]:
# Purchases per screen name, most frequent first. Any value above 1 means the
# same player appears on several rows, so rows cannot be counted as players.
purchase_data["SN"].value_counts(ascending=False)

In [None]:
# One row per player: keep the first purchase row seen for each screen name
# (drop_duplicates defaults to keep="first").
# The explicit .copy() makes `deduped` an independent frame, so columns can be
# added to it later without pandas raising SettingWithCopyWarning.
deduped = purchase_data.drop_duplicates(subset="SN").copy()

In [None]:
# Total number of distinct players: `deduped` holds one row per screen name.
# Compute it once, then end the cell with the name so the value is displayed.
total_players = deduped["SN"].count()
total_players

In [None]:
# Purchasing Analysis (overall)
# Builds a one-row summary of the whole purchase log and displays it.

# Plain counts stay numeric.
Unique_Items = purchase_data["Item ID"].nunique()
total_purchases = purchase_data["Purchase ID"].count()

# Currency figures are converted straight to display strings ("$1,234.56"),
# so `avg_price` and `total_rev` are text from here on, not numbers.
avg_price = "${:,.2f}".format(purchase_data["Price"].mean())
total_rev = "${:,.2f}".format(purchase_data["Price"].sum())

df = pd.DataFrame(
    [[Unique_Items, avg_price, total_purchases, total_rev]],
    columns=["Number of Unique Items", "Average Price", "Number of Purchases", "Total Revenue"],
)

# Only the last expression of a cell is rendered, so end with the table itself.
df

In [None]:
# Purchasing Analysis, alternate method
# Collect the raw (unformatted) summary figures in a dict, then turn the dict
# into a one-row DataFrame.
summary_dict = {
    "Unique_Items": purchase_data["Item ID"].nunique(),
    "avg_price": purchase_data["Price"].mean(),
    "total_rev": purchase_data["Price"].sum(),
    "total_purchases": purchase_data["Purchase ID"].count(),
}

# A plain dict has no .squeeze(), and from_dict(orient="columns") rejects a
# dict whose values are all scalars ("you must pass an index"). Wrapping each
# value in a one-element list gives pandas the single row it needs.
summary_stats = pd.DataFrame.from_dict(
    {label: [value] for label, value in summary_dict.items()},
    orient="columns",
)
summary_stats

In [None]:
# Gender demographics
# Uses `deduped` (one row per screen name) so each player is counted once.

# Head-count per gender.
gender_group = deduped.groupby("Gender")
gender_list = gender_group["SN"].count()

# Wrap the counts in a DataFrame indexed by gender.
gender_df = pd.DataFrame({"Total Count": gender_list})

# Share of all counted players, rounded to two decimals and stored as text
# with a trailing percent sign.
gender_share = gender_df["Total Count"] / gender_df["Total Count"].sum() * 100
gender_df["Percentage of Players"] = gender_share.round(2).astype(str) + "%"

# Show the largest group first. This is a display-only view: `gender_df`
# itself keeps its original row order.
gender_df.sort_values(by="Total Count", ascending=False)

In [None]:
# Purchasing Analysis by gender
# Uses every purchase row (not the de-duplicated frame), since the figures
# describe purchases rather than players.

gender_purchasing = purchase_data.groupby("Gender")

g_p_count = gender_purchasing["Purchase ID"].count()
g_p_price = gender_purchasing["Price"].mean()
g_p_value = gender_purchasing["Price"].sum()

gpa_df = pd.DataFrame({
    "Purchase Count": g_p_count,
    "Average Purchase Price": g_p_price,
    "Total Purchase Value": g_p_value,
})

# sort_values returns a new frame; assign it back, otherwise the sort is lost.
gpa_df = gpa_df.sort_values(by="Purchase Count", ascending=False)

# Spend per unique player. The division aligns on the Gender index, so the row
# order of `gpa_df` and `gender_df` does not need to match.
gpa_df["Avg Total Purchase per Person"] = gpa_df["Total Purchase Value"] / gender_df["Total Count"]

# Format currency last: after this the three columns hold strings, not numbers.
for money_column in ("Avg Total Purchase per Person", "Total Purchase Value", "Average Purchase Price"):
    gpa_df[money_column] = gpa_df[money_column].map("${:.2f}".format)

gpa_df

In [None]:
# Age Demographics

# Bin edges for pd.cut. Intervals are closed on the right, and
# include_lowest=True also closes the first one on the left, giving
# [0, 9], (9, 14], ..., (39, 200].
bins = [0, 9, 14, 19, 24, 29, 34, 39, 200]
group_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

# `deduped` is the result of drop_duplicates(); adding a column to such a frame
# in place can raise SettingWithCopyWarning. Take an explicit copy first.
deduped = deduped.copy()
deduped["Age Range"] = pd.cut(deduped["Age"], bins, labels=group_names, include_lowest=True)

# "Age Range" is categorical. observed=False is stated explicitly so brackets
# with no players still appear (count 0) and the result does not depend on the
# pandas version's default.
age_group = deduped.groupby("Age Range", observed=False)

age_group_df = age_group["Age Range"].count().to_frame(name="Total Count").reset_index()

# Percentage is taken against `total_players` (the unique-player count from an
# earlier cell) and stored as text with a trailing percent sign.
age_group_df["Percentage of Players"] = (
    (age_group_df["Total Count"] / total_players) * 100
).round(2).astype(str) + "%"

# Index by bracket so later cells can align their own per-bracket figures on it.
age_group_df = age_group_df.set_index("Age Range")
age_group_df

In [None]:
# Purchasing Analysis by age bracket

# Tag every purchase row with the buyer's age bracket, using the `bins` and
# `group_names` defined in the age demographics cell.
purchase_data["Age Range"] = pd.cut(purchase_data["Age"], bins, labels=group_names, include_lowest=True)

# "Age Range" is categorical. observed=False is stated explicitly so every
# bracket keeps a row even with no purchases, whatever the pandas default is.
purchase_age_group = purchase_data.groupby("Age Range", observed=False)

# One small frame per statistic, each carrying "Age Range" as a column to merge on.
purchase_age_group_count = purchase_age_group["Purchase ID"].count().to_frame(name="Purchase Count").reset_index()
purchase_age_group_avg = purchase_age_group["Price"].mean().to_frame(name="Average Purchase Price").reset_index()
purchase_age_group_total = purchase_age_group["Price"].sum().to_frame(name="Total Purchase Value").reset_index()

# Stitch the three statistics together and index the result by bracket.
purchase_age_group_merged = pd.merge(purchase_age_group_count, purchase_age_group_avg, on="Age Range")
purchase_age_group_merged = pd.merge(purchase_age_group_merged, purchase_age_group_total, on="Age Range")
purchase_age_group_merged = purchase_age_group_merged.set_index("Age Range")

# Spend per unique player in each bracket. The division aligns on the
# "Age Range" index shared with `age_group_df`.
purchase_age_group_merged["Avg Total Purchase per Person"] = (
    purchase_age_group_merged["Total Purchase Value"] / age_group_df["Total Count"]
)

# Format currency last: after this the three columns hold strings, not numbers.
for money_column in ("Average Purchase Price", "Total Purchase Value", "Avg Total Purchase per Person"):
    purchase_age_group_merged[money_column] = purchase_age_group_merged[money_column].map("${:.2f}".format)

purchase_age_group_merged

In [None]:
# Top Spenders

# Per-player purchase count, average price and total spend.
# Aggregations are given as strings ("sum", not the builtin `sum`): passing the
# builtin is deprecated in recent pandas and raises a FutureWarning.
top_spenders2 = purchase_data.groupby("SN").agg(
    purchase_count=("Purchase ID", "count"),
    avg_purchase_price=("Price", "mean"),
    total_purchase_value=("Price", "sum"),
)

# Rank while the totals are still numeric; the formatting below turns them
# into strings, which would sort lexicographically.
top_spenders2 = top_spenders2.sort_values(by="total_purchase_value", ascending=False)

top_spenders2["avg_purchase_price"] = top_spenders2["avg_purchase_price"].map("${:.2f}".format)
top_spenders2["total_purchase_value"] = top_spenders2["total_purchase_value"].map("${:.2f}".format)

# Show the five biggest spenders.
top_spenders2.head()


In [None]:
# Most Popular Items

# Per-item purchase count, mean price paid and total revenue, keyed by
# (Item ID, Item Name).
# Aggregations are given as strings ("sum", not the builtin `sum`): passing the
# builtin is deprecated in recent pandas and raises a FutureWarning.
popular = purchase_data.groupby(["Item ID", "Item Name"]).agg(
    purchase_count=("Purchase ID", "count"),
    item_price=("Price", "mean"),
    total_purchase_value=("Price", "sum"),
)

# Most-purchased items first.
popular = popular.sort_values(by="purchase_count", ascending=False)

# Format currency last: after this the two columns hold strings, not numbers.
popular["item_price"] = popular["item_price"].map("${:.2f}".format)
popular["total_purchase_value"] = popular["total_purchase_value"].map("${:.2f}".format)

popular.head()

In [None]:
# Most Profitable Items

# Same per-item statistics as the popularity table, recomputed from the raw
# purchases because this ranking needs numeric totals to sort on.
# Aggregations are given as strings ("sum", not the builtin `sum`): passing the
# builtin is deprecated in recent pandas and raises a FutureWarning.
most_profitable = purchase_data.groupby(["Item ID", "Item Name"]).agg(
    purchase_count=("Purchase ID", "count"),
    item_price=("Price", "mean"),
    total_purchase_value=("Price", "sum"),
)

# Highest-revenue items first; sort before the totals are turned into strings.
most_profitable = most_profitable.sort_values(by="total_purchase_value", ascending=False)

most_profitable["item_price"] = most_profitable["item_price"].map("${:.2f}".format)
most_profitable["total_purchase_value"] = most_profitable["total_purchase_value"].map("${:.2f}".format)

most_profitable.head()