In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# Relative path of the purchases CSV (change this if the file lives elsewhere)
file_to_load = "Resources/purchase_data.csv"

# Load the CSV into a pandas DataFrame and preview its first rows
purchase_data = pd.read_csv(filepath_or_buffer=file_to_load)
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [2]:

# One-row table holding the number of distinct "SN" values (one per player)
total_players = pd.DataFrame({"Total Players": [purchase_data["SN"].nunique()]})
total_players

Unnamed: 0,Total Players
0,576


In [3]:
# Headline figures computed over every purchase row
unique_items = purchase_data["Item ID"].nunique()        # distinct item IDs
average_price = purchase_data["Price"].mean()            # mean of the Price column
number_purchases = purchase_data["Purchase ID"].count()  # non-null purchase IDs
total_revenue = purchase_data["Price"].sum()             # sum of the Price column

# One-row summary table; the two money figures are stored as "$x,xxx.xx" text
currency = "${:,.2f}".format
basic_calc_df = pd.DataFrame(
    {
        "Number of Unique Items": [unique_items],
        "Average Price": [currency(average_price)],
        "Number of Purchases": [number_purchases],
        "Total Revenue": [currency(total_revenue)],
    },
    # Pin the column order explicitly rather than relying on dict ordering
    columns=["Number of Unique Items", "Average Price", "Number of Purchases", "Total Revenue"],
)
basic_calc_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,$3.05,780,"$2,379.77"


In [4]:
# Gender demographics: count each player once, then express every gender as a
# percentage of all players.

# Keep only the first purchase row per "SN" so repeat buyers are counted once.
# drop_duplicates returns a new frame, so purchase_data keeps every purchase.
unique_players = purchase_data.drop_duplicates(subset="SN", keep="first")

# Players per gender, as a one-column frame indexed by Gender
gendered_data = unique_players.groupby("Gender").size().to_frame(name="Count_Genders")

# Sum the count column itself (a scalar); calling int() on the one-element
# Series returned by DataFrame.sum() is deprecated in recent pandas.
total = int(gendered_data["Count_Genders"].sum())

gendered_data["Percentage"] = gendered_data["Count_Genders"] / total * 100
gendered_data


Unnamed: 0_level_0,Count_Genders,Percentage
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,81,14.0625
Male,484,84.027778
Other / Non-Disclosed,11,1.909722


In [5]:
# Purchasing analysis by gender: purchase count, average purchase price,
# total purchase value and average total purchase per person
gender_analysis = purchase_data.groupby(["Gender"])
gender_prices = gender_analysis["Price"]

purchase_count = gender_analysis["Purchase ID"].count()
average_purchase_price = gender_prices.mean()
total_purchaseValue = gender_prices.sum()
# Per-person spend divides each gender's total by its unique-player count
# (gendered_data.Count_Genders); the division aligns on the Gender index
average_purchase_person = total_purchaseValue / gendered_data["Count_Genders"]

# Create a summary data frame to hold the results
gender_summary = pd.DataFrame(
    {
        "Purchase Count": purchase_count,
        "Average Purchase Price": average_purchase_price,
        "Total Purchase Value": total_purchaseValue,
        "Avg Total Purchase per Person": average_purchase_person,
    }
)

# Give the dollar columns cleaner "$x,xxx.xx" formatting
for money_column in (
    "Average Purchase Price",
    "Total Purchase Value",
    "Avg Total Purchase per Person",
):
    gender_summary[money_column] = gender_summary[money_column].map("${:,.2f}".format)

# Display the summary data frame
gender_summary

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,"$1,967.64",$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


In [6]:
# Establish bins for ages. With pd.cut's default right-closed intervals these
# edges produce (0, 9], (9, 14], (14, 19], ... (34, 39], (39, 999].
bins = [0, 9, 14, 19, 24, 29, 34, 39, 999]
group_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

# Count each player once: keep only the first purchase row per "SN".
# drop_duplicates returns a new frame, so purchase_data keeps every purchase.
unique_players = purchase_data.drop_duplicates(subset="SN", keep="first")

# Categorize the players using the age bins. assign() builds a new frame
# instead of writing the column into a slice of purchase_data.
players_binned = unique_players.assign(
    **{"New Bins": pd.cut(unique_players["Age"], bins, labels=group_names)}
)

# Calculate the numbers and percentages by age group.
# observed=False keeps every age group in the table, even one with no players.
# bin_df intentionally ends the cell as the grouped object.
bin_df = players_binned.groupby("New Bins", observed=False)
binned_data = bin_df["Purchase ID"].count().to_frame()

# Sum the count column itself (a scalar); calling int() on the one-element
# Series returned by DataFrame.sum() is deprecated in recent pandas.
total = int(binned_data["Purchase ID"].sum())

# Percentage of players per age group, rendered with two decimal places
binned_data["Percentage of Players"] = (
    binned_data["Purchase ID"] * 100 / total
).map("{:,.2f}".format)

# Display Age Demographics Table
binned_data

Unnamed: 0_level_0,Purchase ID,Percentage of Players
New Bins,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,17,2.95
10-14,22,3.82
15-19,107,18.58
20-24,258,44.79
25-29,77,13.37
30-34,52,9.03
35-39,31,5.38
40+,12,2.08


In [7]:
# Purchasing analysis by age group: every purchase row is binned by the
# buyer's age (same edges and labels as the age demographics table).
bins = [0, 9, 14, 19, 24, 29, 34, 39, 999]
group_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

# assign() returns a new frame, so purchase_data itself never gains the
# "New Bins" column (writing it through a pd.DataFrame(purchase_data) alias
# can modify purchase_data on pandas versions where the two share data).
purchases_binned = purchase_data.assign(
    **{"New Bins": pd.cut(purchase_data["Age"], bins, labels=group_names)}
)
# observed=False keeps every age group in the result, even one with no purchases.
# binned_df intentionally ends the cell as the grouped object.
binned_df = purchases_binned.groupby("New Bins", observed=False)

# Run basic calculations to obtain purchase count, avg. purchase price,
# total purchase value and avg. purchase total per person
purchase_count = binned_df["Purchase ID"].count()
Average_Purchase_Price = binned_df["Price"].mean()
Total_Purchase_Value = binned_df["Price"].sum()
# Per-person spend divides each group's total by its unique-player count,
# taken from binned_data["Purchase ID"]; the division aligns on the age-group index
Avg_Total_Purchase_per_Person = Total_Purchase_Value / binned_data["Purchase ID"]

# Create a summary data frame to hold the results
summary_binned_df = pd.DataFrame(
    {
        "Purchase Count": purchase_count,
        "Average Purchase Price": Average_Purchase_Price,
        "Total Purchase Value": Total_Purchase_Value,
        "Avg Total Purchase per Person": Avg_Total_Purchase_per_Person,
    }
)

# Give the dollar columns cleaner "$x,xxx.xx" formatting
for money_column in (
    "Average Purchase Price",
    "Total Purchase Value",
    "Avg Total Purchase per Person",
):
    summary_binned_df[money_column] = summary_binned_df[money_column].map("${:,.2f}".format)

# Display the summary data frame
summary_binned_df



Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
New Bins,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,23,$3.35,$77.13,$4.54
10-14,28,$2.96,$82.78,$3.76
15-19,136,$3.04,$412.89,$3.86
20-24,365,$3.05,"$1,114.06",$4.32
25-29,101,$2.90,$293.00,$3.81
30-34,73,$2.93,$214.00,$4.12
35-39,41,$3.60,$147.67,$4.76
40+,13,$2.94,$38.24,$3.19


In [8]:
# Top spenders: purchase count, average purchase price and total purchase
# value for each "SN"
top_spenders = pd.DataFrame(purchase_data)
top_spend = top_spenders.groupby("SN")
spender_prices = top_spend["Price"]

p_count = top_spend["Purchase ID"].count()
average_purch_price = spender_prices.mean()
tot_purch_value = spender_prices.sum()

# Create a summary data frame and sort the total purchase value column in
# descending order (done while the totals are still numeric)
ind_purch_df = pd.DataFrame(
    {
        "Purchase Count": p_count,
        "Average Purchase Price": average_purch_price,
        "Total Purchase Value": tot_purch_value,
    }
).sort_values(by="Total Purchase Value", ascending=False)

# Give the dollar columns cleaner "$x,xxx.xx" formatting
for money_column in ("Average Purchase Price", "Total Purchase Value"):
    ind_purch_df[money_column] = ind_purch_df[money_column].map("${:,.2f}".format)

# Display a preview of the summary data frame
ind_purch_df.head()



Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,$3.79,$18.96
Idastidru52,4,$3.86,$15.45
Chamjask73,3,$4.61,$13.83
Iral74,4,$3.40,$13.62
Iskadarya95,3,$4.37,$13.10


In [9]:
# Retrieve the Item ID, Item Name, and Item Price columns
popular_items = purchase_data[["Item ID", "Price", "Item Name"]]

# Group by Item ID and Item Name, then compute purchase count, item price
# (mean of Price within the group) and total purchase value
popular_grouped = popular_items.groupby(["Item ID", "Item Name"])
item_prices = popular_grouped["Price"]
popular_purchase_count = item_prices.count()
popular_item_price_series = item_prices.mean()
popular_item_total = item_prices.sum()

# Create a summary data frame to hold the results
popular_summary = pd.DataFrame(
    {
        "Purchase Count": popular_purchase_count,
        "Item Price": popular_item_price_series,
        "Total Purchase Value": popular_item_total,
    }
)
# Keep a second, still-numeric and unsorted copy so the same table can later
# be ranked by total purchase value
popular_summary_reversed = popular_summary.copy()

# Give the dollar columns cleaner "$x,xxx.xx" formatting
for money_column in ("Item Price", "Total Purchase Value"):
    popular_summary[money_column] = popular_summary[money_column].map("${:,.2f}".format)

# Sort the purchase count column in descending order
popular_summary = popular_summary.sort_values(by="Purchase Count", ascending=False)

# Display a preview of the summary data frame
popular_summary.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
145,Fiery Glass Crusader,9,$4.58,$41.22
108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77
82,Nirvana,9,$4.90,$44.10
19,"Pursuit, Cudgel of Necromancy",8,$1.02,$8.16


In [10]:
# Rank the item table by total purchase value, highest first; the sort runs
# before formatting, while the totals are still numeric
popular_summary_reversed = popular_summary_reversed.sort_values(
    by="Total Purchase Value", ascending=False
)

# Give the dollar columns cleaner "$x,xxx.xx" formatting
for money_column in ("Item Price", "Total Purchase Value"):
    popular_summary_reversed[money_column] = popular_summary_reversed[money_column].map(
        "${:,.2f}".format
    )

# Display a preview of the data frame
popular_summary_reversed.head()




Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22
92,Final Critic,8,$4.88,$39.04
103,Singed Scalpel,8,$4.35,$34.80


In [12]:
# Written description of three observable trends based on the data.
# Each print emits one observation; the embedded "\n" characters hard-wrap the
# text and separate the observations with a blank line.
print(
    "The most likely age groups to purchase items on the game site are people within the ages of 15 and 29, with a higher chance\n"
    " of people within the 20-24 age group to buy more items, while younger people (below 10 years old) and older people (above 40 \n"
    " years old) are the age groups least likely to buy items"
)
# Typo fixed in the text below: "te most" -> "the most"
print(
    "\n The age groups who play the game the most are people within the ages of 15 and 29, with a higher number of \n"
    " people within the 20-24 age group, while younger people (below 10 years old) and older people (above 40 years old) are\n"
    " the age groups that play the game less"
)
print("\n Women are more likely to spend on average more than male players on items")

The most likely age groups to purchase items on the game site are people within the ages of 15 and 29, with a higher chance
 of people within the 20-24 age group to buy more items, while younger people (below 10 years old) and older people (above 40 
 years old) are the age groups least likely to buy items

 The age groups who play the game te most are people within the ages of 15 and 29, with a higher number of 
 people within the 20-24 age group, while younger people (below 10 years old) and older people (above 40 years old) are
 the age groups that play the game less

 Women are more likely to spend on average more than male players on items
