In [1]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)

In [2]:
purchase_data = pd.DataFrame(purchase_data)
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [3]:
players_duplicates = purchase_data.loc[:, ["Gender", "SN", "Age"]]    #Drop duplicates
players_duplicates = players_duplicates.drop_duplicates()

players_total = players_duplicates.count()[0]                         #Get the number of total players
players_df = pd.DataFrame({"Total Players":[players_total]})
players_df

Unnamed: 0,Total Players
0,576


## Purchasing Analysis (Total)

In [4]:
Average_price= purchase_data["Price"].mean()        #Average Price
Average_price

Total_revenue= purchase_data["Price"].sum()         #Revenue Total
Total_revenue

Total_item_id= purchase_data["Item ID"].nunique()   #Total Unique item id's
Total_item_id

Total_purchase= len(purchase_data)                  #Total Purchases
Total_purchase

#Summary of the Calculated Data  (Key and Value)
Summary_data=pd.DataFrame({"Average Price ($)" :  [Average_price],
                           "Total Revenue ($)":[Total_revenue],
                           "Total Number of Unique Items" : [Total_item_id], 
                           "Total Number of Purchases":[Total_purchase]}).round(2)                 
Summary_data

Unnamed: 0,Average Price ($),Total Revenue ($),Total Number of Unique Items,Total Number of Purchases
0,3.05,2379.77,179,780


## Gender Demographics

In [5]:
Gender_count= players_duplicates["Gender"].value_counts()                     #Gender Total
Gender_count

Percent_gender= Gender_count/players_total *100                     #Gender Percentages
Percent_gender 

Gender_data_summary= pd.DataFrame({"Total Number of Players": Gender_count,   #Gender Data Summary
                                   "Percentage of Players (%)": Percent_gender}).round(2)
Gender_data_summary


Unnamed: 0,Total Number of Players,Percentage of Players (%)
Male,484,84.03
Female,81,14.06
Other / Non-Disclosed,11,1.91


 ##  Purchasing Analysis (Gender)

In [6]:
#Basic Calculations by Gender
purchase_count_gender = purchase_data.groupby(["Gender"]).count()["Price"]
purchase_count_gender

avg_purchase_price_gender = purchase_data.groupby(["Gender"]).mean()["Price"]
avg_purchase_price_gender

avg_purchase_total_gender = purchase_data.groupby(["Gender"]).sum()["Price"]
avg_purchase_total_gender

#Average Total Purchase per Person
avg_purchase_total_per_person = avg_purchase_total_gender / Gender_data_summary ["Total Number of Players"]
avg_purchase_total_per_person

#Gender Purchase Analysis Summary

Gender_Purchase_summary = pd.DataFrame({"Purchase Count" :purchase_count_gender,
                                        "Average Purchase Count ($)" :avg_purchase_price_gender,
                                        "Average Purchase Total ($) " :avg_purchase_total_gender,
                                        "Average Purchase Total per Person ($)" :avg_purchase_total_per_person}).round(2)


Gender_Purchase_summary

Unnamed: 0_level_0,Purchase Count,Average Purchase Count ($),Average Purchase Total ($),Average Purchase Total per Person ($)
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,3.2,361.94,4.47
Male,652,3.02,1967.64,4.07
Other / Non-Disclosed,15,3.35,50.19,4.56


## Age Demographics

In [7]:
#Bins for Age Analysis
Age_bins = [0, 9.90, 14.90, 19.90, 24.90, 29.90, 34.90, 39.90, 99999]

#Name for the Groups
Age_group_names = ['<10','10-14','15-19','20-24','25-29','30-34','35-39','40+']

#Age Demographics
players_duplicates["Age Ranges"] = pd.cut(players_duplicates["Age"], Age_bins, labels=Age_group_names)

#Calculations of Age Demographics
Age_Total= players_duplicates["Age Ranges"].value_counts()
Age_Percent= Age_Total /players_total *100

#Display the Summary Data
Age_Summary = pd.DataFrame({"Total Age Group": Age_Total,
                            "Total Age Percent (%)": Age_Percent}).round(2)

Age_Summary = Age_Summary.sort_index()
Age_Summary

Unnamed: 0,Total Age Group,Total Age Percent (%)
<10,17,2.95
10-14,22,3.82
15-19,107,18.58
20-24,258,44.79
25-29,77,13.37
30-34,52,9.03
35-39,31,5.38
40+,12,2.08


## Purchasing Analysis (Age)

In [8]:
#Bin Age Purchase Data
purchase_data["Age Ranges"] = pd.cut(purchase_data["Age"], Age_bins, labels = Age_group_names)

# #Basic Calculations by Age
purchase_count_Age = purchase_data.groupby(["Age Ranges"]).count()["Price"]
purchase_count_Age

avg_purchase_price_Age = purchase_data.groupby(["Age Ranges"]).mean()["Price"]
avg_purchase_price_Age

avg_purchase_total_Age = purchase_data.groupby(["Age Ranges"]).sum()["Price"]
avg_purchase_total_Age

#Average Purchase Total per Person
age_purchase_per_person= avg_purchase_total_Age / Age_Summary ["Total Number of Players"]
age_purchase_per_person

# # #Display the Purchase Data by Age
# age_purchase_summary = pd.DataFrame({"Purchase Count" : purchase_count_Age,
#                                      "Average Purchase Price ($)" : avg_purchase_price_Age,
#                                      "Total Purchase Value ($)" : avg_purchase_total_Age,
#                                      "Average Purchase Total per Person ($)" : age_purchase_per_person})
# age_purchase_summary

KeyError: 'Total Number of Players'

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

