### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## Player Count

In [2]:
Players=purchase_data["SN"].nunique()
print (f" Total number of players is {Players}.")


 Total number of players is 576.


* Display the total number of players


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [3]:
#Calculations
AveragePrice=purchase_data["Price"].mean()
MaxPrice=purchase_data["Price"].max()
MinPrice=purchase_data["Price"].min()
Total_Items_sold=purchase_data["Item ID"].count()
Revenues=Total_Items_sold*AveragePrice

#report
PurchasingAnalysis=pd.DataFrame(
{"Average Price":[AveragePrice],"Most expensive item":[MaxPrice],"Cheapest item":[MinPrice], "Number of Items in Store": [Total_Items_sold], "Total Revenues":[Revenues]})

## Formating

PurchasingAnalysis["Average Price"]=PurchasingAnalysis["Average Price"].map("${:.2f}".format)
PurchasingAnalysis["Most expensive item"]=PurchasingAnalysis["Most expensive item"].map("${:.2f}".format)
PurchasingAnalysis["Cheapest item"]=PurchasingAnalysis["Cheapest item"].map("${:.2f}".format)
PurchasingAnalysis["Total Revenues"]=PurchasingAnalysis["Total Revenues"].map("${:.2f}".format)

PurchasingAnalysis.transpose()




Unnamed: 0,0
Average Price,$3.05
Most expensive item,$4.99
Cheapest item,$1.00
Number of Items in Store,780
Total Revenues,$2379.77


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [43]:
#Percentage and Count of Male Players

maleData=purchase_data.loc[(purchase_data["Gender"] == "Male")]
mPlayers=maleData["SN"].nunique()
mPercentage=round(((mPlayers/Players)*100),2)

#Percentage and Count of Female Players

femaleData=purchase_data.loc[(purchase_data["Gender"] == "Female")]
fPlayers=femaleData["SN"].nunique()
fPercentage=round(((fPlayers/Players)*100),2)

#Percentage and Count of Other / Non-Disclosed
ndPlayers=Players-(mPlayers+fPlayers)
ndPercentage=round(((ndPlayers/Players)*100),2)

fPercentage

14.06

In [69]:
GPercentage=purchase_data["Gender"].value_counts(normalize=True)*100
GCount=purchase_data["Gender"].value_counts()
GCount

genderCount= pd.DataFrame(GCount)
genderPercentage=pd.DataFrame(GPercentage)
genderPercentage

Unnamed: 0,Gender
Male,83.589744
Female,14.487179
Other / Non-Disclosed,1.923077


In [48]:
# Gender Demographics Report

genderDemoReport=pd.DataFrame({
    "Count of Female Players":[fPlayers],
    "Count of Male Players":[mPlayers],
     "Count of Non-Disclosed":[ndPlayers],
     "Percentage of Female Players":[fPercentage],
     "Percentage of Male Players":[mPercentage],
     "Percentage of Non-Disclosed Players":[ndPercentage]
})

#Formatting
genderDemoReport["Percentage of Female Players"]=genderDemoReport["Percentage of Female Players"].map("{:.2f}%".format)
genderDemoReport["Percentage of Male Players"]=genderDemoReport["Percentage of Male Players"].map("{:.2f}%".format)
genderDemoReport["Percentage of Non-Disclosed Players"]=genderDemoReport["Percentage of Non-Disclosed Players"].map("{:.2f}%".format)


genderDemoReport.T


Unnamed: 0,0
Count of Female Players,81
Count of Male Players,484
Count of Non-Disclosed,11
Percentage of Female Players,14.06%
Percentage of Male Players,84.03%
Percentage of Non-Disclosed Players,1.91%



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [6]:
##Male data
maleData=purchase_data.loc[(purchase_data["Gender"] == "Male")]
malesales=maleData["Price"].sum()
malesalesaverage=maleData["Price"].mean()
malesalesunit=maleData["Price"].count()

##Female data
femaleData=purchase_data.loc[(purchase_data["Gender"] == "Female")]
femalesales=femaleData["Price"].sum()
femalesalesaverage=femaleData["Price"].mean()
femalesalesunit=femaleData["Price"].count()

In [7]:
##Report

genderSalesReport=pd.DataFrame({
    "Female Sales":[femalesales],
    "Male Sales":[malesales],
    "Female Average Prices":[femalesalesaverage],
    "Male Average Price":[malesalesaverage],
    "Number of sales by female":[femalesalesunit],
    "Number of sales by male":[malesalesunit],
    })

genderSalesReport["Female Sales"]=genderSalesReport["Female Sales"].map("${:.2f}".format)
genderSalesReport["Female Average Prices"]=genderSalesReport["Female Average Prices"].map("${:.2f}".format)
genderSalesReport["Male Sales"]=genderSalesReport["Male Sales"].map("${:.2f}".format)
genderSalesReport["Male Average Price"]=genderSalesReport["Male Average Price"].map("${:.2f}".format)

genderSalesReport.transpose()

Unnamed: 0,0
Female Sales,$361.94
Male Sales,$1967.64
Female Average Prices,$3.20
Male Average Price,$3.02
Number of sales by female,113
Number of sales by male,652


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [17]:
#Bins by Age data
medianAge=purchase_data["Age"].median()
maxAge=purchase_data["Age"].max()
minAge=purchase_data["Age"].min()
firstStep=(medianAge+minAge)/2
secondStep=(medianAge+maxAge)/2


##
bins = [minAge, firstStep, medianAge, secondStep,maxAge]
group_names = ["Youngest Age Group", "Second Age Group", "Third Age Group", "Oldest Age Group"]
purchase_data["Age Group"] = pd.cut(purchase_data["Age"], bins, labels=group_names, right=False)

purchase_data.head()


Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price,Age Group
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53,Second Age Group
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56,Oldest Age Group
2,2,Ithergue48,24,Male,92,Final Critic,4.88,Third Age Group
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27,Third Age Group
4,4,Iskosia90,23,Male,131,Fury,1.44,Third Age Group


In [9]:
AgeTotal=purchase_data["Age Group"].value_counts()
AgeTotal=AgeTotal.reset_index()
AgeTotal

Unnamed: 0,index,Age Group
0,Third Age Group,369
1,Second Age Group,297
2,Oldest Age Group,62
3,Youngest Age Group,51


In [10]:
AgeSummary=(purchase_data["Age Group"].value_counts(normalize=True)*100)
AgeSummary=AgeSummary.reset_index()

#Report
AgeDemographicsTable= pd.merge(AgeTotal, AgeSummary, on="index")
AgeDemographicsTable
AgeDemographicsTable = AgeDemographicsTable.rename(columns={"index":"Age Group", "Age Group_x":"Count", "Age Group_y":"Percentage"})
AgeDemographicsTable["Percentage"]=AgeDemographicsTable["Percentage"].map("{:.2f}%".format)
AgeDemographicsTable

Unnamed: 0,Age Group,Count,Percentage
0,Third Age Group,369,47.37%
1,Second Age Group,297,38.13%
2,Oldest Age Group,62,7.96%
3,Youngest Age Group,51,6.55%


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [11]:
Age_groups= purchase_data.groupby(['Age Group'])
#Getting the data
AgePlayers=Age_groups["SN"].nunique()

AgeGroupsSale=Age_groups.count()
AgeGroupsSale=AgeGroupsSale["Purchase ID"]
AgePlayers=Age_groups["SN"].nunique()

#Getting the data
AgeGroupsPrice=Age_groups.mean()
AgeGroupsPrice=AgeGroupsPrice["Price"]


In [12]:
#Summary Report
Agesummarytable = pd.DataFrame({"Number of Players": AgePlayers,
                                "Units Sold":AgeGroupsSale,
                                    "Average Purchase Price": AgeGroupsPrice})

Agesummarytable["Total Purchase Value"]=Agesummarytable["Units Sold"]*Agesummarytable["Average Purchase Price"]
Agesummarytable["Avg Total Purchase per Person"]=Agesummarytable["Total Purchase Value"]/Agesummarytable["Number of Players"]

#Formating
Agesummarytable["Average Purchase Price"]=Agesummarytable["Average Purchase Price"].map("${:.2f}".format)
Agesummarytable["Total Purchase Value"]=Agesummarytable["Total Purchase Value"].map("${:.2f}".format)
Agesummarytable["Avg Total Purchase per Person"]=Agesummarytable["Avg Total Purchase per Person"].map("${:.2f}".format)

Agesummarytable

Unnamed: 0_level_0,Number of Players,Units Sold,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Youngest Age Group,39,51,$3.14,$159.91,$4.10
Second Age Group,219,297,$3.06,$907.95,$4.15
Third Age Group,268,369,$2.99,$1103.87,$4.12
Oldest Age Group,49,62,$3.33,$206.34,$4.21


## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [13]:
#Grouping by Players and droping columns
buyers= purchase_data.groupby(['SN'])
spenders=buyers.count()
spenders=spenders["Purchase ID"]

#Calculations
buyersAverage=buyers.mean()
buyersAverage=buyersAverage["Price"]
buyersAverage

#Summary Report
buyersReport=pd.DataFrame({"Purchase Count": spenders,
                                    "Average Purchase Price": buyersAverage})
# Adding the Total Purchase Value
buyersReport["Total Purchase Value"]=buyersReport["Purchase Count"]*buyersReport["Average Purchase Price"]

#Sorting and Formatting
buyersReport=buyersReport.sort_values("Total Purchase Value",ascending=False)
buyersReport["Average Purchase Price"]=buyersReport["Average Purchase Price"].map("${:.2f}".format)
buyersReport["Total Purchase Value"]=buyersReport["Total Purchase Value"].map("${:.2f}".format)
buyersReport.head(10)

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,$3.79,$18.96
Idastidru52,4,$3.86,$15.45
Chamjask73,3,$4.61,$13.83
Iral74,4,$3.40,$13.62
Iskadarya95,3,$4.37,$13.10
Ilarin91,3,$4.23,$12.70
Ialallo29,3,$3.95,$11.84
Tyidaim51,3,$3.94,$11.83
Lassilsala30,3,$3.84,$11.51
Chadolyla44,3,$3.82,$11.46


## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [14]:
#Grouping by Item ID
products= purchase_data.groupby(['Item ID','Item Name'])
items=products.count()

#Calculations
items=items["Purchase ID"]
itemPrices=products.mean()
itemPrices=itemPrices["Price"]

#Summary Report
itemsReport=pd.DataFrame({"Purchase Count": items,"Item Price": itemPrices})
itemsReport["Total Purchase Value"]=itemsReport["Purchase Count"]*itemsReport["Item Price"]
PitemsReport=itemsReport.copy()


In [15]:
#Sorting and Formatting
itemsReport=itemsReport.sort_values("Purchase Count",ascending=False)
itemsReport["Total Purchase Value"]=itemsReport["Total Purchase Value"].map("${:.2f}".format)
itemsReport["Item Price"]=itemsReport["Item Price"].map("${:.2f}".format)

itemsReport.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
145,Fiery Glass Crusader,9,$4.58,$41.22
108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77
82,Nirvana,9,$4.90,$44.10
19,"Pursuit, Cudgel of Necromancy",8,$1.02,$8.16


## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



In [16]:
#Most Profitable Items Report
PitemsReport=PitemsReport.sort_values("Total Purchase Value",ascending=False)
PitemsReport["Total Purchase Value"]=PitemsReport["Total Purchase Value"].map("${:.2f}".format)
PitemsReport["Item Price"]=PitemsReport["Item Price"].map("${:.2f}".format)

PitemsReport.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22
92,Final Critic,8,$4.88,$39.04
103,Singed Scalpel,8,$4.35,$34.80
