### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [2]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchaseData = pd.read_csv(file_to_load)

## Player Count

In [3]:
purchaseData.columns

Index(['Purchase ID', 'SN', 'Age', 'Gender', 'Item ID', 'Item Name', 'Price'], dtype='object')

* Display the total number of players


In [4]:
uniquePlayerCount = purchaseData["SN"].nunique()

print( f"Unique total player count is {uniquePlayerCount}")

Unique total player count is 576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [5]:
uniqueItemCount = purchaseData["Item ID"].nunique()
averageItemPrice = round(purchaseData["Price"].mean(),2)
totalItemPurchased = len(purchaseData)
totalItemRevenue = sum(purchaseData["Price"])


analysisPurchasing_df = pd.DataFrame({
    "Unique Item Count" : [uniqueItemCount],
    "Average Item Price" : [averageItemPrice],
    "Total Item Pruchased" : [totalItemPurchased],
    "Total Item Revenue" : [totalItemRevenue]
})

analysisPurchasing_df



Unnamed: 0,Unique Item Count,Average Item Price,Total Item Pruchased,Total Item Revenue
0,179,3.05,780,2379.77


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [6]:
genderCount = purchaseData["Gender"].value_counts()
genderPercentage = purchaseData["Gender"].value_counts(normalize = True)

maleCount = genderCount["Male"]
femaleCount = genderCount["Female"]
otherCount = genderCount["Other / Non-Disclosed"]
malePercentage =round(genderPercentage["Male"],2)
femalePercentage = round(genderPercentage["Female"],2)
otherPercentage = round(genderPercentage["Other / Non-Disclosed"],2)

analysisGender_df = pd.DataFrame({
    "Gender" : ["Male", "Female", "Other / Non-Disclosed"],
    "Count" : [maleCount, femaleCount, otherCount],
    "Percentage" : [malePercentage, femalePercentage, otherPercentage]
    
})

analysisGender_df


Unnamed: 0,Gender,Count,Percentage
0,Male,652,0.84
1,Female,113,0.14
2,Other / Non-Disclosed,15,0.02



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [7]:
malePurchaseData = purchaseData.loc[purchaseData["Gender"] == "Male",:]
femalePurchaseData = purchaseData.loc[purchaseData["Gender"] == "Female",:]
otherPurchaseData = purchaseData.loc[purchaseData["Gender"] == "Other / Non-Disclosed",:]


malePurchaseCount = malePurchaseData["Purchase ID"].count()
femalePurchaseCount = femalePurchaseData["Purchase ID"].count()
otherPurchaseCount = otherPurchaseData["Purchase ID"].count()

malePurchaseTotal = malePurchaseData["Price"].sum()
femalePurchaseTotal = femalePurchaseData["Price"].sum()
otherPurchaseTotal = otherPurchaseData["Price"].sum()

malePurchaseAverage = round(malePurchaseTotal / malePurchaseCount , 2)
femalePurchaseAverage = round(femalePurchaseTotal / femalePurchaseCount, 2)
otherPurchaseAverage = round(otherPurchaseTotal / otherPurchaseCount, 2)


maleGroupedData = malePurchaseData.groupby(["SN"]).sum()
femaleGroupedData = femalePurchaseData.groupby(["SN"]).sum()
otherGroupedData = otherPurchaseData.groupby(["SN"]).sum()

maleGroupedDataAverage = round(maleGroupedData["Price"].mean(), 2)
femaleGroupedDataAverage = round(femaleGroupedData["Price"].mean(), 2)
otherGroupedDataAverage = round(otherGroupedData["Price"].mean(), 2)


metaAnalysisGender_df = pd.DataFrame({
    "Gender" : ["Male", "Female", "Other / Non-Disclosed"],
    "Pruchase Count" : [malePurchaseCount, femalePurchaseCount, otherPurchaseCount],
    "Avg Purchase Price" : [malePurchaseAverage, femalePurchaseAverage, otherPurchaseAverage],
    "Avg Purchase Price per Person" : [maleGroupedDataAverage, femaleGroupedDataAverage, otherGroupedDataAverage]
})
    
metaAnalysisGender_df  
#by gender:
#purchase count
#avg purchase price 
#avg purchase total per person 



Unnamed: 0,Gender,Pruchase Count,Avg Purchase Price,Avg Purchase Price per Person
0,Male,652,3.02,4.07
1,Female,113,3.2,4.47
2,Other / Non-Disclosed,15,3.35,4.56


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [8]:
agePurchaseData = purchaseData

bins = [0,9,14,19,24,29,100]
groupNames= ["<10","10-14","15-19","20-24","25-29",">30"]

agePurchaseData["Age Demographics"] = pd.cut(purchaseData["Age"], bins, labels =groupNames,include_lowest=True)
ageGroupedData = agePurchaseData.groupby("Age Demographics")

#genderCount = purchaseData["Gender"].value_counts()
#genderPercentage = purchaseData["Gender"].value_counts(normalize = True)

ageCount = ageGroupedData["SN"].nunique()
ageCount = ageCount.rename("Player Count")

ageCountSum = ageCount.sum()
agePercentage = round(ageCount/ageCountSum,2)
agePercentage = agePercentage.rename("Percent of Players")

ageCounts_df = pd.concat([ageCount,agePercentage], axis = 1)

ageCounts_df



Unnamed: 0_level_0,Player Count,Percent of Players
Age Demographics,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,17,0.03
10-14,22,0.04
15-19,107,0.19
20-24,258,0.45
25-29,77,0.13
>30,95,0.16


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [9]:
#agePurchaseData = purchaseData

#bins = [0,9,14,19,24,29,100]
#groupNames= ["<10","10-14","15-19","20-24","25-29",">30"]

#agePurchaseData["Age Demographics"] = pd.cut(purchaseData["Age"], bins, labels =groupNames,include_lowest=True)
#ageGroupedData = agePurchaseData.groupby("Age Demographics")

agePlayerCount = ageGroupedData["SN"].nunique()
agePlayerCount = ageCount.rename("Player Count")

ageAveragePrice = round(ageGroupedData["Price"].mean(),2)
ageAveragePrice = ageAveragePrice.rename("Average Price")

agePurchaseTotal = round(ageGroupedData["Price"].sum(),2)
agePurchaseTotal = agePurchaseTotal.rename("Total")

agePurchaseAverage = round(ageGroupedData["Price"].sum()/ageCount,2)
agePurchaseAverage = agePurchaseAverage.rename("Avereage Price per Person")

ageAnalysis_df = pd.concat([agePlayerCount, ageAveragePrice, agePurchaseTotal, agePurchaseAverage], axis=1)
ageAnalysis_df

Unnamed: 0_level_0,Player Count,Average Price,Total,Avereage Price per Person
Age Demographics,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,17,3.35,77.13,4.54
10-14,22,2.96,82.78,3.76
15-19,107,3.04,412.89,3.86
20-24,258,3.05,1114.06,4.32
25-29,77,2.9,293.0,3.81
>30,95,3.15,399.91,4.21


## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [11]:
namePurchaseData = purchaseData

groupedNames = namePurchaseData.groupby(['SN'])



nameTotalCount = groupedNames["Purchase ID"].count()
nameTotalCount = nameTotalCount.rename("Purchase Count")

nameTotalAverage = round(groupedNames["Price"].mean(),2)
nameTotalAverage =nameTotalAverage.rename("Average Price")

nameTotalValue = groupedNames["Price"].sum()
nameTotalValue = nameTotalValue.rename("Total Purchase Value")

nameAnalysis_df = pd.concat([nameTotalCount, nameTotalAverage, nameTotalValue,], axis=1)
nameAnalysis_df = nameAnalysis_df.sort_values("Total Purchase Value", ascending=False)
nameAnalysis_df.head(5)



Unnamed: 0_level_0,Purchase Count,Average Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,3.79,18.96
Idastidru52,4,3.86,15.45
Chamjask73,3,4.61,13.83
Iral74,4,3.4,13.62
Iskadarya95,3,4.37,13.1


## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [31]:
itemPurchaseData = purchaseData

groupedItems = itemPurchaseData.groupby(["Item ID","Item Name"])


groupedItemsCount = groupedItems["Purchase ID"].count()
groupedItemsCount = groupedItemsCount.rename("Amount Purchased")

groupedItemsAverage = round(groupedItems["Price"].mean(),2)
groupedItemsAverage = groupedItemsAverage.rename("Average Price")

groupedItemsTotal = groupedItems["Price"].sum()
groupedItemsTotal = groupedItemsTotal.rename("Total Price")

ItemAnalysis_df = pd.concat([groupedItemsCount, groupedItemsAverage, groupedItemsTotal],axis = 1)
ItemAnalysisPopular_df = ItemAnalysis_df.sort_values("Amount Purchased", ascending=False)


ItemAnalysis_df.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Amount Purchased,Average Price,Total Price
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,4.61,59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,4.23,50.76
145,Fiery Glass Crusader,9,4.58,41.22
132,Persuasion,9,3.22,28.99
108,"Extraction, Quickblade Of Trembling Hands",9,3.53,31.77


## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



In [10]:
itemAnalysisTotal_df = ItemAnalysis_df.sort_values("Total Purchase Value", ascending = False)
itemAnalysis_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22
103,Singed Scalpel,8,$4.35,$34.80
