### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [67]:
# Dependencies and Setup
import pandas as pd
import random

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## Player Count

* Display the total number of players


In [68]:
num_of_players = purchase_data["Purchase ID"].count()
num_of_players

780

## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [69]:

uni_items = len(purchase_data["Item Name"].unique())

avg_price = round(purchase_data["Price"].mean())

num_items = purchase_data["Item ID"].count()

sum_items = purchase_data["Price"].sum()

purchase_summary_table = pd.DataFrame({"Total Unique Items": [uni_items],
                              "Average Price": avg_price,
                              "Number of Items": num_items,
                              "Revenue": sum_items})
purchase_summary_table

Unnamed: 0,Total Unique Items,Average Price,Number of Items,Revenue
0,179,3,780,2379.77


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [70]:
grouped_gender = purchase_data.groupby(['Gender'])

gender_count = purchase_data["Gender"].value_counts()

percent_gender = round(purchase_data['Gender'].value_counts(normalize=True) * 100)

gender_summary_table = pd.DataFrame({"Count": gender_count,
                                    "Percent": percent_gender})
gender_summary_table["Percent"]= gender_summary_table["Percent"].map("{0:,.2f}%".format)
gender_summary_table

Unnamed: 0,Count,Percent
Male,652,84.00%
Female,113,14.00%
Other / Non-Disclosed,15,2.00%



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [71]:
#Purchasing Analysis(Gender)
avg_price_f = '${:,.2f}'.format((purchase_data[purchase_data["Gender"] == "Female"]['Price'].mean()))
avg_price_m = '${:,.2f}'.format((purchase_data[purchase_data["Gender"] == "Male"]['Price'].mean()))
avg_price_other = '${:,.2f}'.format((purchase_data[purchase_data["Gender"] == "Other / Non-Disclosed"]['Price'].mean()))
total_purchase_f = '${:,.2f}'.format(purchase_data[purchase_data["Gender"] == "Female"]['Price'].sum())
total_purchase_m = '${:,.2f}'.format(purchase_data[purchase_data["Gender"] == "Male"]['Price'].sum())
total_purchase_other = '${:,.2f}'.format(purchase_data[purchase_data["Gender"] == "Other / Non-Disclosed"]['Price'].sum())
count_purchase_f = purchase_data[purchase_data["Gender"] == "Female"]['Item ID'].count()
count_purchase_m = purchase_data[purchase_data["Gender"] == "Male"]['Item ID'].count()
count_purchase_other = purchase_data[purchase_data["Gender"] == "Other / Non-Disclosed"]['Item ID'].count()

avg_gender_summary_table = pd.DataFrame({"Gender": ["Male", "Female", "Other / Non-Disclosed"],
                                         "Average Price": [avg_price_m, avg_price_f, avg_price_other],
                                        "Total Purchases Sum": [total_purchase_m, total_purchase_f, total_purchase_other],
                                        "Total Purchase Count": [count_purchase_m, count_purchase_f, count_purchase_other]})
avg_gender_summary_table

Unnamed: 0,Gender,Average Price,Total Purchases Sum,Total Purchase Count
0,Male,$3.02,"$1,967.64",652
1,Female,$3.20,$361.94,113
2,Other / Non-Disclosed,$3.35,$50.19,15


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [72]:
#Age Demographics
bins = [0, 10, 14, 18, 22, 26, 30, 34, 38, 45]

group_names = ["under_ten", "preteens", "teens", "young_adults", "millennials", "early_thirty", "established", "gen_x", "old_folks"]

purchase_data["Category"] = pd.cut(purchase_data["Age"], bins, labels=group_names, right=False)



category_total = purchase_data["Category"].value_counts()

#category_count = purchase_data["Category"].sum()
category_percents = ((category_total/num_of_players)*100).map("{0:.2f}%".format)

age_summary_table = pd.DataFrame({"Count": category_total,
                                  "Percent": category_percents})
age_summary_table

Unnamed: 0,Count,Percent
millennials,263,33.72%
young_adults,210,26.92%
teens,89,11.41%
established,64,8.21%
early_thirty,42,5.38%
gen_x,35,4.49%
old_folks,27,3.46%
preteens,26,3.33%
under_ten,23,2.95%


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [73]:
#Purchasing Analysis (Age)
purchase_data["Age"] = pd.cut(purchase_data["Age"], bins, labels=group_names, right=False)
age_purchase_total = purchase_data.groupby(["Age"]).sum()["Price"].rename("Total Purchase Value")
age_avg = purchase_data.groupby(["Age"]).mean()["Price"].rename("Average Purchase Price").map("${0:,.2f}".format)
age_count = purchase_data.groupby(["Age"]).count()["Price"].rename("Purchase Count")
normalized_age = age_purchase_total / category_total

age_data = pd.DataFrame({"Purchase Count": age_count,
                        "Average Purchase Price": age_avg,
                        "Total Purchase Value": age_purchase_total,
                        "Normalized Total": normalized_age
                        })

age_data["Normalized Total"] = age_data["Normalized Total"].map("${0:,.2f}".format)

age_data

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Total
early_thirty,42,$2.65,111.1,$2.65
established,64,$3.00,191.87,$3.00
gen_x,35,$3.21,112.33,$3.21
millennials,263,$3.05,800.9,$3.05
old_folks,27,$3.48,94.01,$3.48
preteens,26,$2.92,75.87,$2.92
teens,89,$3.01,267.6,$3.01
under_ten,23,$3.35,77.13,$3.35
young_adults,210,$3.08,647.26,$3.08


## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [74]:
grouped_sn = purchase_data.groupby(["SN"])
total_grouped_sn = grouped_sn.sum()["Price"]
count_grouped_sn = grouped_sn.count()["Price"]
avg_grouped_sn = grouped_sn.mean()["Price"]

top_spenders_df = pd.DataFrame({"Top Total Purchases": total_grouped_sn,
                   "Counted Purchases": count_grouped_sn,
                   "Top Average Price": avg_grouped_sn})

sorted_spenders_df = top_spenders_df.sort_values("Top Total Purchases", ascending=False)
sorted_spenders_df["Top Total Purchases"] = sorted_spenders_df["Top Total Purchases"].map("${0:,.2f}".format)
sorted_spenders_df["Top Average Price"] = sorted_spenders_df["Top Average Price"].map("${0:,.2f}".format)

sorted_spenders_df.head()

Unnamed: 0_level_0,Top Total Purchases,Counted Purchases,Top Average Price
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,$18.96,5,$3.79
Idastidru52,$15.45,4,$3.86
Chamjask73,$13.83,3,$4.61
Iral74,$13.62,4,$3.40
Iskadarya95,$13.10,3,$4.37


## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [75]:
grouped_item = setting_id.groupby(["Item ID", "Item Name"])
total_item = grouped_item.sum()["Price"]
counted_item = grouped_item.count()["Price"]
avg_item_price = grouped_item.mean()["Price"]

top_items_df = pd.DataFrame({"Top Total Item Purchase": total_item,
                   "Counted Item Purchases": counted_item,
                   "Top Average Item Price": avg_item_price})

sorted_items_df = top_items_df.sort_values("Top Total Item Purchase", ascending=False)
sorted_items_df["Top Total Item Purchase"] = sorted_items_df["Top Total Item Purchase"].map("${0:,.2f}".format)
sorted_items_df["Top Average Item Price"] = sorted_items_df["Top Average Item Price"].map("${0:,.2f}".format)

sorted_items_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Top Total Item Purchase,Counted Item Purchases,Top Average Item Price
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",$50.76,12,$4.23
82,Nirvana,$44.10,9,$4.90
145,Fiery Glass Crusader,$41.22,9,$4.58
92,Final Critic,$39.04,8,$4.88
103,Singed Scalpel,$34.80,8,$4.35


## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



In [76]:
sorted_items_df = top_items_df.sort_values("Top Average Item Price", ascending=False)
sorted_items_df["Top Total Item Purchase"] = sorted_items_df["Top Total Item Purchase"].map("${0:,.2f}".format)
sorted_items_df["Top Average Item Price"] = sorted_items_df["Top Average Item Price"].map("${0:,.2f}".format)

sorted_items_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Top Total Item Purchase,Counted Item Purchases,Top Average Item Price
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
63,Stormfury Mace,$9.98,2,$4.99
139,"Mercy, Katana of Dismay",$24.70,5,$4.94
173,Stormfury Longsword,$9.86,2,$4.93
147,"Hellreaver, Heirloom of Inception",$14.79,3,$4.93
128,"Blazeguard, Reach of Eternity",$24.55,5,$4.91
