### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [53]:
# Dependencies and Setup
import pandas as pd
import random

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## Player Count

* Display the total number of players


In [54]:
num_of_players = purchase_data["Purchase ID"].count()
num_of_players

780

## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [55]:

uni_items = len(purchase_data["Item Name"].unique())

avg_price = round(purchase_data["Price"].mean())

num_items = purchase_data["Item ID"].count()

sum_items = purchase_data["Price"].sum()

purchase_summary_table = pd.DataFrame({"Total Unique Items": [uni_items],
                              "Average Price": avg_price,
                              "Number of Items": num_items,
                              "Revenue": sum_items})
purchase_summary_table

Unnamed: 0,Total Unique Items,Average Price,Number of Items,Revenue
0,179,3,780,2379.77


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [56]:
grouped_gender = purchase_data.groupby(['Gender'])

gender_count = purchase_data["Gender"].value_counts()

percent_gender = round(purchase_data['Gender'].value_counts(normalize=True) * 100)

gender_summary_table = pd.DataFrame({"Count": gender_count,
                                    "Percent": percent_gender})
gender_summary_table["Percent"]= gender_summary_table["Percent"].map("{0:,.2f}%".format)
gender_summary_table

Unnamed: 0,Count,Percent
Male,652,84.00%
Female,113,14.00%
Other / Non-Disclosed,15,2.00%



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [57]:
avg_price_f = (purchase_data[purchase_data["Gender"] == "Female"]['Price'].mean())
avg_price_m = (purchase_data[purchase_data["Gender"] == "Male"]['Price'].mean())
avg_price_other = (purchase_data[purchase_data["Gender"] == "Other / Non-Disclosed"]['Price'].mean())
total_purchase_f = purchase_data[purchase_data["Gender"] == "Female"]['Price'].sum()
total_purchase_m = purchase_data[purchase_data["Gender"] == "Male"]['Price'].sum()
total_purchase_other = purchase_data[purchase_data["Gender"] == "Other / Non-Disclosed"]['Price'].sum()
count_purchase_f = purchase_data[purchase_data["Gender"] == "Female"]['Item ID'].count()
count_purchase_m = purchase_data[purchase_data["Gender"] == "Male"]['Item ID'].count()
count_purchase_other = purchase_data[purchase_data["Gender"] == "Other / Non-Disclosed"]['Item ID'].count()

avg_gender_summary_table = pd.DataFrame({"Gender": ["Male", "Female", "Other / Non-Disclosed"],
                                         "Average Price": [avg_price_m, avg_price_f, avg_price_other],
                                        "Total Purchases Sum": [total_purchase_m, total_purchase_f, total_purchase_other],
                                        "Total Purchase Count": [count_purchase_m, count_purchase_f, count_purchase_other]})
avg_gender_summary_table

Unnamed: 0,Gender,Average Price,Total Purchases Sum,Total Purchase Count
0,Male,3.017853,1967.64,652
1,Female,3.203009,361.94,113
2,Other / Non-Disclosed,3.346,50.19,15


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [58]:
bins = [0, 10, 14, 18, 22, 26, 30, 34, 38, 45]

group_names = ["under_ten", "preteens", "teens", "young_adults", "millennials", "early_thirty", "established", "gen_x", "old_folks"]

purchase_data["Category"] = pd.cut(purchase_data["Age"], bins, labels=group_names, right=False)



category_total = purchase_data["Category"].value_counts()

#category_count = purchase_data["Category"].sum()
category_percents = (category_total/num_of_players).map("{0:,.2f}%".format)

age_summary_table = pd.DataFrame({"Count": category_total,
                                  "Percent": category_percents})
age_summary_table

Unnamed: 0,Count,Percent
millennials,263,0.34%
young_adults,210,0.27%
teens,89,0.11%
established,64,0.08%
early_thirty,42,0.05%
gen_x,35,0.04%
old_folks,27,0.03%
preteens,26,0.03%
under_ten,23,0.03%


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [59]:
purchase_data["Age"] = pd.cut(purchase_data["Age"], bins, labels=group_names, right=False)
age_purchase_total = purchase_data.groupby(["Age"]).sum()["Price"].rename("Total Purchase Value")
age_avg = purchase_data.groupby(["Age"]).mean()["Price"].rename("Average Purchase Price").map("${0:,.2f}".format)
age_count = purchase_data.groupby(["Age"]).count()["Price"].rename("Purchase Count")

#normalize_total = age_purchase_total / age_summary_table
age_data = pd.DataFrame({"Purchase Count": age_count,
                        "Average Purchase Price": age_avg,
                        "Total Purchase Value": age_purchase_total
                        #"Normalized Total": normalize_total
                        })
age_data

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
Age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
under_ten,23,$3.35,77.13
preteens,26,$2.92,75.87
teens,89,$3.01,267.6
young_adults,210,$3.08,647.26
millennials,263,$3.05,800.9
early_thirty,42,$2.65,111.1
established,64,$3.00,191.87
gen_x,35,$3.21,112.33
old_folks,27,$3.48,94.01


## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

