### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np

In [2]:
# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

In [3]:
# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## Player Count

* Display the total number of players


In [4]:
players = purchase_data.loc[:, ["SN", "Gender", "Age"]]
players = players.drop_duplicates()
number = players.count()[0]
pd.DataFrame({"player count":[number]})

Unnamed: 0,player count
0,576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [5]:
items = len(purchase_data["Item ID"].unique())
items

183

In [6]:
avg_price = purchase_data["Price"].mean()
avg_price

3.050987179487176

In [7]:
sum_data = pd.DataFrame({"Unique Items": items, "Average Price": [avg_price]})
sum_data


Unnamed: 0,Unique Items,Average Price
0,183,3.050987


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [8]:
count_players = players["Gender"].value_counts()
count_players
per_players = (count_players / number) * 100
per_players
df = pd.DataFrame({"Player Total":count_players, "Percentage Players":per_players})
df

Unnamed: 0,Player Total,Percentage Players
Male,484,84.027778
Female,81,14.0625
Other / Non-Disclosed,11,1.909722



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [9]:
##Groupby is used so that 
pur_count = purchase_data.groupby(["Gender"]).count()["Price"]
pur_count

pur_price_avg = purchase_data.groupby(["Gender"]).mean()["Price"].round(2)
pur_price_avg

tot_val = purchase_data.groupby(["Gender"]).sum()["Price"]
tot_val

#pur_person = purchase_data.groupby(["Gender"]).mean()["Price"].round(2)

pur_person = (tot_val / pur_count).round(2)
pur_person

Gender
Female                   3.20
Male                     3.02
Other / Non-Disclosed    3.35
Name: Price, dtype: float64

In [10]:
df_sum = pd.DataFrame({"Purchase Count":pur_count, "Average Purchase Price":pur_price_avg, "Total Purchase Value":tot_val, "Avg Total Purchase per Person":pur_person})
df_sum

#boom = len(["Avg Total Purchase per Person"]).value_counts()["Avg Total Purchase per Person"].round(2)
#boom


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,3.2,361.94,3.2
Male,652,3.02,1967.64,3.02
Other / Non-Disclosed,15,3.35,50.19,3.35


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [11]:
max_age = purchase_data["Age"].max()
max_age

45

In [12]:
min_age = purchase_data["Age"].min()
min_age

7

In [13]:
##I am creating the name of a group in order to align each age range to a letter category.
    
group_name = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]
bin_age = [0, 10, 14, 19, 24, 29, 34, 39, 45.1]

df_sum["Age Group", ] = pd.cut(purchase_data["Age"], bin_age, labels=group_name, right=False)
df_sum

df_sum.dtypes

Purchase Count                      int64
Average Purchase Price            float64
Total Purchase Value              float64
Avg Total Purchase per Person     float64
(Age Group,)                     category
dtype: object

## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [14]:
#purchase_data["Age Group"] = pd.cut(purchase_data["Age"], bin_age, labels=group_name, right=False)
#purchase_data

summary = purchase_data.describe().round(2)
summary

Unnamed: 0,Purchase ID,Age,Item ID,Price
count,780.0,780.0,780.0,780.0
mean,389.5,22.71,92.11,3.05
std,225.31,6.66,52.78,1.17
min,0.0,7.0,0.0,1.0
25%,194.75,20.0,48.0,1.98
50%,389.5,22.0,93.0,3.15
75%,584.25,25.0,139.0,4.08
max,779.0,45.0,183.0,4.99


In [15]:
count = purchase_data.pd("Age Group").count()
count

AttributeError: 'DataFrame' object has no attribute 'pd'

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [None]:
spenders_df = purchase_data.sort_values("Price", ascending=False)
spenders_df.head()

## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [None]:
popular_items = purchase_data.loc[:, ["Item ID", "Item Name", "Price"]]

In [None]:
pop_count = popular_items["Item ID", "Item Name"].value_counts()
pop_count

#pur_count = purchase_data.groupby(["Gender"]).count()["Price"]
#pur_count

#pur_price_avg = purchase_data.groupby(["Gender"]).mean()["Price"].round(2)
#pur_price_avg

#tot_val = purchase_data.groupby(["Gender"]).sum()["Price"]
#tot_val

#df_sum = pd.DataFrame({"Item ID":pop_count, "Average Purchase Price":pur_price_avg, "Total Purchase Value":tot_val, "Avg Total Purchase per Person":pur_person})
#df_sum

In [None]:
pop_count = popular_items["Item ID"].count()
pop_count

In [None]:
pop_price = popular_items.groupby(["Item ID", "Item Name"]).sum()

In [None]:
df_pop = pd.DataFrame(popular_items)
df_pop

In [None]:
popular_df = pd.DataFrame({"Purchase Count":, 
                          "Item Price":,
                           "Total Purchase Value":,})
popular_df = popular_df.sort_values(["Purchase Count"], ascending=False)
popular_items.head()

## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

