### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [232]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_df = pd.read_csv(file_to_load)
purchase_df


Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46
778,778,Sisur91,7,Male,92,Final Critic,4.19


## Player Count

In [233]:
unique = purchase_df["SN"].nunique()
unique_list = [{"Total Number of Players": unique}]

total_df = pd.DataFrame(unique_list)
total_df

Unnamed: 0,Total Number of Players
0,576


* Display the total number of players


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [234]:
#find unique item numbers

unique_item = purchase_df["Item ID"].nunique()

#find item totals
item_total = purchase_df["Item ID"].count()

#find total revenue by adding the price column
total_revenue = purchase_df["Price"].sum()
total_revenue

#fine average price by first adding all the prices and dividing by the item totals

average_price = (purchase_df["Price"].sum())/ item_total
average_price

information_list= [{"Number of Unique Items": unique_item, "Average price": average_price, \
                    "Number of Purchases": item_total, "Total Revenue": total_revenue}]

#create the dataframe to display all the information
purchase_analysis_df = pd.DataFrame(information_list)
purchase_analysis_df

Unnamed: 0,Number of Unique Items,Average price,Number of Purchases,Total Revenue
0,179,3.050987,780,2379.77


In [235]:
purchase_analysis_df.info

<bound method DataFrame.info of    Number of Unique Items  Average price  Number of Purchases  Total Revenue
0                     179       3.050987                  780        2379.77>

In [236]:
#convert information to float and format to two decimal places, include a dollar sign and use a comma.
purchase_analysis_df["Average price"] = purchase_analysis_df["Average price"].astype(float).map("${:,.2f}".format)
purchase_analysis_df["Total Revenue"] = purchase_analysis_df["Total Revenue"].astype(float).map("${:,.2f}".format)
purchase_analysis_df

Unnamed: 0,Number of Unique Items,Average price,Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [237]:
#narrow down the data to only include the Screen name, gender and price.

gender_sn_df = purchase_df.loc[:, ["SN", "Gender", "Price"]]
gender_sn_df

Unnamed: 0,SN,Gender,Price
0,Lisim78,Male,3.53
1,Lisovynya38,Male,1.56
2,Ithergue48,Male,4.88
3,Chamassasya86,Male,3.27
4,Iskosia90,Male,1.44
...,...,...,...
775,Aethedru70,Female,3.54
776,Iral74,Male,1.63
777,Yathecal72,Male,3.46
778,Sisur91,Male,4.19


In [238]:
#narrow down unique users by gender
gender_sn_dedupe_df =  gender_sn_df.drop_duplicates(subset=['SN'])
gender_sn_dedupe_df

Unnamed: 0,SN,Gender,Price
0,Lisim78,Male,3.53
1,Lisovynya38,Male,1.56
2,Ithergue48,Male,4.88
3,Chamassasya86,Male,3.27
4,Iskosia90,Male,1.44
...,...,...,...
773,Hala31,Male,1.02
774,Jiskjask80,Male,4.19
775,Aethedru70,Female,3.54
777,Yathecal72,Male,3.46


In [304]:
#calculate totals of players for each gender

male_total = gender_sn_dedupe_df[gender_sn_dedupe_df["Gender"]== "Male"]
male_total = len(male_total)

female_total = gender_sn_dedupe_df[gender_sn_dedupe_df["Gender"]== "Female"]
female_total = len(female_total)


other_total = gender_sn_dedupe_df[gender_sn_dedupe_df["Gender"]== "Other / Non-Disclosed"]
other_total = len(other_total)

#set the totals

total_genders = len(gender_sn_dedupe_df["Gender"])


#calculate percentages per gender

male_percent = round(((male_total/ total_genders) * 100), 2)


female_percent = round(((female_total/ total_genders) * 100), 2)

other_percent = round(((other_total/ total_genders) * 100), 2)

#create infomration in lists
genders = [[male_total, male_percent], [female_total, female_percent], [other_total, other_percent]]
#put it into dataframe and set the genders as the index
genders_df = pd.DataFrame(genders, index= ['Male', 'Female', 'Other/ Non-Disclosed'], columns = ['Total Count', 'Percentage of Players'])

#add correct formatting for percentages
genders_df["Percentage of Players"] = genders_df["Percentage of Players"].astype(float).map("{:,.2f}%".format)


genders_df



Unnamed: 0,Total Count,Percentage of Players
Male,484,84.03%
Female,81,14.06%
Other/ Non-Disclosed,11,1.91%



## Purchasing Analysis (Gender)

In [306]:
#calculate total purchases
purchase_count = gender_sn_df.groupby("Gender").count()
purchase_count

m_purchases_count = gender_sn_df[gender_sn_df["Gender"]== "Male"]
m_purchases_count = len(m_purchases_count)
m_purchases_count

f_purchases_count = gender_sn_df[gender_sn_df["Gender"]== "Female"]
f_purchases_count = len(f_purchases_count)
f_purchases_count

o_purchases_count = gender_sn_df[gender_sn_df["Gender"]== "Other / Non-Disclosed"]
o_purchases_count = len(o_purchases_count)
o_purchases_count

counts = [m_purchases_count, f_purchases_count, o_purchases_count]
counts

#add up all the male purchases, value and average
columns = ["Gender", "Price"]
men_average_df = gender_sn_df.loc[gender_sn_df['Gender']== "Male", columns]
men_average_df

men_total_value = men_average_df["Price"].sum()
men_total_value

men_average_value = round((men_total_value / m_purchases_count), 2)
men_average_value


#add up all the female purchases, value and average

f_average_df = gender_sn_df.loc[gender_sn_df['Gender']== "Female", columns]

f_total_value = f_average_df["Price"].sum()

f_average_value = round((f_total_value / f_purchases_count), 2)


#add up all the other purchases, value and average
o_average_df = gender_sn_df.loc[gender_sn_df['Gender']== "Other / Non-Disclosed", columns]


o_total_value = o_average_df["Price"].sum()


o_average_value = round((o_total_value / o_purchases_count), 2)

#avg total purchase per person
f_total_avg = round((f_total_value/female_total), 2)

m_total_avg = round((men_total_value/male_total), 2)

o_total_avg = round((o_total_value/other_total), 2)


analysis = [[f_purchases_count, f_average_value, f_total_value, f_total_avg],[m_purchases_count, \
                                                     men_average_value, men_total_value, m_total_avg], [o_purchases_count, \
                                                       o_average_value, o_total_value, o_total_avg]]

analysis_df = pd.DataFrame(analysis, index= ['Female', 'Male', 'Other/ Non-Disclosed'], \
                           columns = ['Purchase Count', 'Average Purchase Price', 'Total Purchase Value', 'Average Purchase per Person'])

# adjust formatting to represent monetary amounts
analysis_df["Average Purchase Price"] = analysis_df["Average Purchase Price"].astype(float).map("${:,.2f}".format)
analysis_df["Total Purchase Value"] = analysis_df["Total Purchase Value"].astype(float).map("${:,.2f}".format)
analysis_df["Average Purchase per Person"] = analysis_df["Average Purchase per Person"].astype(float).map("${:,.2f}".format)
analysis_df



Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Average Purchase per Person
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,"$1,967.64",$4.07
Other/ Non-Disclosed,15,$3.35,$50.19,$4.56


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

