### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [22]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
file = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame

main_df = pd.read_csv(file, skipinitialspace=True)
column_names = list(main_df.columns)
#print(column_names)
# column 0 = purchase ID, 1 = SN(in game name), 2 = age, 3 = gender, 4 = item id, 5 = item name, 6 = price
# may have to clean up header

# you want to use the original dataframe and not the new one
# this is looking at purchases not unique players so the filtered dataset would be incomplete
# counting with gender will get the purchase count
# Purchase Count	Average Purchase Price	Total Purchase Value	Avg Total Purchase per Person
# ex male_purchases = purchase_data.loc[purchase_data.Gender == 'Male', 'Gender'].count()

# these are bins see link https://www.codespeedy.com/binning-or-bucketing-of-column-in-pandas-using-python/
# these will be placed in their own dataframe 
# cut based on these ages: <10, 10-13,15-19,20-24,25-29,30-34,35-39,40+ 
# make percentages based on this (use total players again)
original_bins = [9,13,19,24,29,34,39,150]
#doing this for now to account for no know greater than or lesser than function atm, subject to change
#bins = pd.IntervalIndex.from_arrays([0,9],[10,13],[15,19],[20,24],[25,29],[30,34],[35,39],[40,150])
age_df = pd.cut(main_df['Age'],original_bins)
age_df.head()

0     (19, 24]
1    (39, 150]
2     (19, 24]
3     (19, 24]
4     (19, 24]
Name: Age, dtype: category
Categories (7, interval[int64]): [(9, 13] < (13, 19] < (19, 24] < (24, 29] < (29, 34] < (34, 39] < (39, 150]]

## Player Count

* Display the total number of players


In [9]:
# to get total players, you need to see all unique player igns(column 3)
# it is case sensitive and len can be used to get the numerical value from unique()
# given the formatting, it may be necessary to assign this to a data frame later 
total_players = len(main_df['SN'].unique())
print(total_players)

576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [None]:
unique_items = len(main_df["Item ID"].unique())
print(unique_items)

#round to second decimal point here
average_revenue = round(main_df['Price'].mean(),2)
print(average_revenue)

num_of_purchases = len(main_df["Item ID"])
print(num_of_purchases)

# no need to round as prices only have 2 decimal places when added
total_revenue = main_df['Price'].sum()
print(total_revenue)

## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [13]:
gender_demo = main_df[['SN','Gender']]
gender_checker = gender_demo.drop_duplicates()


male_players = gender_checker.loc[gender_checker.Gender == 'Male', 'Gender'].count()
female_players = gender_checker.loc[gender_checker.Gender == 'Female', 'Gender'].count()
other_players = gender_checker.loc[gender_checker.Gender == 'Other / Non-Disclosed', 'Gender'].count()

percent_of_m = "{:.2%}".format(male_players / total_players)
percent_of_f = "{:.2%}".format(female_players / total_players)
percent_of_o = "{:.2%}".format(other_players / total_players)

# may be able to use this dataframe to analyze the next one by adding in price
# but that might not be possible with this code due to drop duplicates
# however if I were to not do that and simply use groupby, sum on price as I did earlier with male_df prototyping 
# then it is likely possible I could make the dataframe displayed with the following categories: 
# Gender->Purchase Count	Average Purchase Price	Total Purchase Value	Avg Total Purchase per Person


## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [None]:
# you want to use the original dataframe and not the new one
# this is looking at purchases not unique players so the filtered dataset would be incomplete
# counting with gender will get the purchase count
# ex male_purchases = purchase_data.loc[purchase_data.Gender == 'Male', 'Gender'].count()
# will also want to put these rows into their own data sets to analylze average price, total purchase value and average total purchase per person

#testing with the males first
#average total purchase per person
male_df = main_df[main_df.iloc[:,3]=='Male']
#male_df.reset_index()
#print(male_df)
m_count = male_df['Purchase ID'].count()
m_avg = round(male_df['Price'].mean(),2)
m_sum = male_df['Price'].sum()
print(m_count)
print(m_avg)
print(m_sum)
# examples
#df.groupby(['Fruit','Name'])['Number'].sum().reset_index()
#df.groupby(['Fruit','Name'])['Number'].sum()

# male average purchase has 4.47 listed as its expected outcome
# 484 is the total male population and the cost should add up to 1967.64
m_avg_purchase_df = male_df.groupby(male_df['SN'])['Price'].sum().reset_index()
m_avg_purchase_df.dropna(how = 'any')
print(m_avg_purchase_df)
m_sum = m_avg_purchase = round(m_avg_purchase_df['Price'].sum(),2)
print(m_sum)
m_avg_purchase = round(m_avg_purchase_df['Price'].mean(),2)
print(m_avg_purchase)
#m_avg_purchase_df = male_df.groupby(["SN"])

o_df = main_df[main_df.iloc[:,3]=='Other / Non-Disclosed']
#male_df.reset_index()
#print(male_df)
o_count = o_df['Purchase ID'].count()
o_avg = round(o_df['Price'].mean(),2)
o_sum = o_df['Price'].sum()
print(o_count)
print(o_avg)
print(o_sum)
# examples
#df.groupby(['Fruit','Name'])['Number'].sum().reset_index()
#df.groupby(['Fruit','Name'])['Number'].sum()

# male average purchase has 4.47 listed as its expected outcome
# 484 is the total male population and the cost should add up to 1967.64

# other is 15, 3.35, 50.19, 4.56
o_avg_purchase_df = o_df.groupby(o_df['SN'])['Price'].sum().reset_index()
o_avg_purchase_df.dropna(how = 'any')
print(o_avg_purchase_df)
o_sum = o_avg_purchase = round(o_avg_purchase_df['Price'].sum(),2)
print(o_sum)
o_avg_purchase = round(o_avg_purchase_df['Price'].mean(),2)
print(o_avg_purchase)


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [None]:
# these are bins see link https://www.codespeedy.com/binning-or-bucketing-of-column-in-pandas-using-python/
# these will be placed in their own dataframe 
# cut based on these ages: <10, 10-13,15-19,20-24,25-29,30-34,35-39,40+ 
# make percentages based on this (use total players again)


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [None]:
# just like gender, use sthe original data frame 
# bin it by age to move the rows around into a new data frame 
# calculate accordingly 

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [None]:
# count how many times a repeating sn reappears
# place those names into a list 
# start a new dataframe with those list and the necessary columns
# make a column holding these value for purchase count 
# average and total those rows values 
# place those values into the new data frame


## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [None]:
# intended output is top 5 that want to be taken from the main data set
# same with player names, take the item names, count them and place the new values in a new dataframe

## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



In [None]:
# sort the new dataframe by total purchase value 
# display the new data frame