### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [25]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)
#purchase_data


## Player Count

* Display the total number of players


In [26]:
# This data is transactions, so we need to find unique players
total_transactions = purchase_data.count()[0]
print('There are ' + str(total_transactions) + ' transactions in our data set.')

players_unique = len(pd.unique(purchase_data['SN']))
print('There are ' + str(players_unique) + ' unique players in our data set.')
# let's make a dataframe that's just the unique players
players_unique_df = purchase_data.drop_duplicates(subset='SN', keep="first")



There are 780 transactions in our data set.
There are 576 unique players in our data set.


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [27]:
print('Total # of purchases = ' + str(total_transactions))
items_unique = len(purchase_data['Item Name'].unique())
print('unique items = ' + str(items_unique))
total_spent = '${:,.2f}'.format(purchase_data['Price'].sum())
print('total spent = ' + str(total_spent))
ave_price = '${:,.2f}'.format(purchase_data['Price'].mean())
print('average price = ' + str(ave_price))
min_price = '${:,.2f}'.format(purchase_data['Price'].min())
print('min price is ' + str(min_price))
max_price = '${:,.2f}'.format(purchase_data['Price'].max())
print('max price is ' + str(max_price)) 
# ----------
# here we need to use the unique players, not total transactions
# ------------
total_ages = players_unique_df['Age'].sum()
#ave_age = purchase_data['Age'].mean()
ave_age =  total_ages / players_unique 
print('average age = ' + str(ave_age))
min_age = purchase_data['Age'].min()
print('youngest player is ' + str(min_age))
max_age = purchase_data['Age'].max()
print('oldest player is ' + str(max_age))
print('Total ages = ' + str(total_ages))
print('')
print('------ simple summary stats')

summary = {'Minimum': [min_price, min_age, 'n/a'],
        'Average': [ave_price, ave_age, 'n/a' ],
        'Maximum': [max_price, max_age, 'n/a'],
        'Total': [total_spent, total_ages, items_unique]
        }

summary_df = pd.DataFrame(summary, columns = ['Minimum', 'Average', 'Maximum', 'Total'], \
                          index=['Price','Age','Unique items'])

print(summary_df)





Total # of purchases = 780
unique items = 179
total spent = $2,379.77
average price = $3.05
min price is $1.00
max price is $4.99
average age = 22.741319444444443
youngest player is 7
oldest player is 45
Total ages = 13099

------ simple summary stats
             Minimum  Average Maximum      Total
Price          $1.00    $3.05   $4.99  $2,379.77
Age                7  22.7413      45      13099
Unique items     n/a      n/a     n/a        179


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [82]:
# find gender info
gender_info = purchase_data.groupby('Gender')['Purchase ID'].nunique()

female = gender_info.iloc[0]
female_percen = '{:.2%}'.format(female / players_unique)
#print('female = ' + str(female) + ' female percent = ' + str(female_percen))
male = gender_info.iloc[1]
male_percen = '{:.2%}'.format(male / players_unique)
#print('male = ' + str(male) + ' male percent = ' + str(male_percen))
nonD = gender_info.iloc[2]
nonD_percen = '{:.2%}'.format(nonD / players_unique)
#print('non-d = ' + str(nonD) + ' nonD percent = ' + str(nonD_percen))
print('')
print('unique players = ' + str(players_unique))
print('')
print('gender_info - on all transactions')
print(gender_info)
print('')
# let's see if we can get unique player stats
gender_info_unique = players_unique_df.groupby('Gender')['Purchase ID'].nunique()
print('only unique players')
print(gender_info_unique)
# using unique players, percent gender
percent_gender_unique = (gender_info_unique / players_unique) * 100
print('')
print('percent population by gender')
print(percent_gender_unique)
#
# put them together?
foo = pd.concat([percent_gender_unique, gender_info_unique], axis = 1)
# set % formatting
format_percen = "{:.2f}%".format
foo = foo.applymap(format_percen)
print('')
print(foo)
# are we working with dataframes?
giu_df = pd.DataFrame(gender_info_unique)
giu_df.rename(columns={'Purchase ID': 'Count'})
print('')
print(giu_df)
pgu_df = pd.DataFrame(percent_gender_unique)
format_percen = "{:.2f}%".format
pgu_df = pgu_df.applymap(format_percen)
print('')
print(pgu_df)
# put them together?
foo2 = pd.concat([pgu_df, giu_df], axis = 1)
print('')
print(foo2)



unique players = 576

gender_info - on all transactions
Gender
Female                   113
Male                     652
Other / Non-Disclosed     15
Name: Purchase ID, dtype: int64

only unique players
Gender
Female                    81
Male                     484
Other / Non-Disclosed     11
Name: Purchase ID, dtype: int64

percent population by gender
Gender
Female                   14.062500
Male                     84.027778
Other / Non-Disclosed     1.909722
Name: Purchase ID, dtype: float64

                      Purchase ID Purchase ID
Gender                                       
Female                     14.06%      81.00%
Male                       84.03%     484.00%
Other / Non-Disclosed       1.91%      11.00%

                       Purchase ID
Gender                            
Female                          81
Male                           484
Other / Non-Disclosed           11

                      Purchase ID
Gender                           
Female            


## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [38]:
# we already have purchase count from previous cell
# find average purchase price

#gender_info = purchase_data.groupby('Gender')['Purchase ID'].nunique()
print('----- count purchases with Groupby -----')
count_price = purchase_data.groupby('Gender')['Price'].count()
print(count_price)
print('')
print('----- count purchases by .sum -----')
count_purchases_male = purchase_data[purchase_data['Gender'] == 'Male'].count()[0]
print('count purch male = ' + str(count_purchases_male))
count_purchases_female = purchase_data[purchase_data['Gender'] == 'Female'].count()[0]
print('count purch female = ' + str(count_purchases_female))
count_purchases_nonD = purchase_data[purchase_data['Gender'] == 'Other / Non-Disclosed'].count()[0]
print('count purch nonD = ' + str(count_purchases_nonD))
total_spent = purchase_data['Price'].sum()
print('total spent = ' + str(total_spent))
sum_price = purchase_data.groupby('Gender')['Price'].sum()
print('')
print('---- total spend by gender using Groupby ----')
print(sum_price)
print('')
print('---- sums by gender ----')
sum_purchases_male = purchase_data[purchase_data['Gender'] == 'Male'].sum()[6]
print('sum purch male = ' + str(sum_purchases_male))
sum_purchases_female = purchase_data[purchase_data['Gender'] == 'Female'].sum()[6]
print('sum purch female = ' + str(sum_purchases_female))
sum_purchases_nonD = purchase_data[purchase_data['Gender'] == 'Other / Non-Disclosed'].sum()[6]
print('sum purch nonD = ' + str(sum_purchases_nonD))
print('')
ave_price_gender = purchase_data.groupby('Gender')['Price'].mean()
print('')
print('---- mean spend by gender using Groupby ----')
print(ave_price_gender)
print('')
# to get ave total purchase by person we need to use unique players (all of them, not by gender)
#ave_price_player_gender = purchase_data.groupby('Gender')['Price'].mean()
# this is wrong
# ave_price_gender = players_unique_df.groupby('Gender')['Price'].mean()
# try total spend by gender / total unique players
#ave_price_player_gender = sum_purchases_male / players_unique
# that wasn't it.  is it sum_purchases_male / unique male players
print('')
print('---- mean spend per PER PLAYER using Groupby ----')
print('not sure about this yet')
#print(ave_price_player_gender)
print('')
#
print('---- mean spend by gender ----')
purchases_male_df = purchase_data[purchase_data['Gender'] == 'Male']
ave_purchases_male = purchases_male_df['Price'].mean()
print('ave purchases male ')
print(ave_purchases_male)
purchases_female_df = purchase_data[purchase_data['Gender'] == 'Female']
ave_purchases_female = purchases_female_df['Price'].mean()
print('ave purchases female')
print(ave_purchases_female)
purchases_nonD_df = purchase_data[purchase_data['Gender'] == 'Other / Non-Disclosed']
ave_purchases_nonD = purchases_nonD_df['Price'].mean()
print('ave purchases nonD')
print(ave_purchases_nonD)
print('')
print('--- let us see if all the purchases are from unique users ---')
print('total number of transactions = ' + str(total_transactions))
print('total number of unique players = ' + str(players_unique))
print('total # of male purchase transactions = ' + str(count_purchases_male))
#males_unique = pd.unique(purchase_data[purchase_data['Gender'] == 'Male'])
#males_unique = pd.unique(purchase_data['Gender'] == 'Male')
#males_unique = len(pd.unique(purchase_data['Gender'] == 'Male'))
#males_unique = len(pd.unique(purchase_data['SN']))
#count_purchases_male = purchase_data[purchase_data['Gender'] == 'Male'].count()[0]
#count_purchases_male = purchase_data[purchase_data['Gender'] == 'Male'].count()[0]
print('# of unique males = ')
#males_unique



----- count purchases with Groupby -----
Gender
Female                   113
Male                     652
Other / Non-Disclosed     15
Name: Price, dtype: int64

----- count purchases by .sum -----
count purch male = 652
count purch female = 113
count purch nonD = 15
total spent = 2379.77

---- total spend by gender using Groupby ----
Gender
Female                    361.94
Male                     1967.64
Other / Non-Disclosed      50.19
Name: Price, dtype: float64

---- sums by gender ----
sum purch male = 1967.64
sum purch female = 361.94
sum purch nonD = 50.19


---- mean spend by gender using Groupby ----
Gender
Female                   3.203009
Male                     3.017853
Other / Non-Disclosed    3.346000
Name: Price, dtype: float64


---- mean spend per PER PLAYER using Groupby ----
not sure about this yet

---- mean spend by gender ----
ave purchases male 
3.0178527607361953
ave purchases female
3.203008849557519
ave purchases nonD
3.3460000000000005

--- let us see if al

## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


Unnamed: 0,Total Count,Percentage of Players
<10,17,2.95%
10-14,22,3.82%
15-19,107,18.58%
20-24,258,44.79%
25-29,77,13.37%
30-34,52,9.03%
35-39,31,5.38%
40+,12,2.08%


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Age Ranges,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,23,$3.35,$77.13,$4.54
10-14,28,$2.96,$82.78,$3.76
15-19,136,$3.04,$412.89,$3.86
20-24,365,$3.05,"$1,114.06",$4.32
25-29,101,$2.90,$293.00,$3.81
30-34,73,$2.93,$214.00,$4.12
35-39,41,$3.60,$147.67,$4.76
40+,13,$2.94,$38.24,$3.19


## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,$3.79,$18.96
Idastidru52,4,$3.86,$15.45
Chamjask73,3,$4.61,$13.83
Iral74,4,$3.40,$13.62
Iskadarya95,3,$4.37,$13.10


## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
145,Fiery Glass Crusader,9,$4.58,$41.22
132,Persuasion,9,$3.22,$28.99
108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77


## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22
103,Singed Scalpel,8,$4.35,$34.80
