### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_df = pd.read_csv(file_to_load)
purchase_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## Player Count

* Display the total number of players


In [2]:
#Find number of unique users using SN column
unique_players = purchase_df["SN"].unique()
#Count the unique users using the length of the list of unique users
player_amt = len(unique_players)

#Create a dictionary
total_player_dict = {"Total Players": player_amt}
#Pass the dictionary into a dataframe
total_player_df = pd.DataFrame([total_player_dict])
#Print the dataframe
total_player_df

Unnamed: 0,Total Players
0,576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [3]:
#Find number of unique items using Item ID column
unique_items = purchase_df["Item ID"].unique()
#Count the unique items using the length of the list of unique users
unique_item_total = len(unique_items)
unique_item_total

179

In [4]:
avg_price = round(purchase_df["Price"].mean(), 2)
avg_price

3.05

In [5]:
total_purchases = len(purchase_df["Purchase ID"])
total_purchases

780

In [6]:
total_revenue = purchase_df["Price"].sum()
total_revenue

2379.77

In [7]:
purchase_summary_df = pd.DataFrame({"Number of Unique Items": [unique_item_total], 
                                    "Average Price": [avg_price], 
                                    "Number of Purchases": [total_purchases],
                                    "Total Revenue": [total_revenue]})
purchase_summary_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,3.05,780,2379.77


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [8]:
#Total male players
males = purchase_df.loc[purchase_df["Gender"] == "Male", ["Gender", "SN"]]
unique_males = males["SN"].unique()
unique_male_total = len(unique_males)

In [9]:
#Total female players
females = purchase_df.loc[purchase_df["Gender"] == "Female", ["Gender", "SN"]]
unique_females = females["SN"].unique()
unique_female_total = len(unique_females)

In [10]:
#Total other/nondisclosed players
nd = purchase_df.loc[purchase_df["Gender"] == "Other / Non-Disclosed", ["Gender", "SN"]]
unique_nd = nd["SN"].unique()
unique_nd_total = len(unique_nd)

In [11]:
#Total all unique players to find percentages
total_unique_players = (unique_male_total + unique_female_total + unique_nd_total)

In [12]:
#Unique male percentage
male_player_percent = unique_male_total / total_unique_players * 100
male_percent = round(male_player_percent, 2)

In [13]:
#Unique female percentage
female_player_percent = unique_female_total / total_unique_players * 100
female_percent = round(female_player_percent, 2)

In [14]:
#Unique other/non-disclosed percentage
nd_player_percent = unique_nd_total / total_unique_players * 100
nd_percent = round(nd_player_percent, 2)

In [15]:
gender_data = {"Gender": ["Male", "Female", "Other / Non-Disclosed"],
               "Total Count": [unique_male_total, unique_female_total, unique_nd_total],
               "Percentage of Players": [male_percent, female_percent, nd_percent]}
gender_df = pd.DataFrame(gender_data)
gender_df

Unnamed: 0,Gender,Total Count,Percentage of Players
0,Male,484,84.03
1,Female,81,14.06
2,Other / Non-Disclosed,11,1.91


In [16]:
gender_final_df = gender_df.set_index("Gender")
gender_final_df

Unnamed: 0_level_0,Total Count,Percentage of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,484,84.03
Female,81,14.06
Other / Non-Disclosed,11,1.91


## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [17]:
#Start with initial data again
purchase_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [18]:
#Find Purchase Count by Gender
gender_count_df = purchase_df.groupby("Gender")["Price"].count()
gender_purch_count = pd.DataFrame(gender_count_df)
# gender_purch_count

In [19]:
#Assign variables to each gender/price pair
#female
female_purch_count = gender_purch_count.iloc[0, 0]
female_purch_count
# male
male_purch_count = gender_purch_count.iloc[1, 0]
male_purch_count
# other/non-disclosed
nd_purch_count = gender_purch_count.iloc[2, 0]
nd_purch_count

15

In [20]:
#Find Average Purchase Price by Gender
gender_price_df = purchase_df.groupby("Gender")["Price"].mean().round(2)
gender_avg_purch = pd.DataFrame(gender_price_df)
# gender_avg_purch

In [21]:
#Assign variables to each gender/average purchase price pair
#female
female_purch_price = gender_avg_purch.iloc[0, 0]
female_purch_price
# male
male_purch_price = gender_avg_purch.iloc[1, 0]
male_purch_price
# other/non-disclosed
nd_purch_price = gender_avg_purch.iloc[2, 0]
nd_purch_price

3.35

In [22]:
#Find Total Purchase Value by Gender

#Set index to Gender
gender_person_purch = purchase_df.set_index("Gender")
#Make into a dataframe
gender_person_purch_df = pd.DataFrame(gender_person_purch)
#Print to check results
# gender_person_purch_df

In [77]:
#Grab only the SN and Price categories
gpp_df_SN = gender_person_purch_df[["SN", "Price"]]
#Print to check results
# gpp_df_SN

Unnamed: 0_level_0,SN,Price
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,Lisim78,3.53
Male,Lisovynya38,1.56
Male,Ithergue48,4.88
Male,Chamassasya86,3.27
Male,Iskosia90,1.44
...,...,...
Female,Aethedru70,3.54
Male,Iral74,1.63
Male,Yathecal72,3.46
Male,Sisur91,4.19


In [24]:
# Grab only males from previous dataframe
male_purch_table = gpp_df_SN.loc["Male", :]
# Print to check the results
# male_purch_table

In [25]:
#Find sum of the price column for males
male_total_purch = male_purch_table["Price"].sum()
male_total_purch

1967.64

In [26]:
# Grab only females from previous dataframe
female_purch_table = gpp_df_SN.loc["Female", :]
# Print to check the results
# female_purch_table

In [27]:
#Find sum of the price column for females
female_total_purch = female_purch_table["Price"].sum()
female_total_purch

361.94

In [28]:
# Grab only other/non-discloseds from previous dataframe
nd_purch_table = gpp_df_SN.loc["Other / Non-Disclosed", :]
# Print to check the results
# nd_purch_table

In [29]:
#Find sum of the price column for other/nds
nd_total_purch = nd_purch_table["Price"].sum()
nd_total_purch

50.19

In [30]:
#Find Average Purchase Total Per Person by Gender

# Set the male purchase table index to SN
male_purch = male_purch_table.set_index("SN")
# Turn the result into a dataframe
male_purch_df = pd.DataFrame(male_purch)
# Group by the user screen name
male_purch_grp = male_purch_df.groupby(["SN"])
# Find the sum of each screen name's purchases & round to two decimals
male_purch_sum = male_purch_grp.sum().round(2)
# Print to check the results
# print(male_purch_sum)

In [31]:
#Find average of the purchase totals per person
male_pp_avg = male_purch_sum["Price"].mean().round(2)
#Print to check
male_pp_avg

4.07

In [32]:
# Set the female purchase table index to SN
female_purch = female_purch_table.set_index("SN")
# Turn the result into a dataframe
female_purch_df = pd.DataFrame(female_purch)
# Group by the user screen name
female_purch_grp = female_purch_df.groupby(["SN"])
# Find the sum of each screen name's purchases & round to two decimals
female_purch_sum = female_purch_grp.sum().round(2)
# Print to check results
# print(female_purch_sum)

In [33]:
#Find average of the purchase totals per person
female_pp_avg = female_purch_sum["Price"].mean().round(2)
#Print to check
female_pp_avg

4.47

In [34]:
# Set the nd purchase table index to SN
nd_purch = nd_purch_table.set_index("SN")
# Turn the result into a dataframe
nd_purch_df = pd.DataFrame(nd_purch)
# Group by the user screen name
nd_purch_grp = nd_purch_df.groupby(["SN"])
# Find the sum of each screen name's purchases & round to two decimals
nd_purch_sum = nd_purch_grp.sum().round(2)
# Print to check results
# print(nd_purch_mean)

In [35]:
#Find average of the purchase totals per person
nd_pp_avg = nd_purch_sum["Price"].mean().round(2)
#Print to check
nd_pp_avg

4.56

In [36]:
gender_data = {"Gender": ["Male", "Female", "Other / Non-Disclosed"],
               "Purchase Count": [male_purch_count, female_purch_count, nd_purch_count],
               "Average Purchase Price": [male_purch_price, female_purch_price, nd_purch_price],
               "Total Purchase Value": [male_total_purch, female_total_purch, nd_total_purch],
              "Avg Total Purchase Per Person": [male_pp_avg, female_pp_avg, nd_pp_avg]}
gender_df = pd.DataFrame(gender_data)
gender_df.set_index("Gender")

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase Per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Male,652,3.02,1967.64,4.07
Female,113,3.2,361.94,4.47
Other / Non-Disclosed,15,3.35,50.19,4.56


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table

In [37]:
#Start again with initial data
purchase_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [38]:
#Create bins to split by age
age_bins = [0, 9, 14, 19, 24, 29, 34, 39, 100]
#Name the bins
age_bin_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

In [39]:
#Changing the dataframe itself, so make a copy to work on instead
purchase_copy = purchase_df.copy()

In [40]:
purchase_copy["Age Range"] = pd.cut(purchase_copy["Age"], bins=age_bins, labels=age_bin_names, include_lowest=True)
purchase_copy.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price,Age Range
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53,20-24
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56,40+
2,2,Ithergue48,24,Male,92,Final Critic,4.88,20-24
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27,20-24
4,4,Iskosia90,23,Male,131,Fury,1.44,20-24


In [41]:
age_range = purchase_copy.groupby("Age Range")

unique_SN_age = age_range["SN"].unique()

uni_SN_age = pd.DataFrame(unique_SN_age)

uni_SN_age

Unnamed: 0_level_0,SN
Age Range,Unnamed: 1_level_1
<10,"[Eusri44, Haillyrgue51, Seuthep89, Heudai45, T..."
10-14,"[Lirtossa84, Aeralria27, Reulae52, Zhisrisu83,..."
15-19,"[Idai61, Sondim73, Aidaillodeu39, Yaliru88, Ji..."
20-24,"[Lisim78, Ithergue48, Chamassasya86, Iskosia90..."
25-29,"[Lisirra87, Yasrisu92, Phaelap26, Chamirraya83..."
30-34,"[Chamalo71, Siala43, Idaisuir85, Iaralrgue74, ..."
35-39,"[Itheria73, Chanosian48, Saesrideu94, Siarithr..."
40+,"[Lisovynya38, Eyrian71, Jiskjask85, Isursuir31..."


In [42]:
#Give a variable to each row for count
under_ten = len(uni_SN_age.iloc[0,0])
ten_fourtn = len(uni_SN_age.iloc[1,0])
fiftn_ninetn = len(uni_SN_age.iloc[2,0])
twenty_twfour = len(uni_SN_age.iloc[3,0])
twfive_twnine = len(uni_SN_age.iloc[4,0])
thirty_thfour = len(uni_SN_age.iloc[5,0])
thfive_thnine = len(uni_SN_age.iloc[6,0])
forty_plus = len(uni_SN_age.iloc[7,0])

In [43]:
under_ten_pct = round((under_ten / player_amt * 100), 2)
ten_fourtn_pct = round((ten_fourtn / player_amt * 100), 2)
fiftn_ninetn_pct = round((fiftn_ninetn / player_amt * 100), 2)
twenty_twfour_pct = round((twenty_twfour / player_amt * 100), 2)
twfive_twnine_pct = round((twfive_twnine / player_amt * 100), 2)
thirty_thfour_pct = round((thirty_thfour / player_amt * 100), 2)
thfive_thnine_pct = round((thfive_thnine / player_amt * 100), 2)
forty_plus_pct = round((forty_plus / player_amt * 100), 2)

In [52]:
age_data = {"Age Range": ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"],
               "Total Count": [under_ten, ten_fourtn, fiftn_ninetn, twenty_twfour, twfive_twnine, thirty_thfour, thfive_thnine, forty_plus],
              "Percentage of Players": [under_ten_pct, ten_fourtn_pct, fiftn_ninetn_pct, twenty_twfour_pct, twfive_twnine_pct, thirty_thfour_pct, thfive_thnine_pct, forty_plus_pct]}
age_df = pd.DataFrame(age_data)
age_df.set_index("Age Range")

Unnamed: 0_level_0,Total Count,Percentage of Players
Age Range,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,17,2.95
10-14,22,3.82
15-19,107,18.58
20-24,258,44.79
25-29,77,13.37
30-34,52,9.03
35-39,31,5.38
40+,12,2.08


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [53]:
#Already binned the purchase_df by age, grab age_range variable from above
#Count number of purchases in each age range
age_range_count = age_range.count()
# age_range_count

In [54]:
#Give variable to each age-range/purchase-count value
under_ten_count = age_range_count.iloc[0,0]
ten_fourtn_count = age_range_count.iloc[1,0]
fiftn_ninetn_count = age_range_count.iloc[2,0]
twenty_twfour_count = age_range_count.iloc[3,0]
twfive_twnine_count = age_range_count.iloc[4,0]
thirty_thfour_count = age_range_count.iloc[5,0]
thfive_thnine_count = age_range_count.iloc[6,0]
forty_plus_count = age_range_count.iloc[7,0]

In [68]:
#Find Total Purchase Value per age range
tprice_age = age_range["Price"].sum()
total_price_age = pd.DataFrame(tprice_age)
total_price_age

Unnamed: 0_level_0,Price
Age Range,Unnamed: 1_level_1
<10,77.13
10-14,82.78
15-19,412.89
20-24,1114.06
25-29,293.0
30-34,214.0
35-39,147.67
40+,38.24


In [73]:
#Assign variable to each age-range/total-purchase value (Total Purchase Value variables)
under_ten_purch = total_price_age.iloc[0,0]
ten_fourtn_purch = total_price_age.iloc[1,0]
fiftn_ninetn_purch = total_price_age.iloc[2,0]
twenty_twfour_purch = total_price_age.iloc[3,0]
twfive_twnine_purch = total_price_age.iloc[4,0]
thirty_thfour_purch = total_price_age.iloc[5,0]
thfive_thnine_purch = total_price_age.iloc[6,0]
forty_plus_purch = total_price_age.iloc[7,0]

In [74]:
#Calculate purchase average for each age range (Average Purchase Price variables)
under_ten_avg = round((under_ten_purch / under_ten_count), 2)
ten_fourtn_avg = round((ten_fourtn_purch / ten_fourtn_count), 2)
fiftn_ninetn_avg = round((fiftn_ninetn_purch / fiftn_ninetn_count), 2)
twenty_twfour_avg = round((twenty_twfour_purch / twenty_twfour_count), 2)
twfive_twnine_avg = round((twfive_twnine_purch / twfive_twnine_count), 2)
thirty_thfour_avg = round((thirty_thfour_purch / thirty_thfour_count), 2)
thfive_thnine_avg = round((thfive_thnine_purch / thfive_thnine_count), 2)
forty_plus_avg = round((forty_plus_purch / forty_plus_count), 2)

In [89]:
#Find Avg Total Purchase Per Person
#Start with binned data
# purchase_copy

In [90]:
#Grab only the SN, Price, and Age Range categories
age_pp_SN = purchase_copy[["SN", "Price", "Age Range"]]
#Print to check results
# age_pp_SN

In [93]:
#Set index to "Age Range"
age_pp_unique = age_pp_SN.set_index("Age Range")
# age_pp_unique

In [107]:
# Grab each category from previous dataframe
unique_under_ten = age_pp_unique.loc["<10", :]
unique_ten_fourtn = age_pp_unique.loc["10-14", :]
unique_fiftn_ninetn = age_pp_unique.loc["15-19", :]
unique_twenty_twfour = age_pp_unique.loc["20-24", :]
unique_twfive_twnine = age_pp_unique.loc["25-29", :]
unique_thirty_thfour = age_pp_unique.loc["30-34", :]
unique_thfive_thnine = age_pp_unique.loc["35-39", :]
unique_forty_plus = age_pp_unique.loc["40+", :]

Unnamed: 0_level_0,SN,Price
Age Range,Unnamed: 1_level_1,Unnamed: 2_level_1
10-14,Lirtossa84,1.61
10-14,Aeralria27,4.09
10-14,Reulae52,4.18
10-14,Zhisrisu83,4.35
10-14,Raesty92,3.53
10-14,Mindadaran26,3.75
10-14,Lisasi93,2.45
10-14,Zhisrisu83,3.54
10-14,Aillyrin83,2.89
10-14,Sidap51,4.24


In [108]:
#Find Average Purchase Total Per Person by Age Range

# Group by the user screen name
under_ten_grp = unique_under_ten.groupby(["SN"])
# Find the sum of each screen name's purchases & round to two decimals
under_ten_ppsum = under_ten_grp.sum().round(2)


# #Repeat for all age ranges
ten_fourtn_grp = unique_ten_fourtn.groupby(["SN"])
ten_fourtn_ppsum = unique_ten_fourtn.sum().round(2)

# fiftn_ninetn_grp = unique_fiftn_ninetn.groupby(["SN"])
# fiftn_ninetn_ppsum = unique_fiftn_ninetn.sum().round(2)

# ten_fourtn_grp = unique_ten_fourtn.groupby(["SN"])
# ten_fourtn_ppsum = unique_fiftn_ninetn.sum().round(2)

# ten_fourtn_grp = unique_ten_fourtn.groupby(["SN"])
# ten_fourtn_ppsum = unique_fiftn_ninetn.sum().round(2)

# ten_fourtn_grp = unique_ten_fourtn.groupby(["SN"])
# ten_fourtn_ppsum = unique_fiftn_ninetn.sum().round(2)

# ten_fourtn_grp = unique_ten_fourtn.groupby(["SN"])
# ten_fourtn_ppsum = unique_fiftn_ninetn.sum().round(2)

# ten_fourtn_grp = unique_ten_fourtn.groupby(["SN"])
# ten_fourtn_ppsum = unique_fiftn_ninetn.sum().round(2)

TypeError: can't multiply sequence by non-int of type 'float'

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,$3.79,$18.96
Idastidru52,4,$3.86,$15.45
Chamjask73,3,$4.61,$13.83
Iral74,4,$3.40,$13.62
Iskadarya95,3,$4.37,$13.10


## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
145,Fiery Glass Crusader,9,$4.58,$41.22
132,Persuasion,9,$3.22,$28.99
108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77


## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22
103,Singed Scalpel,8,$4.35,$34.80
