### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [3]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)
purchase_data

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46
778,778,Sisur91,7,Male,92,Final Critic,4.19


## Player Count

* Display the total number of players


In [95]:
player_count = purchase_data.loc[:,["Gender","SN"]]
player_count = player_count.drop_duplicates()
player_count_total = len(player_count)
player_count_total

576

In [102]:
player_count_by_gender = player_count.groupby("Gender").count()
player_count_by_gender

Unnamed: 0_level_0,SN
Gender,Unnamed: 1_level_1
Female,81
Male,484
Other / Non-Disclosed,11


In [9]:
#number of unique items
unique_items = purchase_data["Item ID"].nunique()
unique_items

179

## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [11]:
#average price
average_price = purchase_data["Price"].mean()
average_price

3.050987179487176

## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [12]:
#number of purchases
purchases_num = purchase_data["Item ID"].count()
purchases_num

780


## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [13]:
#total revenue
total_rev = purchase_data["Price"].sum()
total_rev

2379.77

In [22]:
summary = {
    "Number of Unique Items": "179", "Average Price": "3.05", 
    "Number of Purchases": "780", "Total Revenue": "2379.77"
}
summary_df = pd.DataFrame(list(summary.items()), index=['0', '1', '2', '3'])
summary_df

Unnamed: 0,0,1
0,Number of Unique Items,179.0
1,Average Price,3.05
2,Number of Purchases,780.0
3,Total Revenue,2379.77


## Age Demographics

In [34]:
summary_df.transpose()

Unnamed: 0,0,1,2,3
0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
1,179,3.05,780,2379.77


* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


652

113

## Purchasing Analysis (Age)

In [86]:
gender_count = purchase_data["Gender"].value_counts()
gender_count

Male                     652
Female                   113
Other / Non-Disclosed     15
Name: Gender, dtype: int64

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [30]:
percent_male = purchase_data["Gender"].value_counts()["Male"]/purchase_data["Gender"].count()*100
percent_male

83.58974358974359

In [31]:
percent_female = purchase_data["Gender"].value_counts()["Female"]/purchase_data["Gender"].count()*100
percent_female

14.487179487179489

## Top Spenders

In [32]:
percent_undisc = purchase_data["Gender"].value_counts()["Other / Non-Disclosed"]/purchase_data["Gender"].count()*100
percent_undisc

1.9230769230769231

In [38]:
gender = {'Total Count':[682, 113, 15],'Percentages of Players':[83.59,14.49,1.92]}


ValueError: DataFrame constructor not properly called!

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [48]:
gender_df = pd.DataFrame(gender, index = ["Male","Female","Other/Non-Disclosed"])
gender_df

Unnamed: 0,Total Count,Percentages of Players
Male,682,83.59
Female,113,14.49
Other/Non-Disclosed,15,1.92


## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [83]:
purchase_count_gender = purchase_data.groupby("Gender").count()["Item ID"]
purchase_count_gender

Gender
Female                   113
Male                     652
Other / Non-Disclosed     15
Name: Item ID, dtype: int64

## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



In [50]:
average_price_gender = purchase_data.groupby ("Gender").mean()["Price"]
average_price_gender

Gender
Female                   3.203009
Male                     3.017853
Other / Non-Disclosed    3.346000
Name: Price, dtype: float64

In [52]:
total_by_gender =purchase_data.groupby ("Gender").sum()["Price"]
total_by_gender

Gender
Female                    361.94
Male                     1967.64
Other / Non-Disclosed      50.19
Name: Price, dtype: float64

In [105]:
# total_aver_pp_gender = total_by_gender.droplevel[(1)]/player_count_by_gender.droplevel[(1)]
# total_aver_pp_gender
merge_by_gender = pd.merge (player_count_by_gender, total_by_gender, on = "Gender")
merge_by_gender

Unnamed: 0_level_0,SN,Price
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,81,361.94
Male,484,1967.64
Other / Non-Disclosed,11,50.19


In [106]:
total_aver_by_gender = merge_by_gender["Price"]/merge_by_gender["SN"]
total_aver_by_gender

Gender
Female                   4.468395
Male                     4.065372
Other / Non-Disclosed    4.562727
dtype: float64

In [107]:
purchases_gender = {'Purchase Count':[682, 113, 15],'Average Price':[3.20,3.02,3.35], "Total Value":[361.94,1967.64,50.19], "Total Average Per Person":[4.47,4.07,4.56]}
sum_gender_df = pd.DataFrame(purchases_gender, index = ["Male","Female","Other/Non-Disclosed"])
sum_gender_df

Unnamed: 0,Purchase Count,Average Price,Total Value,Total Average Per Person
Male,682,3.2,361.94,4.47
Female,113,3.02,1967.64,4.07
Other/Non-Disclosed,15,3.35,50.19,4.56


In [54]:
# Establish bins for ages
# Categorize the existing players using the age bins. Hint: use pd.cut()
# Calculate the numbers and percentages by age group
# Create a summary data frame to hold the results
# Optional: round the percentage column to two decimal points
# Display Age Demographics Table
age_bins = [0,9, 14, 19, 24, 29,34,39, 100]
ageGroups= ["<10", "10-14","15-19","20-24","25-29","30-34","35-39",">40"]
purchase_data["Age Group"] = pd.cut (purchase_data["Age"], bins = age_bins, labels =ageGroups)
purchase_data

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price,Age Group
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53,20-24
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56,>40
2,2,Ithergue48,24,Male,92,Final Critic,4.88,20-24
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27,20-24
4,4,Iskosia90,23,Male,131,Fury,1.44,20-24
...,...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54,20-24
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63,20-24
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46,20-24
778,778,Sisur91,7,Male,92,Final Critic,4.19,<10


In [55]:
purchase_data.groupby("Age Group").count()["Age"]

Age Group
<10       23
10-14     28
15-19    136
20-24    365
25-29    101
30-34     73
35-39     41
>40       13
Name: Age, dtype: int64

In [58]:
percent_age = purchase_data.groupby("Age Group").count()["Age"]/purchase_data["Age"].count()*100
percent_age

Age Group
<10       2.948718
10-14     3.589744
15-19    17.435897
20-24    46.794872
25-29    12.948718
30-34     9.358974
35-39     5.256410
>40       1.666667
Name: Age, dtype: float64

In [59]:
age_demo = {'Total Count':[23, 28, 136,365,101,73,41,13],'Percentage of Players':[2.95,3.59,17.44,46.80,12.95,9.36,5.26, 1.67]}
age_demo_df = pd.DataFrame(age_demo, index = ["<10", "10-14","15-19","20-24","25-29","30-34","35-39",">40"])
age_demo_df

Unnamed: 0,Total Count,Percentage of Players
<10,23,2.95
10-14,28,3.59
15-19,136,17.44
20-24,365,46.8
25-29,101,12.95
30-34,73,9.36
35-39,41,5.26
>40,13,1.67


In [62]:
# group the purchase_data data frame by age group
# Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below
# Create a summary data frame to hold the results
# Optional: give the displayed data cleaner formatting
# Display the summary data frame
average_price_age = purchase_data.groupby ("Age Group").mean()["Price"]
average_price_age

Age Group
<10      3.353478
10-14    2.956429
15-19    3.035956
20-24    3.052219
25-29    2.900990
30-34    2.931507
35-39    3.601707
>40      2.941538
Name: Price, dtype: float64

In [63]:
total_by_age =purchase_data.groupby ("Age Group").sum()["Price"]
total_by_age

Age Group
<10        77.13
10-14      82.78
15-19     412.89
20-24    1114.06
25-29     293.00
30-34     214.00
35-39     147.67
>40        38.24
Name: Price, dtype: float64

In [65]:
purchases_age = {'Purchase Count':[23, 28, 136,365,101,73,41,13],'Average Price':[3.35,2.96,3.04,3.05,2.90,2.93,3.60,2.94], "Total Value":[77.13,82.78,412.89,1114.06, 293.00, 214.00, 147.67, 38.24]}
sum_age_df = pd.DataFrame(purchases_age, index = ["<10","10-14","15-19","20-24","25-29","30-34","35-39",">40"])
sum_age_df

Unnamed: 0,Purchase Count,Average Price,Total Value
<10,23,3.35,77.13
10-14,28,2.96,82.78
15-19,136,3.04,412.89
20-24,365,3.05,1114.06
25-29,101,2.9,293.0
30-34,73,2.93,214.0
35-39,41,3.6,147.67
>40,13,2.94,38.24


In [81]:
#top spenders - purchase count, average price, total value
top_spender = purchase_data.groupby ("SN")["Price"].sum ().nlargest (5)
top_spender

SN
Lisosia93      18.96
Idastidru52    15.45
Chamjask73     13.83
Iral74         13.62
Iskadarya95    13.10
Name: Price, dtype: float64

In [117]:
top_spender1= purchase_data.loc[purchase_data ["SN"]=="Lisosia93", ["Item ID", "Price"]]
top_spender2= purchase_data.loc[purchase_data ["SN"]=="Idastidru52", ["Item ID", "Price"]]
top_spender3= purchase_data.loc[purchase_data ["SN"]=="Chamjask73", ["Item ID", "Price"]]
top_spender4= purchase_data.loc[purchase_data ["SN"]=="Iral74", ["Item ID", "Price"]]
top_spender5= purchase_data.loc[purchase_data ["SN"]=="Iskadarya95", ["Item ID", "Price"]]
print (top_spender1)
print (top_spender2)
print (top_spender3)
print (top_spender4)
print (top_spender5)

     Item ID  Price
74        89   4.64
120       24   3.81
224      157   4.80
603      132   3.19
609       40   2.52
     Item ID  Price
290      147   4.93
490      148   4.03
543      121   1.60
676      111   4.89
     Item ID  Price
222      178   4.23
520      109   4.76
564       52   4.84
     Item ID  Price
128       58   4.14
623      114   3.82
758      182   4.03
776      164   1.63
     Item ID  Price
148      148   4.03
247       82   4.90
318       91   4.17


In [120]:
top_spender_df = pd.DataFrame({
    "SN": ["Lisosia93", "Idastidru52", "Chamjask73", "Iral74", "Iskadarya95"],
    "Purchase Count": [5,4,3,4,3],
    "Average Purchase Price": [3.79, 3.86,4.61,3.41, 4.37],
    "Total Purchase Value": [18.96, 15.45, 13.83, 13.62, 13.10]
})
top_spender_df

Unnamed: 0,SN,Purchase Count,Average Purchase Price,Total Purchase Value
0,Lisosia93,5,3.79,18.96
1,Idastidru52,4,3.86,15.45
2,Chamjask73,3,4.61,13.83
3,Iral74,4,3.41,13.62
4,Iskadarya95,3,4.37,13.1


In [121]:
# Retrieve the Item ID, Item Name, and Item Price columns
# Group by Item ID and Item Name. Perform calculations to obtain purchase count, item price, and total purchase value
# Create a summary data frame to hold the results
# Sort the purchase count column in descending order
items_df = pd.DataFrame ({"Item ID": purchase_data['Item ID'],
                         "Item Name": purchase_data["Item Name"],
                          "Item Price": purchase_data["Price"]
                         })
items_df

Unnamed: 0,Item ID,Item Name,Item Price
0,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,143,Frenzied Scimitar,1.56
2,92,Final Critic,4.88
3,100,Blindscythe,3.27
4,131,Fury,1.44
...,...,...,...
775,60,Wolf,3.54
776,164,Exiled Doomblade,1.63
777,67,"Celeste, Incarnation of the Corrupted",3.46
778,92,Final Critic,4.19


In [127]:
item_df_grouped = items_df.groupby (["Item ID","Item Name"])["Item Price"].sum ().nlargest(5)
item_df_grouped

Item ID  Item Name                                   
92       Final Critic                                    59.99
178      Oathbreaker, Last Hope of the Breaking Storm    50.76
82       Nirvana                                         44.10
145      Fiery Glass Crusader                            41.22
103      Singed Scalpel                                  34.80
Name: Item Price, dtype: float64

In [131]:
item_df_count1= items_df.loc[items_df ["Item ID"]==92].count()
item_df_count2= items_df.loc[items_df ["Item ID"]==178].count()
item_df_count3= items_df.loc[items_df ["Item ID"]==82].count()
item_df_count4= items_df.loc[items_df ["Item ID"]==145].count()
item_df_count5= items_df.loc[items_df ["Item ID"]==103].count()
print (item_df_count1)
print (item_df_count2)
print (item_df_count3)
print (item_df_count4)
print (item_df_count5)

Item ID       13
Item Name     13
Item Price    13
dtype: int64
Item ID       12
Item Name     12
Item Price    12
dtype: int64
Item ID       9
Item Name     9
Item Price    9
dtype: int64
Item ID       9
Item Name     9
Item Price    9
dtype: int64
Item ID       8
Item Name     8
Item Price    8
dtype: int64


In [134]:
item_df_price1= items_df.loc[items_df ["Item ID"]==92]
item_df_price2= items_df.loc[items_df ["Item ID"]==178]
item_df_price3= items_df.loc[items_df ["Item ID"]==82]
item_df_price4= items_df.loc[items_df ["Item ID"]==145]
item_df_price5= items_df.loc[items_df ["Item ID"]==103]
print (item_df_price1)
print (item_df_price2)
print (item_df_price3)
print (item_df_price4)
print (item_df_price5)

     Item ID     Item Name  Item Price
2         92  Final Critic        4.88
99        92  Final Critic        4.19
252       92  Final Critic        4.88
273       92  Final Critic        4.88
277       92  Final Critic        4.88
411       92  Final Critic        4.19
536       92  Final Critic        4.19
712       92  Final Critic        4.88
722       92  Final Critic        4.88
767       92  Final Critic        4.88
768       92  Final Critic        4.88
774       92  Final Critic        4.19
778       92  Final Critic        4.19
     Item ID                                     Item Name  Item Price
25       178  Oathbreaker, Last Hope of the Breaking Storm        4.23
61       178  Oathbreaker, Last Hope of the Breaking Storm        4.23
62       178  Oathbreaker, Last Hope of the Breaking Storm        4.23
72       178  Oathbreaker, Last Hope of the Breaking Storm        4.23
222      178  Oathbreaker, Last Hope of the Breaking Storm        4.23
464      178  Oathbreaker, L

In [4]:
top_items_df = pd.DataFrame ({
    "Item ID": [92, 178,82,145,103],
    "Item Name": ["Final Critic", "Oathbreaker, Last Hope of the Breaking Storm",
                 "Nirvana", "Fiery Glass Crusader", "Singed Scalpel"],
    "Purchase Count": [13,12,9,9,8],
    "Purchase Price": [4.88, 4.23,4.9, 4.58, 4.35],
    "Total Purchase Value": [59.99,50.76, 44.10, 41.22,34.80]
})
top_items_df

Unnamed: 0,Item ID,Item Name,Purchase Count,Purchase Price,Total Purchase Value
0,92,Final Critic,13,4.88,59.99
1,178,"Oathbreaker, Last Hope of the Breaking Storm",12,4.23,50.76
2,82,Nirvana,9,4.9,44.1
3,145,Fiery Glass Crusader,9,4.58,41.22
4,103,Singed Scalpel,8,4.35,34.8
