### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [13]:
# Dependencies and Setup
import numpy
import pandas as pd

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data_df = pd.read_csv(file_to_load)
purchase_data_df

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46
778,778,Sisur91,7,Male,92,Final Critic,4.19


In [14]:
# Determine if player names appear more than once
purchase_data_df.duplicated(subset=['SN'])

0      False
1      False
2      False
3      False
4      False
       ...  
775    False
776     True
777    False
778    False
779     True
Length: 780, dtype: bool

## Player Count

* Display the total number of players


In [15]:
# Creating a dataframe that drops the duplicate names
no_duplicate_names_df = purchase_data_df.drop_duplicates(subset=['SN'])
no_duplicate_names_df["SN"].value_counts()

# Creating a player count by taking the length of the new list of players with dropped duplicates
total_players = len(no_duplicate_names_df["SN"].value_counts())
print(f"Total players: {total_players}")

Total players: 576


In [16]:
# See if there are duplicate item names (applies to cell below)
purchase_data_df.duplicated(subset=['Item Name'])

0      False
1      False
2      False
3      False
4      False
       ...  
775     True
776     True
777     True
778     True
779     True
Length: 780, dtype: bool

## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [17]:
# Creating a dataframe that drops the duplicate items
no_duplicate_items_df = purchase_data_df.drop_duplicates(subset=['Item Name'])
no_duplicate_items_df["Item Name"].value_counts()

# Creating an item count by taking the length of the new list of items with dropped duplicates
unique_items = len(no_duplicate_items_df["Item Name"].value_counts())
print(f"Unique items: {unique_items}")

Unique items: 179


In [18]:
# Number of purchases
purchase_data_df["Purchase ID"].value_counts()
num_purchases = len(purchase_data_df["Purchase ID"].value_counts())
print(f"Number of purchases: {num_purchases}")

Number of purchases: 780


In [19]:
# Total revenue
revenue = purchase_data_df["Price"].sum()
print(f"Total revenue: ${revenue}")

Total revenue: $2379.77


In [20]:
# Average price
avg_price = round(revenue/num_purchases, 2)
print(f"Average price: ${avg_price}")

Average price: $3.05


In [21]:
# Create a summary dataframe for purchases
purchasing_analysis_df = pd.DataFrame(
{"No. Unique Items": [unique_items],
 "No. Purchases": [num_purchases],
 "Total Revenue": [revenue],
 "Average Price": [avg_price]
})
purchasing_analysis_df

Unnamed: 0,No. Unique Items,No. Purchases,Total Revenue,Average Price
0,179,780,2379.77,3.05


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [22]:
# Filter data so that only gender is in the DataFrame
gender_df = no_duplicate_names_df["Gender"]

In [23]:
# Gender count
Gender_count = no_duplicate_names_df["Gender"].value_counts()

# Gender total
Gender_total = Gender_count.sum()

# Gender percentages
Gender_percent = (Gender_count/Gender_total)*100

In [24]:
# Create a DataFrame for gender demographics
gender_df = pd.DataFrame ({"Total Count": Gender_count, 
                           "Percentage of Players": Gender_percent})

# Format the percentages so that it rounds to the second decimal place and adds a "%
gender_df["Percentage of Players"] = gender_df["Percentage of Players"].map("{:.2f}%".format)
gender_df

Unnamed: 0,Total Count,Percentage of Players
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [25]:
# Using GroupBy in order to separate the data into fields according to "Gender"
gender_purchases_df = purchase_data_df.groupby(['Gender'])
gender_purchases_df.count().head()

Unnamed: 0_level_0,Purchase ID,SN,Age,Item ID,Item Name,Price
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Female,113,113,113,113,113,113
Male,652,652,652,652,652,652
Other / Non-Disclosed,15,15,15,15,15,15


In [26]:
# Purchase count by gender
gender_purchase_count = gender_purchases_df["Purchase ID"].count()
gender_purchase_count

Gender
Female                   113
Male                     652
Other / Non-Disclosed     15
Name: Purchase ID, dtype: int64

In [27]:
# Total purchase value by gender
gender_total_purchases = gender_purchases_df["Price"].sum()
gender_total_purchases

Gender
Female                    361.94
Male                     1967.64
Other / Non-Disclosed      50.19
Name: Price, dtype: float64

In [28]:
# Average purchase price by gender
gender_avgprice = round(gender_total_purchases/gender_purchase_count, 2)
gender_avgprice

Gender
Female                   3.20
Male                     3.02
Other / Non-Disclosed    3.35
dtype: float64

In [77]:
# Assigning players a unique id to account for multiple purchases
person_totals_df = purchase_data_df.assign(id=(purchase_data_df['SN'] + '_').astype('category').cat.codes)

In [78]:
# Calculating price totals for each player based on unique id
price_per_person = person_totals_df.groupby(['Gender', 'id'])['Price'].sum()
price_per_person

Gender                 id 
Female                 1      4.48
                       20     8.64
                       30     3.54
                       35     3.45
                       38     4.48
                              ... 
Other / Non-Disclosed  329    1.33
                       370    5.28
                       441    2.22
                       477    6.91
                       500    4.75
Name: Price, Length: 576, dtype: float64

In [79]:
# Calculating number of purchases for each player by taking counts based on unique id
purchases_per_person = person_totals_df.groupby(['Gender', 'id'])['Purchase ID'].count()
purchases_per_person

Gender                 id 
Female                 1      1
                       20     2
                       30     1
                       35     1
                       38     1
                             ..
Other / Non-Disclosed  329    1
                       370    2
                       441    1
                       477    2
                       500    1
Name: Purchase ID, Length: 576, dtype: int64

In [73]:
# Calculating avg. total purchase per person
avg_tot_pers = price_per_person/purchases_per_person
avg_tot_pers

# NEXT: HOW TO GET THIS INTO THE SUMMARY DATAFRAME & MATCH UP TO PRIOR DATA!!!!

id   Gender
0    Male      2.280000
1    Female    4.480000
2    Male      4.910000
3    Male      4.320000
4    Male      1.790000
                 ...   
571  Female    2.073333
572  Male      3.010000
573  Female    4.580000
574  Male      3.945000
575  Male      2.676667
Length: 576, dtype: float64

In [74]:
# Create a DataFrame for purchasing analysis by gender

gender_df = pd.DataFrame ({"Total Count": gender_purchase_count,
                           "Average Purchase Price": gender_avgprice,
                           "Total Purchase Value": gender_total_purchases})
gender_df

Unnamed: 0_level_0,Total Count,Average Purchase Price,Total Purchase Value
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,113,3.2,361.94
Male,652,3.02,1967.64
Other / Non-Disclosed,15,3.35,50.19


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

