### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [21]:
# Dependencies and Setup
import pandas as pd
import os


# File to Load (Remember to Change These)
file_to_load = ("Resources/purchase_data.csv")

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)

In [26]:
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [23]:
purchase_data.count()

Purchase ID    780
SN             780
Age            780
Gender         780
Item ID        780
Item Name      780
Price          780
dtype: int64

In [24]:
purchase_data.dtypes

Purchase ID      int64
SN              object
Age              int64
Gender          object
Item ID          int64
Item Name       object
Price          float64
dtype: object

In [25]:
purchase_data["SN"].value_counts()

Lisosia93       5
Idastidru52     4
Iral74          4
Siallylis44     3
Hiaral50        3
               ..
Saerallora71    1
Aeral97         1
Frichosia58     1
Undimsya85      1
Malon70         1
Name: SN, Length: 576, dtype: int64

In [6]:
players_count = len(purchase_data["SN"].unique())
players_count

576

In [7]:
summary_player = pd.DataFrame({"Total Players": [players_count]})
summary_player

Unnamed: 0,Total Players
0,576


## Player Count

* Display the total number of players


In [8]:
players_count = len(purchase_data["SN"].unique())
players_count 

576

In [9]:
summary_player = pd.DataFrame({"Total Players": [players_count]})
summary_player

Unnamed: 0,Total Players
0,576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [10]:
number_items = len(purchase_data["Item Name"].unique())
number_items

179

In [11]:
average_df = purchase_data["Price"].mean()
average_df

3.050987179487176

In [12]:
purchase_number = purchase_data["Purchase ID"].count()
purchase_number

780

In [13]:
revenue_suma = purchase_data["Price"].sum()
revenue_suma

2379.77

In [14]:
summary_rev = pd.DataFrame({"Number of Unique Items": [number_items],"Average Price": average_df,
                              "Number of Purchase": purchase_number,
                              "Total Revenue": revenue_suma})
summary_rev.style.format({"Average Price": "${:.2f}", "Total Revenue": "${:,.2f}"})

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchase,Total Revenue
0,179,$3.05,780,"$2,379.77"


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [46]:
gendertable = purchase_data[['Gender','SN','Price']]
gendertable.head()

Unnamed: 0,Gender,SN,Price
0,Male,Lisim78,3.53
1,Male,Lisovynya38,1.56
2,Male,Ithergue48,4.88
3,Male,Chamassasya86,3.27
4,Male,Iskosia90,1.44


In [50]:
gendertable['Gender'].value_counts()

Male                     652
Female                   113
Other / Non-Disclosed     15
Name: Gender, dtype: int64

In [47]:
players_number = len(gendertable["SN"].unique())
players_number

576

In [68]:
male_players = gendertable[gendertable['Gender'] == 'Male']["SN"].nunique()
male_percentage = male_players/players_number*100

male_percentage

84.02777777777779

In [59]:
female_players = gendertable[gendertable['Gender'] == 'Female']["SN"].nunique()
female_percentage = female_players/players_number*100


In [62]:
other_players = gendertable[gendertable['Gender'] == 'Other / Non-Disclosed']["SN"].nunique()
other_percentage = other_players/players_number*100


In [89]:
gen_demographics = pd.DataFrame({"Gender":["Male", "Female", "Other / Non-Disclose"],"Total Count":[male_players, female_players, other_players], 
                                 "Percentage of Players": [male_percentage, female_percentage, other_percentage]}) 

gen_demographics = gen_demographics.set_index('Gender')
gen_demographics["Percentage of Players"] = gen_demographics["Percentage of Players"].map ("{:.2f}%".format)
gen_demographics.head()

Unnamed: 0_level_0,Total Count,Percentage of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclose,11,1.91%


In [82]:

gender_unique = gendertable['Gender'].unique()
players_number = len(gendertable["SN"].unique())
# HERE YOU CAN DO column_names = ["Gender", "Total Count", "Percentage of Players"]
# Replace columns = with column names 
# Then in your for loop you can use zip to do the rest. So something like zip(columns_names, list(x, y, z))
gen_demographics = pd.DataFrame(columns = ["Gender", "Total Count", "Percentage of Players"])
for Gender in gender_unique:
    
    players = gendertable[gendertable['Gender'] == Gender]["SN"].nunique()
    percentage = players/players_number*100
    data = {'Gender': Gender, 'Total Count': players, 'Percentage of Players': percentage}
    gen_demographics = gen_demographics.append(data, ignore_index =True)
    
print(gen_demographics)

                  Gender Total Count  Percentage of Players
0                   Male         484              84.027778
1  Other / Non-Disclosed          11               1.909722
2                 Female          81              14.062500



## Purchasing Analysis (Gender)


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [107]:
male_purchase = gendertable[gendertable['Gender'] == 'Male']["Price"].count()
male_average = gendertable[gendertable['Gender'] == 'Male']["Price"].mean()
male_total = gendertable[gendertable['Gender'] == 'Male']["Price"].sum()
male_per = male_total/male_players
male_per

4.065371900826446

In [108]:
female_purchase = gendertable[gendertable['Gender'] == 'Female']["Price"].count()
female_average = gendertable[gendertable['Gender'] == 'Female']["Price"].mean()
female_total = gendertable[gendertable['Gender'] == 'Female']["Price"].sum()
female_per = female_total/female_players
female_per

4.468395061728395

In [109]:
other_purchase = gendertable[gendertable['Gender'] == 'Other / Non-Disclosed']["Price"].count()
other_average = gendertable[gendertable['Gender'] == 'Other / Non-Disclosed']["Price"].mean()
other_total = gendertable[gendertable['Gender'] == 'Other / Non-Disclosed']["Price"].sum()
other_per = other_total/other_players
other_per

4.5627272727272725

## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

