### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Julie Baker
# Heroes of Pymoli
# June 2021

In [58]:
# Dependencies and Setup --> need to make sure it's on my 3.8.5 to get to work right now
import pandas as pd
import os
import csv

# File to Load (Remember to Change These)
file_to_load = os.path.join(os.getcwd(), "Resources", "purchase_data.csv")

# Read Purchasing File and store into Pandas data frame
purchase_df = pd.read_csv(file_to_load)

In [59]:
# just looking at data
purchase_df.head(5)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## Player Count

* Display the total number of players


In [60]:
# SN unique
user_count = len(purchase_df['SN'].unique())
print(user_count)

576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [61]:
## WORK ON FORMATTING IF TIME LATER ##
# ===================================
# unique items
item_count = purchase_df["Item ID"].nunique()
# average purchase price
avg_purchase = purchase_df.Price.mean()
#.. avg_purchase['Average Price'] = avg_purchase['Average Price'].map("${:,.2}".format)
# total number of purchases
total_purchases = len(purchase_df['Purchase ID'])
# total revenue
total_rev = purchase_df.Price.sum()

purchase_analysis_df = pd.DataFrame(data = [[user_count, item_count, avg_purchase, total_purchases, total_rev]], columns=["Unique Users", "Unique Items", "Average Price", "Total Purchases", "Total Revenue"])

### pd.options.display.float_format = '${:, .2f}'.format

purchase_analysis_df

Unnamed: 0,Unique Users,Unique Items,Average Price,Total Purchases,Total Revenue
0,576,179,3.050987,780,2379.77


In [62]:
# Renaming columns to remove spaces 
update_purchase_df = purchase_df.rename(columns={'Purchase ID':'PurchaseID', 'Item ID' : 'ItemID', 'Item Name' : 'ItemName'})

update_purchase_df

Unnamed: 0,PurchaseID,SN,Age,Gender,ItemID,ItemName,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46
778,778,Sisur91,7,Male,92,Final Critic,4.19


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [334]:
# gender count
gender_fixed_df = update_purchase_df.groupby(['Gender'])['SN'].nunique()
gender_fixed_df


Gender
Female                    81
Male                     484
Other / Non-Disclosed     11
Name: SN, dtype: int64

In [338]:
#  purchase count by gender
gender_purch_df = update_purchase_df.groupby(['Gender'])['SN'].count()
gender_purch_df

Gender
Female                   113
Male                     652
Other / Non-Disclosed     15
Name: SN, dtype: int64

In [339]:
gender_compiling_df = pd.merge(gender_fixed_df, gender_purch_df, on='Gender')
gender_compiling_df

Unnamed: 0_level_0,SN_x,SN_y
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,81,113
Male,484,652
Other / Non-Disclosed,11,15


In [353]:
gender_compiling_df = gender_compiling_df.rename(columns={'SN_x': 'Player Count', 'SN_y': 'Purchase Count'})
gender_compiling_df

Unnamed: 0_level_0,Player Count,Purchase Count,Percentage of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,81,113,14.0625
Male,484,652,84.027778
Other / Non-Disclosed,11,15,1.909722


In [354]:
gender_percent_df = gender_compiling_df['Percentage of Players'] = (gender_compiling_df['Player Count'] / user_count)*100
gender_percent_df

Gender
Female                   14.062500
Male                     84.027778
Other / Non-Disclosed     1.909722
Name: Player Count, dtype: float64

In [355]:
# gender demographics table
gender_demographics_df = pd.merge(gender_fixed_df, gender_percent_df, on='Gender')
gender_demographics_df = gender_demographics_df.rename(columns={'SN':'Player Count', 'Player Count':'Percent of Players'})
gender_demographics_df

Unnamed: 0_level_0,Player Count,Percent of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,81,14.0625
Male,484,84.027778
Other / Non-Disclosed,11,1.909722


In [356]:
# average purchase price
gender_price_df = update_purchase_df.groupby(['Gender'])['Price'].mean()
gender_price_df

Gender
Female                   3.203009
Male                     3.017853
Other / Non-Disclosed    3.346000
Name: Price, dtype: float64

In [357]:
# total purchase value
gender_value_df = update_purchase_df.groupby(['Gender'])['Price'].sum()
gender_value_df

Gender
Female                    361.94
Male                     1967.64
Other / Non-Disclosed      50.19
Name: Price, dtype: float64

In [362]:
gender_compile_df2 = pd.merge(gender_compiling_df, gender_price_df, on='Gender')
gender_compile_df2

Unnamed: 0_level_0,Player Count,Purchase Count,Percentage of Players,Price
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,81,113,14.0625,3.203009
Male,484,652,84.027778,3.017853
Other / Non-Disclosed,11,15,1.909722,3.346


In [363]:
gender_compile_df3 = pd.merge(gender_compile_df2, gender_value_df, on='Gender')
gender_compile_df3

Unnamed: 0_level_0,Player Count,Purchase Count,Percentage of Players,Price_x,Price_y
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Female,81,113,14.0625,3.203009,361.94
Male,484,652,84.027778,3.017853,1967.64
Other / Non-Disclosed,11,15,1.909722,3.346,50.19


In [365]:
# rename columns
gender_compile_df3 = gender_compile_df3.rename(columns={'Price_x':'Average Purchase Price','Price_y':'Total Purchase Value'})
gender_compile_df3

Unnamed: 0_level_0,Player Count,Purchase Count,Percentage of Players,Average Purchase Price,Total Purchase Value
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Female,81,113,14.0625,3.203009,361.94
Male,484,652,84.027778,3.017853,1967.64
Other / Non-Disclosed,11,15,1.909722,3.346,50.19


In [367]:
# average purchase by person
gender_compile_df3['Average Purchase Total per Person'] = (gender_compile_df3['Total Purchase Value'] /gender_compile_df3['Player Count'])
gender_compile_df3

Unnamed: 0_level_0,Player Count,Purchase Count,Percentage of Players,Average Purchase Price,Total Purchase Value,Average Purchase Total per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Female,81,113,14.0625,3.203009,361.94,4.468395
Male,484,652,84.027778,3.017853,1967.64,4.065372
Other / Non-Disclosed,11,15,1.909722,3.346,50.19,4.562727


In [369]:
# ** PURCHASING ANALYSIS (GENDER) **
gender_results_df = gender_compile_df3.drop(columns=['Player Count','Percentage of Players'])
gender_results_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Average Purchase Total per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,3.203009,361.94,4.468395
Male,652,3.017853,1967.64,4.065372
Other / Non-Disclosed,15,3.346,50.19,4.562727


In [64]:
# Creating dataframes for each gender group because using the original dataframe doesn't take duplicate users into account

# Female users
female_users_df = update_purchase_df.loc[update_purchase_df["Gender"] == "Female", :]
# Male users
male_users_df = update_purchase_df.loc[update_purchase_df["Gender"] == "Male", :]
# Other users
other_users_df = update_purchase_df.loc[update_purchase_df["Gender"] == "Other / Non-Disclosed", :]

In [65]:
# female count
female_count = len(female_users_df.SN.unique())
# female percentage
female_percent = (female_count / user_count)*100

In [66]:
# male count & percentage
male_count = len(male_users_df.SN.unique())
male_percent = male_count / user_count*100

In [67]:
# other count & percentage
other_count = len(other_users_df.SN.unique())
other_percent = (other_count / user_count)*100

In [68]:
# Gender Demographics Output

gender_demo_df = pd.DataFrame({
    "Gender": ["Female", "Male", "Other / Not Disclosed", "Total Players"],
    "Player Count": [female_count, male_count, other_count, user_count],
    "Percent of Players": [female_percent, male_percent, other_percent, 100]
})

# gender_demo_df["Percent of Players"] = gender_demo_df["Percent of Players"].map('{:.2%f}')
# pd.options.display.float_format = '{:.2%}'.format FIGURE THIS OUT LATER, WAS MESSING UP STUFF LATER

#purchase_analysis_df = pd.DataFrame(data = [[user_count, item_count, avg_purchase, total_purchases, total_rev]], columns=["Unique Users", "Unique Items", "Average Price", "Total Purchases", "Total Revenue"])
gender_demo_df

Unnamed: 0,Gender,Player Count,Percent of Players
0,Female,81,14.0625
1,Male,484,84.027778
2,Other / Not Disclosed,11,1.909722
3,Total Players,576,100.0



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [69]:
# purchase count by gender
female_purchases = female_users_df["PurchaseID"].count()
male_purchases = male_users_df["PurchaseID"].count()
other_purchases = other_users_df["PurchaseID"].count()

In [70]:
# average purchase price by gender
avg_price_female = female_users_df.Price.mean()
avg_price_male = male_users_df.Price.mean()
avg_price_other = other_users_df.Price.mean()

In [71]:
# total purchase value by gender
total_rev = purchase_df.Price.sum()
total_purchase_female = female_users_df.Price.sum()
total_purchase_male = male_users_df.Price.sum()
total_purchase_other = other_users_df.Price.sum()

In [72]:
# average purchase total per PERSON per GENDER
avg_purchase = purchase_df.Price.mean()
avg_total_females = total_purchase_female/female_count
avg_total_males = total_purchase_male/male_count
avg_total_other = total_purchase_other/other_count


In [73]:
## PURCHASING ANALYSIS (Gender)

gender_purchasing_df = pd.DataFrame({
    "Gender": ["Female", "Male", "Other / Not Disclosed"],
    "Player Count": [female_count, male_count, other_count],
    "Percent of Players": [female_percent, male_percent, other_percent],
    "Purchase Count": [female_purchases, male_purchases, other_purchases],
    "Average Purchase Price": [avg_price_female, avg_price_male, avg_price_other],
    "Total Purchase Value": [total_purchase_female, total_purchase_male, total_purchase_other],
    "Average Purchase Total per Person by Gender": [avg_total_females, avg_total_males, avg_total_other]
})
gender_purchasing_df

Unnamed: 0,Gender,Player Count,Percent of Players,Purchase Count,Average Purchase Price,Total Purchase Value,Average Purchase Total per Person by Gender
0,Female,81,14.0625,113,3.203009,361.94,4.468395
1,Male,484,84.027778,652,3.017853,1967.64,4.065372
2,Other / Not Disclosed,11,1.909722,15,3.346,50.19,4.562727


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [259]:
# esatblish bins for ages
bins = [0, 9, 14, 19, 24, 29, 34, 39, 44, 49]
age_labels = ["under 10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40-44", "45-49"]

In [265]:
# use pd.cut() to categorize players using age bins
age_df = update_purchase_df
age_df["Age Group"] = pd.cut(age_df["Age"], bins, labels=age_labels, include_lowest=False)
age_df

Unnamed: 0,PurchaseID,SN,Age,Gender,ItemID,ItemName,Price,Age Group
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53,20-24
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56,40-44
2,2,Ithergue48,24,Male,92,Final Critic,4.88,20-24
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27,20-24
4,4,Iskosia90,23,Male,131,Fury,1.44,20-24
...,...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54,20-24
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63,20-24
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46,20-24
778,778,Sisur91,7,Male,92,Final Critic,4.19,under 10


In [299]:
# age demographics; counts members for each age group & drops duplicates based on 'SN'
age_counts_df2 = age_df.groupby('Age Group')['SN'].nunique()

In [301]:
# AGE ANALYSIS NEEDS:  group count, percent of players
# I couldn't get it to add my calculated column to the df above, so I created this one & merged them. AND THEN...it was totally fine with me adding a calculated column.
age_counts_df3 = age_df.groupby('Age Group')['SN'].nunique()
age_demos_df = pd.merge(age_counts_df2, age_counts_df3, on='Age Group')

In [302]:
# calculate percentage of players by age
age_demos_df['Percentage of Players'] = (age_demos_df['SN_x'] / user_count)*100

In [315]:
# renaming the columns I want to keep then dropping the column I don't need
age_demos_df = age_demos_df.rename(columns={'SN_x': 'Player Count', 'Percentage of Players': 'Percentage of Players'})
age_demos_df = age_demos_df.drop(columns='SN_y')
age_demos_df


Unnamed: 0_level_0,Player Count,Percentage of Players
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1
under 10,17,2.951389
10-14,22,3.819444
15-19,107,18.576389
20-24,258,44.791667
25-29,77,13.368056
30-34,52,9.027778
35-39,31,5.381944
40-44,11,1.909722
45-49,1,0.173611


In [None]:
## Building purchase analysis next

In [320]:
# purchase count
purchase_byAge_df = age_df.groupby('Age Group')['SN'].count()

In [321]:
# avg purchase price
avg_byAge_df = age_df.groupby('Age Group')['Price'].mean()

In [322]:
# total purchase value
total_byAge_df = age_df.groupby('Age Group')['Price'].sum()

In [314]:
# creating compiled df for results
age_results_df = pd.merge(purchase_byAge_df, avg_byAge_df, on='Age Group')
age_results_df = pd.merge(age_results_df, total_byAge_df, on='Age Group')
age_results_df = age_results_df.rename(columns={'SN': 'Purchase Count', 'Price_x': 'Average Purchase', 'Price_y':'Total Purchase Value'})
age_results_df

Unnamed: 0_level_0,Purchase Count,Average Purchase,Total Purchase Value
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
under 10,23,3.353478,77.13
10-14,28,2.956429,82.78
15-19,136,3.035956,412.89
20-24,365,3.052219,1114.06
25-29,101,2.90099,293.0
30-34,73,2.931507,214.0
35-39,41,3.601707,147.67
40-44,12,3.045,36.54
45-49,1,1.7,1.7


In [318]:
# adding the calculated column for average purchase total per person
age_results_df['Average Total per Person'] = (age_results_df['Total Purchase Value'] / age_demos_df['Player Count'])
age_results_df

Unnamed: 0_level_0,Purchase Count,Average Purchase,Total Purchase Value,Average Total per Person
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
under 10,23,3.353478,77.13,4.537059
10-14,28,2.956429,82.78,3.762727
15-19,136,3.035956,412.89,3.858785
20-24,365,3.052219,1114.06,4.318062
25-29,101,2.90099,293.0,3.805195
30-34,73,2.931507,214.0,4.115385
35-39,41,3.601707,147.67,4.763548
40-44,12,3.045,36.54,3.321818
45-49,1,1.7,1.7,1.7


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [84]:
# Compile all summary data
print(grp_10_results_df)

                                           Age Group  Users in Group  \
0  248      Isursuir31
674         Aeral68
728   ...               4   

   Purchase Count  Average Purchase Price  Total Purchase Value  \
0               4                   2.765                 11.06   

   Average Purchase Total per Person by Age  
0                                     2.765  


In [85]:
# display Purchasing Analysis (Age)

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [195]:
# Find Top 5 Spenders by total purchase value

# creating df for finding Top 5 Spenders
top_spenders_df = update_purchase_df[['SN','Price']]
top_spenders_df = top_spenders_df.groupby('SN')

{'Adairialis76': [467], 'Adastirin33': [142], 'Aeda94': [388], 'Aela59': [28], 'Aelaria33': [630], 'Aelastirin39': [218, 766], 'Aelidru27': [705], 'Aelin32': [52, 87, 584], 'Aelly27': [43, 428], 'Aellynun67': [286], 'Aellyria80': [746], 'Aelollo59': [203, 431], 'Aenarap34': [183], 'Aeral43': [263], 'Aeral68': [674], 'Aeral97': [167], 'Aeralria27': [32], 'Aeralstical35': [583], 'Aeri84': [303], 'Aerillorin70': [381], 'Aerithllora36': [274, 578], 'Aerithnucal56': [324], 'Aerithnuphos61': [517], 'Aerithriaphos45': [40], 'Aerithriaphos46': [236], 'Aesri53': [541], 'Aesty53': [124, 253], 'Aestysu37': [309, 416], 'Aesur96': [468], 'Aesurstilis64': [405], 'Aethedru70': [775], 'Aidai53': [219], 'Aidai61': [282], 'Aidai73': [720], 'Aidaillodeu39': [42, 146], 'Aidain51': [516], 'Aidaira26': [730], 'Aiduecal76': [97, 670], 'Aiduesu86': [429], 'Aillyriadru65': [523], 'Aillyrin83': [155], 'Aina42': [83, 363, 373], 'Aina43': [94], 'Airi27': [637], 'Aisur51': [184], 'Aisurdru79': [440, 618], 'Aisurri

In [202]:
# ** Total Spent **
# This new df contains the sum of the prices for each player
spend_compare_df = top_spenders_df.sum()

# This df saves the top 5 SPENDERS by sorting price as descending.
top5_spend_df = spend_compare_df.sort_values(['Price'],ascending=False).head(5)
top5_spend_df

Unnamed: 0_level_0,Price
SN,Unnamed: 1_level_1
Lisosia93,18.96
Idastidru52,15.45
Chamjask73,13.83
Iral74,13.62
Iskadarya95,13.1


In [203]:
## ** Number of Purchases **
# This df contains the number of transactions for each player
purchase_counts_df = top_spenders_df.count()
purchase_counts_df

Unnamed: 0_level_0,Price
SN,Unnamed: 1_level_1
Adairialis76,1
Adastirin33,1
Aeda94,1
Aela59,1
Aelaria33,1
...,...
Yathecal82,3
Yathedeu43,2
Yoishirrala98,1
Zhisrisu83,2


In [204]:
# Join top 5 spenders to their purchase counts via inner join
top5_hold_df = pd.merge(top5_spend_df, purchase_counts_df, on='SN')
top5_hold_df

Unnamed: 0_level_0,Price_x,Price_y
SN,Unnamed: 1_level_1,Unnamed: 2_level_1
Lisosia93,18.96,5
Idastidru52,15.45,4
Chamjask73,13.83,3
Iral74,13.62,4
Iskadarya95,13.1,3


In [205]:
# Renaming columns
top5_hold_df2 = top5_hold_df.rename(columns={'Price_x': 'Total Purchase Value', 'Price_y': 'Purchase Count'})
top5_hold_df2

Unnamed: 0_level_0,Total Purchase Value,Purchase Count
SN,Unnamed: 1_level_1,Unnamed: 2_level_1
Lisosia93,18.96,5
Idastidru52,15.45,4
Chamjask73,13.83,3
Iral74,13.62,4
Iskadarya95,13.1,3


In [250]:
# create a new column with calculated avg price per transaction
top5_hold_df2['Average Purchase Price'] = (top5_hold_df2['Total Purchase Value'] / top5_hold_df2['Purchase Count'])
top5_hold_df2

Unnamed: 0_level_0,Total Purchase Value,Purchase Count,Average Purchase Price
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,18.96,5,3.792
Idastidru52,15.45,4,3.8625
Chamjask73,13.83,3,4.61
Iral74,13.62,4,3.405
Iskadarya95,13.1,3,4.366667


In [208]:
## TOP SPENDERS ##
# Table should have: SN, purchase count, average purchase price, total purchase value
top5_final_df = top5_hold_df2[['Purchase Count', 'Average Purchase Price', 'Total Purchase Value']].head(5)
top5_final_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,3.792,18.96
Idastidru52,4,3.8625,15.45
Chamjask73,3,4.61,13.83
Iral74,4,3.405,13.62
Iskadarya95,3,4.366667,13.1


## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [212]:
# create df for items
item_df = update_purchase_df[['ItemID', 'ItemName', 'Price']]
item_df = item_df.groupby(['ItemID', 'ItemName'])

item_df.head()

Unnamed: 0,ItemID,ItemName,Price
0,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,143,Frenzied Scimitar,1.56
2,92,Final Critic,4.88
3,100,Blindscythe,3.27
4,131,Fury,1.44
...,...,...,...
764,113,Solitude's Reaver,4.07
765,130,Alpha,2.07
766,58,"Freak's Bite, Favor of Holy Might",4.14
777,67,"Celeste, Incarnation of the Corrupted",3.46


In [222]:
# purchase count sorted descending
item_counts_df = item_df.count()
item_counts_df = item_counts_df.sort_values(['Price'],ascending=False)
item_counts_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Price
ItemID,ItemName,Unnamed: 2_level_1
92,Final Critic,13
178,"Oathbreaker, Last Hope of the Breaking Storm",12
145,Fiery Glass Crusader,9
132,Persuasion,9
108,"Extraction, Quickblade Of Trembling Hands",9
...,...,...
42,The Decapitator,1
51,Endbringer,1
118,"Ghost Reaver, Longsword of Magic",1
104,Gladiator's Glaive,1


In [219]:
# average item price
item_avg_df = item_df.mean()
item_avg_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Price
ItemID,ItemName,Unnamed: 2_level_1
0,Splinter,1.2800
1,Crucifer,2.9425
2,Verdict,2.4800
3,Phantomlight,2.4900
4,Bloodlord's Fetish,1.7000
...,...,...
178,"Oathbreaker, Last Hope of the Breaking Storm",4.2300
179,"Wolf, Promise of the Moonwalker",4.4800
181,Reaper's Toll,1.6600
182,Toothpick,4.0300


In [215]:
# total purchase value
item_values_df = item_df.sum()
item_values_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Price
ItemID,ItemName,Unnamed: 2_level_1
0,Splinter,5.12
1,Crucifer,11.77
2,Verdict,14.88
3,Phantomlight,14.94
4,Bloodlord's Fetish,8.50
...,...,...
178,"Oathbreaker, Last Hope of the Breaking Storm",50.76
179,"Wolf, Promise of the Moonwalker",26.88
181,Reaper's Toll,8.30
182,Toothpick,12.09


In [236]:
# merge counts to avg
item_hold_df = pd.merge(item_counts_df, item_avg_df, on=['ItemID', 'ItemName'],how='outer')
item_hold_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price_x,Price_y
ItemID,ItemName,Unnamed: 2_level_1,Unnamed: 3_level_1
92,Final Critic,13,4.614615
178,"Oathbreaker, Last Hope of the Breaking Storm",12,4.23
145,Fiery Glass Crusader,9,4.58
132,Persuasion,9,3.221111
108,"Extraction, Quickblade Of Trembling Hands",9,3.53


In [238]:
# merge total values to others
item_hold2_df = pd.merge(item_hold_df, item_values_df, on=['ItemID', 'ItemName'],how='outer')
item_hold2_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Price_x,Price_y,Price
ItemID,ItemName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,4.614615,59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,4.230000,50.76
145,Fiery Glass Crusader,9,4.580000,41.22
132,Persuasion,9,3.221111,28.99
108,"Extraction, Quickblade Of Trembling Hands",9,3.530000,31.77
...,...,...,...,...
42,The Decapitator,1,1.750000,1.75
51,Endbringer,1,4.660000,4.66
118,"Ghost Reaver, Longsword of Magic",1,2.170000,2.17
104,Gladiator's Glaive,1,1.930000,1.93


In [240]:
# rename columns
item_analysis_df = item_hold2_df.rename(columns={'Price_x': 'Purchase Count', 'Price_y': 'Item Price', 'Price': 'Total Purchase Value'})
item_analysis_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
ItemID,ItemName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,4.614615,59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,4.230000,50.76
145,Fiery Glass Crusader,9,4.580000,41.22
132,Persuasion,9,3.221111,28.99
108,"Extraction, Quickblade Of Trembling Hands",9,3.530000,31.77
...,...,...,...,...
42,The Decapitator,1,1.750000,1.75
51,Endbringer,1,4.660000,4.66
118,"Ghost Reaver, Longsword of Magic",1,2.170000,2.17
104,Gladiator's Glaive,1,1.930000,1.93


In [243]:
# Most Popular Items Table
top5_items_df = item_analysis_df.head(5)
top5_items_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
ItemID,ItemName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,4.614615,59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,4.23,50.76
145,Fiery Glass Crusader,9,4.58,41.22
132,Persuasion,9,3.221111,28.99
108,"Extraction, Quickblade Of Trembling Hands",9,3.53,31.77


## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



In [247]:
# Resorting item_analysis_df by purchase value rather than item count
item_profit_df = item_analysis_df.sort_values(['Total Purchase Value'],ascending=False)
item_profit_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
ItemID,ItemName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,4.614615,59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,4.230000,50.76
82,Nirvana,9,4.900000,44.10
145,Fiery Glass Crusader,9,4.580000,41.22
103,Singed Scalpel,8,4.350000,34.80
...,...,...,...,...
28,"Flux, Destroyer of Due Diligence",2,1.060000,2.12
126,Exiled Mithril Longsword,1,2.000000,2.00
125,Whistling Mithril Warblade,2,1.000000,2.00
104,Gladiator's Glaive,1,1.930000,1.93


In [249]:
# Table for Most Profitable Items
top5_profit_df = item_profit_df.head(5)
top5_profit_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
ItemID,ItemName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,4.614615,59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,4.23,50.76
82,Nirvana,9,4.9,44.1
145,Fiery Glass Crusader,9,4.58,41.22
103,Singed Scalpel,8,4.35,34.8
