### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [39]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
file = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame

main_df = pd.read_csv(file, skipinitialspace=True)
column_names = list(main_df.columns)
total_players = len(main_df['SN'].unique())
#print(total_players)
#print(column_names)
# column 0 = purchase ID, 1 = SN(in game name), 2 = age, 3 = gender, 4 = item id, 5 = item name, 6 = price
# may have to clean up header

# you want to use the original dataframe and not the new one
# this is looking at purchases not unique players so the filtered dataset would be incomplete
# counting with gender will get the purchase count
spender_df = main_df[["SN", "Price"]]

# Perform calculations to obtain purchase count, average item price, and total purchase value
num_spender = spender_df.groupby(["SN"])["SN"].count()
avg_spent = round(spender_df.groupby(["SN"])["Price"].mean(),2)
# get purchase count and then retrieve the item's price and multiply that to get the total purchase value 
total_spent = num_spender.multiply(avg_spent)

# collate here. item id and item name are a part of these counts and will be collated in the dataframe
spender_summary = {"Purchase Count": num_spender,
               "Average Purchase Price": avg_spent,
               "Total Purchase Value": total_spent}

# will require descending order so make sure to set ascending as false  
complete_spent_df = pd.DataFrame(spender_summary).sort_values(["Purchase Count"],ascending=False)

# display
complete_spent_df.head()

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,3.79,18.95
Iral74,4,3.4,13.6
Idastidru52,4,3.86,15.44
Asur53,3,2.48,7.44
Inguron55,3,3.7,11.1


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [3]:
unique_items = len(main_df["Item ID"].unique())
print(unique_items)

#round to second decimal point here
average_revenue = round(main_df['Price'].mean(),2)
print(average_revenue)

num_of_purchases = len(main_df["Item ID"])
print(num_of_purchases)

# no need to round as prices only have 2 decimal places when added
total_revenue = main_df['Price'].sum()
print(total_revenue)

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [8]:
# count how many times a repeating sn reappears
# place those names into a list 
# start a new dataframe with those list and the necessary columns
# make a column holding these value for purchase count 
# average and total those rows values 
# place those values into the new data frame

# groupby and count by sn 
# group by sn and sum up their purchases 
# divide total purchase by purchase count to get average purchase price 

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,$3.79,$18.96
Idastidru52,4,$3.86,$15.45
Chamjask73,3,$4.61,$13.83
Iral74,4,$3.40,$13.62
Iskadarya95,3,$4.37,$13.10


## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [35]:
# make a new dataframe with just item id, name and price
item_df = main_df[["Item ID", "Item Name", "Price"]]

# Perform calculations to obtain purchase count, average item price, and total purchase value
counts = item_df.groupby(["Item ID", "Item Name"])["Item Name"].count()
avg_item_price = round(item_df.groupby(["Item ID", "Item Name"])["Price"].mean(),2)
# get purchase count and then retrieve the item's price and multiply that to get the total purchase value 
total_item_price = counts.multiply(avg_item_price)


# collate here. item id and item name are a part of these counts and will be collated in the dataframe
item_summary = {"Purchase Count": counts,
               "Item Price": avg_item_price,
               "Total Purchase Value": total_item_price}

# will require descending order so make sure to set ascending as false  
complete_item_df = pd.DataFrame(item_summary).sort_values(["Purchase Count"],ascending=False)

# display
complete_item_df.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,4.61,59.93
178,"Oathbreaker, Last Hope of the Breaking Storm",12,4.23,50.76
145,Fiery Glass Crusader,9,4.58,41.22
132,Persuasion,9,3.22,28.98
108,"Extraction, Quickblade Of Trembling Hands",9,3.53,31.77


## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



In [10]:
# sort the new dataframe by total purchase value 
# display the new data frame
most_profitable = complete_item_df.sort_values(["Total Purchase Value"], ascending = False)
most_profitable.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22
103,Singed Scalpel,8,$4.35,$34.80
