In [2]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [64]:
#Display the total number of players - unique values in the SN column
totalplayers = purchase_data["SN"].value_counts()
len(totalplayers) 
player_dict = [{"Total Players": len(totalplayers) }]
totalplayer_df = pd.DataFrame(player_dict)
totalplayer_df

totalplayers["Lisosia93"]
                              
new_df = purchase_data.groupby('Age')['Price'].mean()
new_df

Age
7     3.654444
8     3.246250
9     3.045000
10    3.536667
11    2.684286
12    2.633333
13    2.362500
14    3.455000
15    3.018571
16    3.018667
17    2.930909
18    3.162308
19    3.042609
20    3.174949
21    2.915161
22    2.955000
23    3.013881
24    3.137612
25    3.083051
26    2.870714
27    2.723000
28    1.690000
29    2.710000
30    3.152000
31    3.435714
32    2.836250
33    2.486429
34    2.458889
35    3.716429
36    2.532000
37    3.644286
38    3.794444
39    3.886667
40    2.785000
41    3.270000
42    3.930000
43    4.000000
44    2.680000
45    1.700000
Name: Price, dtype: float64

In [69]:
# Purchase Analysis (total)
# Run basic calculations to obtain number of unique items, average price, number of purchases, and total revenue
unique_items = len(purchase_data["Item ID"].value_counts())
avg_price = purchase_data["Price"].mean()
number_purchases = purchase_data["Purchase ID"].count()
total_revenue = purchase_data["Price"].sum()

# Create a summary data frame to hold the results

purchase_df = pd.DataFrame([{"Number of Unique Items": unique_items,
                "Average Price": avg_price,
                "Number of Purchases": number_purchases,
                "Total Revenue": total_revenue}])
purchase_df

# Give the displayed data cleaner formatting
# Use Map to format all the columns
purchase_df["Average Price"] = purchase_df["Average Price"].map("${:.2f}".format)
purchase_df["Total Revenue"] = purchase_df["Total Revenue"].map("${:,.2f}".format)

# Display the summary data frame
purchase_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


In [77]:

gender_SN_df = purchase_data.loc[:, ["Gender", "SN", "Age"]]
new_df=gender_SN_df.groupby('SN')["Gender"]

new_df.transform(lambda x: ','.join(x)).apply(lambda x: x[0]).value_counts()

M    652
F    113
O     15
Name: Gender, dtype: int64

In [81]:
# Gender Demographics: Percentage and Count of Male Players, Percentage and Count of Female Players
# Percentage and Count of Other / Non-Disclosed

#group_SN = purchase_data.groupby(['SN'])
#group_SN_df = pd.Dataframe(group_SN)
#group_gender_df = group_SN_df.groupby(['Gender'])
#print(group_SN)
#total = group_SN["Gender"].count()
#len(total)
#gender_counts = group_SN["Gender"].sum()
#gender_counts

# get just gender and SN, remove duplicates, create series for count and percent; note Age added for use later
gender_SN_df.groupby('SN').count()

gender_SN_df = purchase_data.loc[:, ["Gender", "SN", "Age"]]
gender_SN_unique_df = gender_SN_df.drop_duplicates()
gender_counts = gender_SN_unique_df["Gender"].value_counts()
gender_percents = gender_counts / gender_SN_unique_df["Gender"].count() * 100

# create the gender dataframe
gender_df = pd.DataFrame({
    "Total Count": gender_counts,
    "Percentage": gender_percents
})

# format the % column
gender_df["Percentage"] = gender_df["Percentage"].map("{:.2f}%".format)

# display the dataframe
gender_df


Unnamed: 0,Total Count,Percentage
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


In [85]:
# Purchasing Analysis (Gender)
# Run basic calculations to obtain: 
# purchase count, avg. purchase price, total purchase value, and avg. purchase total per person, all by gender
#use groupby
#grouped_gender_df = purchase_data.groupby(["Gender"]).sum()
#grouped_gender_df
purchase_count = purchase_data.groupby(["Gender"]).count()["Purchase ID"]
#purchase_count
avg_purch_price = purchase_data.groupby(["Gender"]).mean()["Price"]
total_purch_value = purchase_data.groupby(["Gender"]).sum()["Price"]
avg_total_purch = total_purch_value / gender_counts

# Create a summary data frame to hold the results
summary_df = pd.DataFrame({"Purchase Count": purchase_count,
                          "Average Purchase Price": avg_purch_price,
                          "Total Purchase Value": total_purch_value,
                          "Total Purchase Avg. Per Person": avg_total_purch})
# Optional: give the displayed data cleaner formatting
# format the % column
summary_df["Average Purchase Price"] = summary_df["Average Purchase Price"].map("${:.2f}".format)
summary_df["Total Purchase Value"] = summary_df["Total Purchase Value"].map("${:.2f}".format)
summary_df["Total Purchase Avg. Per Person"] = summary_df["Total Purchase Avg. Per Person"].map("${:.2f}".format)

# Display the summary data frame
summary_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Total Purchase Avg. Per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,$1967.64,$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


In [91]:
# Age Demographics
# Establish bins for ages
bins = [0, 9, 14, 19, 24, 29, 34, 39, 200]

# Create labels for these bins
group_labels = ["<10", "10 to 14", "15 to 19", "20 to 24", "25 to 29", "30 to 34",
                "35 to 39", "40+"]

# Categorize the existing players using the age bins. Hint: use pd.cut()
#Create new column Age Ranges using pd.cut
gender_SN_unique_df["Age Ranges"] = pd.cut(gender_SN_unique_df["Age"], bins, labels=group_labels)
#age_group_df = gender_SN_unique_df.groupby("Age Ranges")
#age_group_df
#len(totalplayers)

# Calculate the numbers and percentages by age group
counts_by_group = gender_SN_unique_df["Age Ranges"].value_counts()
percent_by_group = counts_by_group / len(totalplayers) * 100

# Create a summary data frame to hold the results
agesummary_df = pd.DataFrame({"Total Count": counts_by_group,
                 "Percentage of Players": percent_by_group})

# Optional: round the percentage column to two decimal points
agesummary_df["Percentage of Players"] = agesummary_df["Percentage of Players"].map("{:.2f}%".format)

# Display Age Demographics Table (after sorting by index)
agesummary_df = agesummary_df.sort_index()
agesummary_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()


Unnamed: 0,Total Count,Percentage of Players
<10,17,2.95%
10 to 14,22,3.82%
15 to 19,107,18.58%
20 to 24,258,44.79%
25 to 29,77,13.37%
30 to 34,52,9.03%
35 to 39,31,5.38%
40+,12,2.08%


In [92]:
# Purchasing Analysis (Age)
# Bin the purchase_data data frame by age
bins = [0, 9, 14, 19, 24, 29, 34, 39, 200]

# Create labels for these bins
group_labels = ["<10", "10 to 14", "15 to 19", "20 to 24", "25 to 29", "30 to 34",
                "35 to 39", "40+"]

#use pd.cut to create Age Ranges
purchase_data["Age Ranges"] = pd.cut(purchase_data["Age"], bins, labels=group_labels)
#newage_group_df = purchase_data.groupby("Age Ranges")
#print(newage_group_df.sum())

#age_counts = newage_group_df["Age Ranges"]
#print(age_counts.count())

# Run basic calculations to obtain: 
# Purchase Count, Average Purchase Price, Total Purchase Value, Avg Total Purchase per Person
# use groupby
grouped_age = purchase_data.groupby(["Age Ranges"]).sum()
grouped_age
purchase_count_agerange = purchase_data.groupby(["Age Ranges"]).count()["Purchase ID"]
#purchase_count
avg_purch_price_agerange = purchase_data.groupby(["Age Ranges"]).mean()["Price"]
#avg_purch_price_agerange
total_purch_value_agerange = purchase_data.groupby(["Age Ranges"]).sum()["Price"]
#total_purch_value_agerange
avg_total_purch_agerange = total_purch_value_agerange / counts_by_group
#avg_total_purch_agerange

# Create a summary data frame to hold the results
agesummary2_df = pd.DataFrame({"Purchase Count": purchase_count_agerange,
                          "Average Purchase Price": avg_purch_price_agerange,
                          "Total Purchase Value": total_purch_value_agerange,
                          "Total Purchase Avg. Per Person": avg_total_purch_agerange})

# Optional: give the displayed data cleaner formatting
agesummary2_df["Average Purchase Price"] = agesummary2_df["Average Purchase Price"].map("${:.2f}".format)
agesummary2_df["Total Purchase Value"] = agesummary2_df["Total Purchase Value"].map("${:.2f}".format)
agesummary2_df["Total Purchase Avg. Per Person"] = agesummary2_df["Total Purchase Avg. Per Person"].map("${:.2f}".format)

# Display the summary data frame
agesummary2_df


Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Total Purchase Avg. Per Person
<10,23,$3.35,$77.13,$4.54
10 to 14,28,$2.96,$82.78,$3.76
15 to 19,136,$3.04,$412.89,$3.86
20 to 24,365,$3.05,$1114.06,$4.32
25 to 29,101,$2.90,$293.00,$3.81
30 to 34,73,$2.93,$214.00,$4.12
35 to 39,41,$3.60,$147.67,$4.76
40+,13,$2.94,$38.24,$3.19


In [94]:
user_total = purchase_data.groupby(["SN"]).sum()["Price"]
user_average = purchase_data.groupby(["SN"]).mean()["Price"]
user_count = purchase_data.groupby(["SN"]).count()["Price"]

user_data = pd.DataFrame({"Total Purchase Value": user_total,
                         "Average Purchase Value": user_average,
                         "Purchase Count": user_count})

user_sorted = user_data.sort_values("Total Purchase Value", ascending = False)

user_sorted["Total Purchase Value"] = user_sorted["Total Purchase Value"].map("${:.2f}".format)
user_sorted["Average Purchase Value"] = user_sorted["Average Purchase Value"].map("${:.2f}".format)



user_sorted.head(5)


Unnamed: 0_level_0,Total Purchase Value,Average Purchase Value,Purchase Count
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,$18.96,$3.79,5
Idastidru52,$15.45,$3.86,4
Chamjask73,$13.83,$4.61,3
Iral74,$13.62,$3.40,4
Iskadarya95,$13.10,$4.37,3


In [19]:
user_total = purchase_data.groupby(["SN"]).sum()["Price"]
user_average = purchase_data.groupby(["SN"]).mean()["Price"]
user_count = purchase_data.groupby(["SN"]).count()["Price"]

user_data = pd.DataFrame({"Total Purchase Value": user_total,
                         "Average Purchase Value": user_average,
                         "Purchase Count": user_count})

user_sorted = user_data.sort_values("Total Purchase Value", ascending = False)

user_sorted["Total Purchase Value"] = user_sorted["Total Purchase Value"].map("${:.2f}".format)
user_sorted["Average Purchase Value"] = user_sorted["Average Purchase Value"].map("${:.2f}".format)

user_sorted.head(5)
#user_total

Unnamed: 0_level_0,Total Purchase Value,Average Purchase Value,Purchase Count
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,$18.96,$3.79,5
Idastidru52,$15.45,$3.86,4
Chamjask73,$13.83,$4.61,3
Iral74,$13.62,$3.40,4
Iskadarya95,$13.10,$4.37,3


In [21]:
# Top Spenders
# Find top 5 spenders and create dataframe with:
# Index: SN; Columns: Purchase Count, Average Purchase Price, Total Purchase Value
brandnew_df = purchase_data.groupby(["SN"])
#print(brandnew_df.count())

totalperSN = brandnew_df.sum()[["Price"]]
type(brandnew_df)

#countperSN = brandnew_df.count()[["Price"]]
#countperSN

#avgperSN = brandnew_df.mean()[["Price"]]
#avgperSN

#final_df = pd.DataFrame({"Purchase Count": countperSN,
#                          "Average Purchase Price": avgperSN,
#                          "Total Purchase Value": totalperSN})

#final_df

#print(brandnew_df["SN"].count())

#brandnew_df[["SN", "Price"]].sum()

#bigspend_df = final_df.sort_values(by=["Price"], ascending=False)
#bigspend_df

#df = df_original.set_index("last_name") ------ set_index


# Create a summary data frame to hold the results
# Sort the total purchase value column in descending order
# Optional: give the displayed data cleaner formatting
# Display a preview of the summary data frame

pandas.core.groupby.generic.DataFrameGroupBy

In [95]:
# Most Popular Items
# Retrieve the Item ID, Item Name, and Item Price columns
item_df = purchase_data[["Item ID", "Item Name", "Price"]]

# Group by Item ID and Item Name. 
#Perform calculations to obtain purchase count, item price, and total purchase value
purchase_count = item_df.groupby(["Item ID", "Item Name"]).count()["Price"]
item_avg_price = item_df.groupby(["Item ID", "Item Name"]).mean()["Price"]
item_total_amount = item_df.groupby(["Item ID", "Item Name"]).sum()["Price"]

# Create a summary data frame to hold the results
price_df = pd.DataFrame({"Purchase Count": purchase_count,
                        "Item Price": item_avg_price,
                        "Total Purchase Value": item_total_amount })

# Sort the purchase count column in descending order
sorted_price_df = price_df.sort_values(["Purchase Count"], ascending=False)


# Optional: give the displayed data cleaner formatting
sorted_price_df["Item Price"] = sorted_price_df["Item Price"].map("${:.2f}".format)
sorted_price_df["Total Purchase Value"] = sorted_price_df["Total Purchase Value"].map("${:.2f}".format)

# Display a preview of the summary data frame
sorted_price_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
145,Fiery Glass Crusader,9,$4.58,$41.22
132,Persuasion,9,$3.22,$28.99
108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77


In [41]:
# Most Profitable Items
# Sort the above table by total purchase value in descending order
sorted_profit_df = price_df.sort_values(["Total Purchase Value"], ascending=False)

# Optional: give the displayed data cleaner formatting
sorted_profit_df["Item Price"] = sorted_profit_df["Item Price"].map("${:.2f}".format)
sorted_profit_df["Total Purchase Value"] = sorted_profit_df["Total Purchase Value"].map("${:.2f}".format)

# Display a preview of the data frame
sorted_profit_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22
103,Singed Scalpel,8,$4.35,$34.80
