In [1]:
import pandas as pd
import numpy as np


In [6]:
#Import file

file_path = "../Resources/purchase_data.csv"
#Read file 
purchase_data = pd.read_csv(file_path)
purchase_data.head()

 

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [3]:
#count number of player
total_players = len(purchase_data["SN"].unique())
 

summary_df = pd.DataFrame({"Total Unique Players": [total_players]})
summary_df                           

Unnamed: 0,Total Unique Players
0,576


In [5]:
purchase_data.dtypes

Purchase ID      int64
SN              object
Age              int64
Gender          object
Item ID          int64
Item Name       object
Price          float64
dtype: object

In [12]:
#Run basic calculations to obtain number of unique items, average price, etc.
unique_items = len(purchase_data["Item ID"].unique())
average_price = round(purchase_data["Price"].mean(),2)
average_age = round(purchase_data["Age"].mean(),0)
number_purchases = purchase_data["Item ID"].count()
revenue = purchase_data["Price"].sum()

#Create a summary data frame to hold the results
#Optional: give the displayed data cleaner formatting
purchasing_analysis = pd.DataFrame({"Number of Unique Items": [unique_items],
                             "Average Price": [average_price],
                              "Average Age": [average_age],
                              "Number Purchases": [number_purchases],
                              "Revenue": [revenue]})

 

 
#Display the summary data frame
purchasing_analysis

Unnamed: 0,Number of Unique Items,Average Price,Average Age,Number Purchases,Revenue
0,179,3.05,23.0,780,2379.77


In [None]:
 
#count female players
female_players = purchase_data.loc[purchase_data["Gender"] =="Female"]
total_female_players = len(female_players["SN"].unique())

#percent female players
percent_female_players = round(total_female_players/total_players *100, 0)

#count other players
other_players = purchase_data.loc[purchase_data["Gender"] =="Other / Non-Disclosed"]
total_other_players = len(other_players["SN"].unique())

#percent other players
percent_other_players = round(total_other_players/total_players *100,0)

#count male players
male_players = purchase_data.loc[purchase_data["Gender"] == "Male", :]
total_male_players = len(male_players["SN"].unique())

#percent male players
percent_male_players = round(total_male_players/total_players *100,0)


#create DataFrame

gender_demographics = pd.DataFrame(np.array([["Female", percent_female_players, total_female_players], 
                                             ["Male", percent_male_players, total_male_players], 
                                             ["Other", percent_other_players, total_other_players]]),
                                             columns=['Gender', 'Percent', 'Count'])

gender_demographics = gender_demographics.set_index('Gender')

gender_demographics

In [None]:
#establish bins
bins = [0, 15, 25, 40, 50]

# Create labels for these bins
group_labels = ["0-15", "16-25", "26-40", "40-50"]

#cut data and put in bins

purchase_data["Age Group"] = pd.cut(purchase_data["Age"], bins, labels=group_labels)

#count players
count_players = pd.DataFrame({"Total Count":purchase_data.groupby(["Age Group"])["SN"].count()}).reset_index()

#percent_players
percentages=round(count_players["Total Count"]/count_players["Total Count"].sum()*100, 0 )
count_players["Percentages"]=percentages
count_players = count_players.set_index('Age Group')
count_players






In [None]:

#establish bins
bins = [0, 15, 25, 40, 50]

# Create labels for these bins
group_labels = ["0-15", "16-25", "26-40", "40-50"]

#cut data and put in bins

purchase_data["Age Group"] = pd.cut(purchase_data["Age"], bins, labels=group_labels)

#purchase Count
purchase_count = pd.DataFrame({"Purchase Count": purchase_data.groupby(["Age Group"])["Purchase ID"].count()}).reset_index()

#purchase price
purchase_price = pd.DataFrame({"Purchase Price": purchase_data.groupby(["Age Group"])["Price"].mean()}).reset_index().round(2)

#purchase Value
purchase_value = pd.DataFrame({"Purchase Value": purchase_data.groupby(["Age Group"])["Price"].sum()})

#purchase_total
average_purchase_price = pd.DataFrame({"Total Price": purchase_data.groupby(["Age Group", "SN"])["Price"].sum()}).reset_index().round(2)
purchase_total = pd.DataFrame({"Average Purchase Price per Person":average_purchase_price.groupby(["Age Group"])["Total Price"].mean()}).reset_index().round(2)
age_purchase_analysis = purchase_count.merge(purchase_price, on="Age Group", how="left")

age_purchase_analysis = age_purchase_analysis.merge(purchase_total, on="Age Group", how="left")
age_purchase_analysis = age_purchase_analysis.merge(purchase_value, on="Age Group", how="left")
age_purchase_analysis = age_purchase_analysis.set_index('Age Group')
age_purchase_analysis

In [None]:
purchase_data.head()

#purchase count

purchase_count = pd.DataFrame({"Purchase Count": purchase_data.groupby(["SN"])["Purchase ID"].count()}).reset_index()

#average purchase price
average_purchase_price = pd.DataFrame({"Average Purchase Price": purchase_data.groupby(["SN"])["Price"].mean()}).reset_index().round(2)


#total purchase value
average_purchase_price_sum = pd.DataFrame({"Total Price": purchase_data.groupby(["SN"])["Price"].sum()}).reset_index()
total_purchase_value = pd.DataFrame({"Total Purchase Value":average_purchase_price_sum.groupby(["SN"])["Total Price"].mean()}).reset_index()


big_spenders = purchase_count.merge(average_purchase_price, on="SN", how="left")
big_spenders = big_spenders.merge(total_purchase_value, on="SN", how="left")
big_spenders = big_spenders.set_index('SN')
big_spenders = big_spenders.sort_values(["Total Purchase Value"], ascending=False)
big_spenders.head()

In [8]:

total_purchase_value = pd.DataFrame({"Total Purchase Value":purchase_data.groupby(["Item Name"])["Price"].sum()}).reset_index()

purchase_count = pd.DataFrame({"Purchase Count":purchase_data.groupby(["Item Name"])["Price"].count()}).reset_index()

item_price = pd.DataFrame({"Item Price":purchase_data.groupby(["Item Name"])["Price"].mean()}).reset_index()

most_popular = total_purchase_value.merge(purchase_count, on="Item Name", how="left")

most_popular = most_popular.merge(item_price, on="Item Name", how="left")
most_popular = most_popular.set_index('Item Name')

most_popular = most_popular.sort_values(["Purchase Count"], ascending=False)
most_popular.head()

Unnamed: 0_level_0,Total Purchase Value,Purchase Count,Item Price
Item Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Final Critic,59.99,13,4.614615
"Oathbreaker, Last Hope of the Breaking Storm",50.76,12,4.23
Persuasion,28.99,9,3.221111
Nirvana,44.1,9,4.9
"Extraction, Quickblade Of Trembling Hands",31.77,9,3.53


In [10]:
most_popular = most_popular.sort_values(["Total Purchase Value"], ascending=False)
most_popular

Unnamed: 0_level_0,Total Purchase Value,Purchase Count,Item Price
Item Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Final Critic,59.99,13,4.614615
"Oathbreaker, Last Hope of the Breaking Storm",50.76,12,4.230000
Nirvana,44.10,9,4.900000
Fiery Glass Crusader,41.22,9,4.580000
Singed Scalpel,34.80,8,4.350000
...,...,...,...
"Flux, Destroyer of Due Diligence",2.12,2,1.060000
Exiled Mithril Longsword,2.00,1,2.000000
Whistling Mithril Warblade,2.00,2,1.000000
Gladiator's Glaive,1.93,1,1.930000
