In [9]:
import pandas as pd

In [23]:
csv_path = "purchase_data.csv"
purchase_df = pd.read_csv(csv_path)
purchase_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [24]:
### Player Count

# Total Number of Players
total_player_count = purchase_df["SN"]
len(total_player_count.unique())

576

In [37]:
### Purchasing Analysis (Total)

# Number of Unique Items
# Average Purchase Price
# Total Number of Purchases
# Total Revenue
summary_df = pd.DataFrame(data=["Summary Analysis Data"])
summary_df["Number of Unique Items"] = len(purchase_df["Item Name"].unique())
summary_df["Average Purchase Price"] = round(purchase_df["Price"].mean(),2)
summary_df["Total Number of Purchases"] = purchase_df["Item ID"].count()
summary_df["Total Revenue"] = purchase_df["Price"].sum()


summary_df

Unnamed: 0,0,Number of Unique Items,Average Purchase Price,Total Number of Purchases,Total Revenue
0,Summary Analysis Data,179,3.05,780,2379.77


In [48]:
### Gender Demographics

# Percentage and Count of Male Players
male_column = purchase_df["Gender"]
male_count = male_column == "Male"
male_count_percent = (male_count.sum() / total_player_count)
"{:.2%}".format(male_count_percent)




'83.59%'

In [49]:
# Percentage and Count of Female Players
female_column = purchase_df["Gender"]
female_count = female_column == "Female"
female_count_percent = (female_count.sum() / total_player_count)
"{:.2%}".format(female_count_percent)

'14.49%'

In [50]:
# Percentage and Count of Other / Non-Disclosed
other = 1 - (male_count_percent + female_count_percent)
"{:.2%}".format(other)

'1.92%'

In [51]:
### Purchasing Analysis (Gender)

# The below each broken by gender
# Purchase Count
grouped_gender = purchase_df.groupby(['Gender'])
grouped_gender["Price"].count()
 
 


Gender
Female                   113
Male                     652
Other / Non-Disclosed     15
Name: Price, dtype: int64

In [52]:
 # Average Purchase Price
grouped_gender["Price"].mean()

Gender
Female                   3.203009
Male                     3.017853
Other / Non-Disclosed    3.346000
Name: Price, dtype: float64

In [53]:
 # Total Purchase Value
grouped_gender["Price"].sum()

Gender
Female                    361.94
Male                     1967.64
Other / Non-Disclosed      50.19
Name: Price, dtype: float64

In [54]:
  # Average Purchase Total per Person by Gender
avg_per_person_df = pd.DataFrame(data=[])
avg_per_person_df["Sum Price"] = grouped_gender["Price"].sum()
avg_per_person_df["Total by Gender"] = grouped_gender["Gender"].count()
avg_per_person_df["Per Gender"] = avg_per_person_df["Sum Price"] / avg_per_person_df["Total by Gender"]

avg_per_person_df

Unnamed: 0_level_0,Sum Price,Total by Gender,Per Gender
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,361.94,113,3.203009
Male,1967.64,652,3.017853
Other / Non-Disclosed,50.19,15,3.346


In [55]:
### Age Demographics

# The below each broken into bins of 4 years (i.e. &lt;10, 10-14, 15-19, etc.)
bins = [0, 10, 14, 19, 24, 29, 34, 39, 44, 100]
bin_names = ["10 & under", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40-44", "45 & older"]
purchase_df["Age Group"] = pd.cut(purchase_df["Age"], bins, labels=bin_names)

In [56]:
# Purchase Count
# Average Purchase Price
# Total Purchase Value
# Average Purchase Total per Person by Age Group

age_demo_df = purchase_df.groupby("Age Group")
avg_demo_per_person_df = pd.DataFrame(data=[])
avg_demo_per_person_df["Number of Items Purchased"] = age_demo_df["Item Name"].count()
avg_demo_per_person_df["Average Purchase Price"] = age_demo_df["Price"].mean()
avg_demo_per_person_df["User Count"] = age_demo_df["SN"].nunique()
avg_demo_per_person_df["Total Spend"] = age_demo_df["Price"].sum()
avg_demo_per_person_df["Spend per Person"] = avg_demo_per_person_df["Total Spend"] / avg_demo_per_person_df["User Count"]

avg_demo_per_person_df

Unnamed: 0_level_0,Number of Items Purchased,Average Purchase Price,User Count,Total Spend,Spend per Person
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
10 & under,32,3.405,24,108.96,4.54
10-14,19,2.681579,15,50.95,3.396667
15-19,136,3.035956,107,412.89,3.858785
20-24,365,3.052219,258,1114.06,4.318062
25-29,101,2.90099,77,293.0,3.805195
30-34,73,2.931507,52,214.0,4.115385
35-39,41,3.601707,31,147.67,4.763548
40-44,12,3.045,11,36.54,3.321818
45 & older,1,1.7,1,1.7,1.7


In [112]:
### Top Spenders

# Identify the the top 5 spenders in the game by total purchase value, then list (in a table):
  # SN
  # Purchase Count
  # Average Purchase Price
  # Total Purchase Value

group_spenders_df = purchase_df.groupby("SN")
top_spenders_df = pd.DataFrame(data=[])
top_spenders_df["Purchase Count"] = group_spenders_df["Item Name"].count()
top_spenders_df["Average Purchase Price"] = round(group_spenders_df["Price"].mean(),2)
top_spenders_df["Total Purchase Value"] = group_spenders_df["Price"].sum()

top_spenders_df.sort_values("Total Purchase Value", ascending=False).head()


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,3.79,18.96
Idastidru52,4,3.86,15.45
Chamjask73,3,4.61,13.83
Iral74,4,3.4,13.62
Iskadarya95,3,4.37,13.1


In [146]:
### Most Popular Items

# Identify the 5 most popular items by purchase count, then list (in a table):
  # Item ID
  # Item Name
  # Purchase Count
  # Item Price
  # Total Purchase Value
    
group_item_df = purchase_df.groupby("Item ID")
popular_items_df = pd.DataFrame(data=[])
popular_items_df["Item Name"] = group_item_df["Item Name"].unique()
popular_items_df["Purchase Count"] = group_item_df["Item Name"].count()
popular_items_df["Item Price"] = group_item_df["Price"].unique()
popular_items_df["Total Purchase Value"] = group_item_df["Price"].sum()


popular_items_df.sort_values("Purchase Count", ascending=False).head()

Unnamed: 0_level_0,Item Name,Purchase Count,Item Price,Total Purchase Value
Item ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"[Oathbreaker, Last Hope of the Breaking Storm]",12,[4.23],50.76
145,[Fiery Glass Crusader],9,[4.58],41.22
108,"[Extraction, Quickblade Of Trembling Hands]",9,[3.53],31.77
82,[Nirvana],9,[4.9],44.1
19,"[Pursuit, Cudgel of Necromancy]",8,[1.02],8.16


In [147]:
### Most Profitable Items

# Identify the 5 most profitable items by total purchase value, then list (in a table):
  # Item ID
  # Item Name
  # Purchase Count
  # Item Price
  # Total Purchase Value
    
popular_items_df.sort_values("Total Purchase Value", ascending=False).head()

Unnamed: 0_level_0,Item Name,Purchase Count,Item Price,Total Purchase Value
Item ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"[Oathbreaker, Last Hope of the Breaking Storm]",12,[4.23],50.76
82,[Nirvana],9,[4.9],44.1
145,[Fiery Glass Crusader],9,[4.58],41.22
92,[Final Critic],8,[4.88],39.04
103,[Singed Scalpel],8,[4.35],34.8
