In [1]:
# Dependencies
import pandas as pd

In [2]:
# Save file path to variable
purchase_data_csv = 'Resources/purchase_data.csv'

In [3]:
# Read with Pandas
heroes_df = pd.read_csv(purchase_data_csv)
heroes_df.head()


Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [4]:
# Player Count: Total Number of Players
player_demo_df = heroes_df.loc[:,['SN', 'Age','Gender']]
player_demo_df = player_demo_df.drop_duplicates()
total_players = player_demo_df.count()[0]
total_players_dict = [{"Total Players":total_players}]
total_players_df = pd.DataFrame(total_players_dict)
total_players_df

Unnamed: 0,Total Players
0,576


In [5]:
# Purchasing Analysis (Total)

# Number of Unique Items
items = heroes_df['Item Name'].nunique()

# Total Number of Purchases
total_number_purchases = heroes_df['Purchase ID'].count()

# Total Revenue
total_revenue = heroes_df['Price'].sum()

# Average Purchase Price
average_purchase_price = total_revenue/total_number_purchases

# organize and format the output
purchase_analysis_list = [{"Number of Unique Items": items,
                           "Average Price": average_purchase_price,
                           "Number of Purchases": total_number_purchases,
                            "Total Revenue": total_revenue }]
purchase_analysis_df = pd.DataFrame(purchase_analysis_list)
purchase_analysis_df = pd.DataFrame(purchase_analysis_list)
purchase_analysis_df['Average Price'] = purchase_analysis_df['Average Price'].astype(float).map("${:,.2f}".format)
purchase_analysis_df['Total Revenue'] = purchase_analysis_df['Total Revenue'].astype(float).map("${:,.2f}".format)

purchase_analysis_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


In [6]:
# Gender Demographics
gender_summary = player_demo_df['Gender'].value_counts()

gender_percent = gender_summary/total_players*100

gender_summary_df = pd.DataFrame({"Total Count":gender_summary,
                                 "Percent of Players":gender_percent})

gender_summary_df['Percent of Players'] = gender_summary_df['Percent of Players'].astype(float).map("{0:.2f}%".format)

gender_summary_df


Unnamed: 0,Total Count,Percent of Players
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


In [62]:
# Purchasing Analysis (Gender)
# the below, each broken by gender
gender_df = heroes_df.loc[:,['Purchase ID','SN', 'Age','Gender', 'Price']]

#group by
gender_group = heroes_df.groupby(['Gender'])
gender_purchases_count = gender_df.groupby(["Gender"])

# Purchase Count: 
gender_purchases = gender_purchases_count.count()['Purchase ID']  

## Total Purchase Value
total_purchase_value = gender_df.groupby(["Gender"]).sum()['Price']


## Average Purchase Price
average_purchase_price = gender_df.groupby(['Gender']).mean()['Price']

## Average Purchase Total Per Person By Gender
average_person_purchase = total_purchase_value / gender_purchases


# Add to the summary dataframe 
purchase_by_gender_dict = {'Purchase Count':gender_purchases,
                          'Average Purchase Price':average_purchase_price,
                          'Total Purchase Value': total_purchase_value,
                          'Average Purchase Total per Person by Gender': average_person_purchase}



purchase_by_gender_summary_df = pd.DataFrame(purchase_by_gender_dict)

purchase_by_gender_summary_df['Average Purchase Price'] = purchase_by_gender_summary_df['Average Purchase Price'].astype(float).map("${:,.2f}".format)
purchase_by_gender_summary_df['Total Purchase Value'] = purchase_by_gender_summary_df['Total Purchase Value'].astype(float).map("${:,.2f}".format)
purchase_by_gender_summary_df['Average Purchase Total per Person by Gender'] = purchase_by_gender_summary_df['Average Purchase Total per Person by Gender'].astype(float).map("${:,.2f}".format)

purchase_by_gender_summary_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Average Purchase Total per Person by Gender
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$3.20
Male,652,$3.02,"$1,967.64",$3.02
Other / Non-Disclosed,15,$3.35,$50.19,$3.35


In [None]:

# DELETE ME?
unique_count = heroes_df["SN"].nunique()
print(unique_count)
#unique-count = gender_analysis_df["SN"].
#print(unique-count)
# Purchase Count 
#gender_purchase_count = heroes_df.groupby(['Gender']).count()
#gender_purchase_count
#gender_comparison_df

In [58]:
# Age Demographics
# The below, each broken into bins of 4 years (i.i < 10, 10-14, 15-19, etc.)

bins = [0,10,15,20,25,30,35,40,45, 50]  #note to self, max age is 45

group_names = ["under 10","10-14","15-19","20-24", "25-29","30-34","35-39","40-44","45-49"]

heroes_df["Age Groups"] = pd.cut(heroes_df["Age"], bins, labels=group_names, include_lowest=True)
heroes_df



# Purchase Count
age_purchase_count =  heroes_df.groupby(['Age Groups']).count()['Price']
age_purchase_count

# Average Purchase Price
age_average_purch_price = heroes_df.groupby(['Age Groups']).mean()['Price']
age_average_purch_price

# Total Purchase Value
age_total_purchase_value = heroes_df.groupby(['Age Groups']).sum()['Price']
age_total_purchase_value

## Average Purchase Total Per Person By Age Group
age_average_person_purchase = tage_total_purchase_value / age_purchase_count

age_summary_dict = {"Age Group":['under 10','10-14','15-19','20-24','25-29','30-34','35-39','40-44','45-49'],
                    "Purchase Count": [under_10, age_10_14, age_15_19,age_20_24,age_25_29,age_30_34,age_35_39,age_40_44,age_45_50],
                   "Average Purchase Price":age_average_purch_price,
                   "Total Purchase Value": age_total_purchase_value,
                   "Aver":age_average_person_purchase}

age_summary_df = pd.DataFrame(age_summary_dict)
age_summary_df['Average Purchase Price'] = age_summary_df['Average Purchase Price'].astype(float).map("${:,.2f}".format)
age_summary_df['Total Purchase Value'] = age_summary_df['Total Purchase Value'].astype(float).map("${:,.2f}".format)

age_summary_df

Unnamed: 0_level_0,Age Group,Purchase Count,Average Purchase Price,Total Purchase Value
Age Groups,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
under 10,under 10,32,$3.40,$108.96
10-14,10-14,54,$2.90,$156.60
15-19,15-19,200,$3.11,$621.56
20-24,20-24,325,$3.02,$981.64
25-29,25-29,77,$2.88,$221.42
30-34,30-34,52,$2.99,$155.71
35-39,35-39,33,$3.40,$112.35
40-44,40-44,7,$3.08,$21.53
45-49,45-49,0,$nan,$0.00


In [37]:
# testing stuff... delete me
new_summary = age_demo_summary.stack(0)
new_summary


0  under 10     32
   10-14        54
   15-19       200
   20-24       325
   25-29        77
   30-34        52
   35-39        33
   40-44         7
   45-49         0
dtype: int64

In [38]:
#  Average Purchase Price

age_group = heroes_df.groupby("Age Groups")
age_group.count()


Unnamed: 0_level_0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
Age Groups,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
under 10,32,32,32,32,32,32,32
10-14,54,54,54,54,54,54,54
15-19,200,200,200,200,200,200,200
20-24,325,325,325,325,325,325,325
25-29,77,77,77,77,77,77,77
30-34,52,52,52,52,52,52,52
35-39,33,33,33,33,33,33,33
40-44,7,7,7,7,7,7,7
45-49,0,0,0,0,0,0,0


In [None]:
print(age_group["Price"].mean())
age_summary_df["Ave Purchase Price"] = age_group["Price"].mean()
age_summary_df

In [None]:
#  Total Purchase Value
print(age_group["Price"].sum())
some_variable = age_group["Price"].sum()[0]
print(f'this is some variable' + str(some_variable))
      

In [None]:
#some_variable.dtype()
age_summary_df.loc[:,"Total Purchase Value"] = some_variable  # I get 'NaN'
#age_summary_df = age_summary_df.assign("Total Purchase Value"= some_variable)
age_summary_df

In [None]:
#  Average Purchase Total per Person by Age Group

#  age_group["Ave Purchase Total per Person by Age Group"] = age_group["Total Purchase Value"]/
#  print(age_group["Price"].sum()/age_demo_summary[])

In [None]:
# Top Spenders
# Identify the top 5 spenders in the game by total purchase value, then list (in a table):
spenders_group = heroes_df.groupby("SN")
#spenders["SN","Amount Spent"] = spenders_group["Price"].sum
#spenders
print(spenders["Price"].sum())
#  SN (Screen Name)
#  Purchase Count
#  Average Purchase Price
#  Total Purchase Value

In [None]:
# Most Popular Items
# Identify the 5 most popular items by purchase count, then list (in a table):
#  Item ID
#  Item Name
#  Purchase Count
#  Item Price
#  Total Purchase Value

In [None]:
# Most Profitable Items
# Identify the 5 most profitable items by total purchase value, then list (in a table)
#  Item ID
#  Item Name
#  Purchase Count
#  Item Price
#  Total Purchase Value