In [1]:
import pandas as pd

#Path of the raw purchase records (relative to the notebook's working directory)
purchase_data_file = "purchase_data.csv"
#Read the CSV into a pandas DataFrame
purchase_data = pd.read_csv(filepath_or_buffer=purchase_data_file)

In [2]:
#Wrap the loaded records in a DataFrame named 'purchase_data_df'; every later cell works from this frame
purchase_data_df = pd.DataFrame(data=purchase_data)
#Last expression of the cell, so the frame is rendered as the cell output
purchase_data_df

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46
778,778,Sisur91,7,Male,92,Final Critic,4.19


In [3]:
#Total number of players: a player with several purchases appears on several rows,
#so count the distinct 'SN' values (dropna=False keeps the count equal to len(unique()))
total_players = purchase_data_df['SN'].nunique(dropna=False)
total_players

576

In [4]:
#Total number of unique items: an item bought more than once appears on several rows,
#so count the distinct 'Item Name' values (dropna=False keeps the count equal to len(unique()))
#NOTE(review): items are distinguished by name here, not by 'Item ID' - confirm that is the intended key
total_items = purchase_data_df['Item Name'].nunique(dropna=False)
print(total_items)

179


In [5]:
#Total number of purchases: every row of 'purchase_data_df' is one purchase, so take the row count
total_purchases = purchase_data_df.shape[0]
print(total_purchases)

780


In [6]:
#Total revenue = sum of the 'Price' column
total_revenue = purchase_data_df.loc[:, 'Price'].sum()
#Average price paid = mean of the 'Price' column
average_price = purchase_data_df.loc[:, 'Price'].mean()

#Print revenue first, then the average, one value per line
print(total_revenue)
print(average_price)

2379.77
3.050987179487176


In [7]:
#Distinct labels found in the 'Gender' column, in order of first appearance
unique_gender_values = purchase_data_df.loc[:, 'Gender'].unique()
unique_gender_values

array(['Male', 'Other / Non-Disclosed', 'Female'], dtype=object)

In [8]:
#Create a data frame ('gender_df') of only 'SN' (players) and 'Gender',
#with duplicate (SN, Gender) rows removed so repeat purchasers are counted once.
#No dtype cast is performed: the equality filters and the replace below work directly on the 'Gender' labels as loaded
gender_df = purchase_data_df[['SN', 'Gender']].drop_duplicates()

#Rename value 'Other / Non-Disclosed' to 'Other'.
#The nested dict limits the replacement to the 'Gender' column, so an 'SN' value can never be rewritten
gender_df = gender_df.replace({'Gender': {"Other / Non-Disclosed": "Other"}})
print(gender_df)

                SN  Gender
0          Lisim78    Male
1      Lisovynya38    Male
2       Ithergue48    Male
3    Chamassasya86    Male
4        Iskosia90    Male
..             ...     ...
773         Hala31    Male
774     Jiskjask80    Male
775     Aethedru70  Female
777     Yathecal72    Male
778        Sisur91    Male

[576 rows x 2 columns]


In [9]:
#Select the rows of the de-duplicated 'gender_df' whose 'Gender' is "Male", keeping both columns
columns = ['SN', 'Gender']
male_df = gender_df.loc[gender_df['Gender'].eq("Male"), columns]

#Total number of male players = row count of 'male_df'
total_male = male_df.shape[0]
print(total_male)

484


In [10]:
#Select the rows of the de-duplicated 'gender_df' whose 'Gender' is "Female", keeping both columns
columns = ['SN', 'Gender']
female_df = gender_df.loc[gender_df['Gender'].eq("Female"), columns]

#Total number of female players = row count of 'female_df'
total_female = female_df.shape[0]
print(total_female)

81


In [11]:
#Select the rows of the de-duplicated 'gender_df' whose 'Gender' is "Other", keeping both columns
columns = ['SN', 'Gender']
other_df = gender_df.loc[gender_df['Gender'].eq("Other"), columns]

#Total number of players in the 'Other' group = row count of 'other_df'
total_other = other_df.shape[0]
print(total_other)

11


In [12]:
#Player demographics: each gender count divided by the total number of players.
#The results are fractions of 'total_players' (they are not multiplied by 100)
percent_male, percent_female, percent_other = (
    count / total_players
    for count in (total_male, total_female, total_other)
)
#One value per line: male, female, other
print(percent_male, percent_female, percent_other, sep="\n")

0.8402777777777778
0.140625
0.019097222222222224


In [63]:
##'Purchase Analysis (Gender)'
#Re-index 'purchase_data_df' by 'Gender', then keep only the 'Price' column as a one-column data frame
purchase_analysis_gender_df = purchase_data_df.set_index(keys='Gender')
price_analysis_gender_df = purchase_analysis_gender_df.loc[:, ['Price']]
price_analysis_gender_df

Unnamed: 0_level_0,Price
Gender,Unnamed: 1_level_1
Male,3.53
Male,1.56
Male,4.88
Male,3.27
Male,1.44
...,...
Female,3.54
Male,1.63
Male,3.46
Male,4.19


In [83]:
#Group the prices by 'Gender'; this group object is also used by the average and count cells
purchase_analysis_gender_group = price_analysis_gender_df.groupby(by=['Gender'])

#'Total Purchase Value': sum 'Price' per gender, rename the column, then format each total as a dollar string.
#Despite the '_df' suffix the result is a Series of strings named 'Total Purchase Value'
purchase_analysis_gender_tpv_df = (
    purchase_analysis_gender_group.sum()
    .rename(columns={"Price": "Total Purchase Value"})["Total Purchase Value"]
    .map("${:.2f}".format)
)
purchase_analysis_gender_tpv_df

Gender
Female                    $361.94
Male                     $1967.64
Other / Non-Disclosed      $50.19
Name: Total Purchase Value, dtype: object

In [80]:
#'Average Purchase Price': mean 'Price' per gender from the existing group object,
#with the column renamed and each mean formatted as a dollar string.
#Despite the '_df' suffix the result is a Series of strings named 'Average Purchase Price'
purchase_analysis_gender_avg_df = (
    purchase_analysis_gender_group.mean()
    .rename(columns={"Price": "Average Purchase Price"})["Average Purchase Price"]
    .map("${:.2f}".format)
)
purchase_analysis_gender_avg_df

Gender
Female                   $3.20
Male                     $3.02
Other / Non-Disclosed    $3.35
Name: Average Purchase Price, dtype: object

In [66]:
#'Total Count of Purchases': number of 'Price' entries per gender from the existing group object,
#with the 'Price' column renamed to 'Total Count of Purchases'
purchase_analysis_gender_tcp_df = (
    purchase_analysis_gender_group.count()
    .rename(columns={"Price": "Total Count of Purchases"})
)
purchase_analysis_gender_tcp_df

Unnamed: 0_level_0,Total Count of Purchases
Gender,Unnamed: 1_level_1
Female,113
Male,652
Other / Non-Disclosed,15


In [67]:
## TODO (skipped): create an 'Average Total Purchase per Person' data frame
#individual_purchase_gender_df = purchase_analysis_gender_df[['SN','Price']]
#individual_purchase_gender_df 

In [87]:
#Combine 'Total Count of Purchases', 'Average Purchase Price' and 'Total Purchase Value'
#into the 'Purchase Analysis (Gender)' data frame ('purchase_analysis_gender_df').
#'Average Total Purchase per Person' is not part of the result because that step is skipped.
#This rebinds 'purchase_analysis_gender_df', which previously held the 'Gender'-indexed purchase data.
#.assign() returns a new frame, so 'purchase_analysis_gender_tcp_df' is not modified and keeps only its count column;
#the added Series are aligned on the shared 'Gender' index
purchase_analysis_gender_df = purchase_analysis_gender_tcp_df.assign(**{
    'Average Purchase Price': purchase_analysis_gender_avg_df,
    'Total Purchase Value': purchase_analysis_gender_tpv_df,
})
purchase_analysis_gender_df


Unnamed: 0_level_0,Total Count of Purchases,Average Purchase Price,Total Purchase Value
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,113,$3.20,$361.94
Male,652,$3.02,$1967.64
Other / Non-Disclosed,15,$3.35,$50.19
