In [None]:
 # Dependencies and Setup
import pandas as pd

In [None]:
# Path of the purchase-data CSV that the load cell passes to pd.read_csv.
# It is a relative path, so update it if the file lives somewhere else.
file_to_load = "purchase_data.csv"

In [None]:
# Read the purchasing file and store it in a pandas DataFrame
purchase_data = pd.read_csv(file_to_load)

# Preview the first rows as a quick check of the loaded columns
purchase_data.head()

In [None]:
# Drop every row that has a missing value in any column, so the cells below
# only work with complete purchase records. Note that this rebinds
# purchase_data: the unfiltered frame is no longer available afterwards.
purchase_data = purchase_data.dropna(how="any")

# Player Count

In [None]:
# Total number of players: one group per screen name ("SN"), so the number
# of groups is the number of distinct players.
total_player_unique = purchase_data.groupby("SN")["SN"].nunique()
total_player = total_player_unique.shape[0]

# Single-row summary table for display
player_df = pd.DataFrame({"Total Players": [total_player]})
player_df

# Purchasing Analysis

In [None]:
# Distinct items that were purchased at least once
unique_items = purchase_data["Item ID"].unique()
unique_items_len = unique_items.size

# Price statistics over every purchase
prices = purchase_data["Price"]
average_price = prices.mean()
total_revenue = prices.sum()

# Total number of purchases
totalPurchases = purchase_data['Purchase ID'].count()

# Collect the results in a single-row summary table
purchasing_analysis_df = pd.DataFrame(
    {
        "Number of Unique Items": [unique_items_len],
        "Average Price": [average_price],
        "Number of Purchases": [totalPurchases],
        "Total Revenue": [total_revenue],
    }
)

# Render the monetary columns as dollar strings for display
for money_column in ("Average Price", "Total Revenue"):
    purchasing_analysis_df[money_column] = purchasing_analysis_df[money_column].map('${:.2f}'.format)

# Display the summary
purchasing_analysis_df

# Gender Demographics

In [None]:
# Keep one row per player so each person is counted once, however many
# purchases they made.
gender_demo = purchase_data.drop_duplicates(["SN"])

# Display label -> value as it is stored in the "Gender" column
gender_values = {
    "Males": "Male",
    "Females": "Female",
    "Other/Non-Disclosed": "Other / Non-Disclosed",
}

# Player count per gender, and its share of all players (in percent)
gender_counts = [
    len(gender_demo.loc[gender_demo["Gender"] == stored_value])
    for stored_value in gender_values.values()
]
gender_percents = [(count / total_player) * 100 for count in gender_counts]

# From here on gender_demo is the summary table, not the per-player frame
gender_demo = pd.DataFrame(
    {"Total Count": gender_counts, "Percentage of Players": gender_percents},
    index=list(gender_values),
)

# Render the percentage as text for display
gender_demo['Percentage of Players'] = gender_demo['Percentage of Players'].map('{:.2f}%'.format)

# Display the results
gender_demo

# Purchasing Analysis (Gender)

In [None]:
# Per-gender purchasing summary.
#
# Every metric is derived from the same groupby, so all columns share the
# data's own gender labels as index and line up by label rather than by
# position. Building the frame from hand-typed labels and then dividing by a
# groupby result silently yields NaN for any label that is not spelled
# exactly as in the data (e.g. "Other/Non-Disclosed" vs
# "Other / Non-Disclosed").
grouped_df = purchase_data.groupby(['Gender'])

purch_ana = pd.DataFrame({
    # Number of purchase rows per gender
    "Purchase Count": grouped_df.size(),
    "Average Purchase Price": grouped_df['Price'].mean(),
    "Total Purchase Value": grouped_df['Price'].sum(),
})

# Total spend divided by the number of distinct players of that gender
purch_ana['Avg Total Purchase per Person'] = (
    purch_ana['Total Purchase Value'] / grouped_df['SN'].nunique()
)

# Relabel for display only after all label-aligned arithmetic is done
purch_ana = purch_ana.rename(index={"Other / Non-Disclosed": "Other/Non-Disclosed"})

# Render the monetary columns as dollar strings for display
for money_column in ('Average Purchase Price',
                     'Total Purchase Value',
                     'Avg Total Purchase per Person'):
    purch_ana[money_column] = purch_ana[money_column].map('${:.2f}'.format)

purch_ana

# Age Demographics

In [None]:
# Bin edges for the age ranges (pd.cut treats each bin as right-inclusive)
bins = [0, 9, 14, 19, 24, 29, 34, 39, 99]

# One label per bin, in the same order as the edges above
group_labels = [
    "<10", "10-14", "15-19", "20-24",
    "25-29", "30-34", "35-39", "40+",
]

# One row per player (first purchase kept), tagged with the player's age range
age_demo = purchase_data.drop_duplicates(['SN']).assign(
    **{"Age Ranges": lambda players: pd.cut(players["Age"], bins, labels=group_labels)}
)

# Number of players in each age range
age = age_demo.groupby("Age Ranges")["SN"].count().to_frame("Total Count")

# Share of all players in each range, rendered as a percentage string
player_share = age["Total Count"].div(age["Total Count"].sum()).mul(100)
age["Percentage of Players"] = player_share.map('{:.2f}%'.format)

# Display the table
age

# Purchasing Analysis (Age)

In [None]:
# Tag every purchase (not just every player) with the buyer's age range.
# Reuses the bins and group_labels defined in the Age Demographics cell.
age_ana = purchase_data.assign(
    **{"Age Ranges": pd.cut(purchase_data["Age"], bins, labels=group_labels)}
)
total_age_ana = age_ana.groupby('Age Ranges')

# Purchase count, average price and total value per age range; all three
# come from the same groupby and therefore share one index.
age_ana_df = pd.DataFrame({
    "Purchase Count": total_age_ana['SN'].count(),
    "Average Purchase Price": total_age_ana['Price'].mean(),
    "Total Purchase Value": total_age_ana['Price'].sum(),
})

# Average spend per person: total value over the per-range player count
# computed in the Age Demographics cell (age['Total Count']).
age_ana_df['Avg Total Purchase per Person'] = age_ana_df['Total Purchase Value'] / age['Total Count']

# Render the monetary columns as dollar strings for display
for money_column in ('Average Purchase Price',
                     'Total Purchase Value',
                     'Avg Total Purchase per Person'):
    age_ana_df[money_column] = age_ana_df[money_column].map('${:.2f}'.format)

# Display the result
age_ana_df

# Top Spenders

In [None]:
# Group every purchase by player screen name ("SN")
top = purchase_data.groupby(['SN'])

# Per-player purchase count, average purchase price and total spend.
# All three come from the same groupby, so they share the SN index.
top_ana = pd.DataFrame({
    "Purchase Count": top.size(),
    "Average Purchase Price": top['Price'].mean(),
    "Total Purchase Value": top['Price'].sum(),
})

# Index title for the output table
top_ana.index.name = 'SN'

# Rank players by total spend while the values are still numeric;
# formatting first would turn them into strings and sort lexicographically.
top_ana_descen = top_ana.sort_values('Total Purchase Value', ascending=False)

# Render the monetary columns as dollar strings for display
for money_column in ('Average Purchase Price', 'Total Purchase Value'):
    top_ana_descen[money_column] = top_ana_descen[money_column].map('${:.2f}'.format)

# Show only the top spenders; top_ana_descen itself still holds every player
top_ana_descen.head(5)

# Most Popular Items

In [None]:
# One group per item, keyed by both id and name
item_id_groupby_df = purchase_data.groupby(['Item ID', 'Item Name'])
item_prices = item_id_groupby_df['Price']

# Purchase count, item price (mean of the recorded prices) and total value
most_popular = pd.DataFrame({
    'Purchase Count': item_prices.count(),
    'Item Price': item_prices.mean(),
    'Total Purchase Value': item_prices.sum(),
})

# Keep the five most-purchased items
most_popular = most_popular.sort_values('Purchase Count', ascending=False).head(5)

# Render the monetary columns as dollar strings for display
most_popular = most_popular.assign(**{
    money_column: most_popular[money_column].map('${:.2f}'.format)
    for money_column in ('Item Price', 'Total Purchase Value')
})

# Display the result
most_popular

# Most Profitable Items

In [None]:
# Rank ALL items by the revenue they generated.
#
# The summary is recomputed from purchase_data on purpose: most_popular has
# already been cut down to the five most-purchased items and its money
# columns are '$'-formatted strings, so sorting it would neither consider
# every item nor order the values numerically.
profit_prices = purchase_data.groupby(['Item ID', 'Item Name'])['Price']

most_profitable = pd.DataFrame({
    'Purchase Count': profit_prices.count(),
    'Item Price': profit_prices.mean(),
    'Total Purchase Value': profit_prices.sum(),
})

# Sort on the numeric total first, then keep the five highest-earning items
most_profitable = most_profitable.sort_values('Total Purchase Value', ascending=False).head(5)

# Render the monetary columns as dollar strings for display
most_profitable = most_profitable.assign(**{
    money_column: most_profitable[money_column].map('${:.2f}'.format)
    for money_column in ('Item Price', 'Total Purchase Value')
})

most_profitable