In [60]:
#Import dependencies
import pandas as pd

In [61]:
#Set file path to read csv
file_path = '../Resources/purchase_data.csv'

#Create dataframe from csv file
heroes_of_pymoli = pd.read_csv(file_path)
heroes_of_pymoli.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [62]:
#Find number of players
player_count = heroes_of_pymoli['SN'].nunique()
player_count

576

In [63]:
#Purchasing Analysis
#Find number of unique items
unique_items = heroes_of_pymoli['Item ID'].nunique()

#Find average price of items
average_price = heroes_of_pymoli['Price'].mean()

#Total number of purchases
total_purchases = heroes_of_pymoli['Item ID'].count()

#Total Revenue
total_revenue = heroes_of_pymoli['Price'].sum()


In [64]:
#Create summary dataframe to hold Purchasing Analysis 
purchasing_summary_df = pd.DataFrame({'Number of Unique Items': [unique_items], 'Average Price' : [average_price], 'Total Purchases' : [total_purchases], 'Total Revenue' : [total_revenue]})
purchasing_summary_df

Unnamed: 0,Number of Unique Items,Average Price,Total Purchases,Total Revenue
0,179,3.050987,780,2379.77


In [65]:
#Gender Demographics
#Calculate count and percentage of players per gender
gender_demo = heroes_of_pymoli.drop_duplicates(subset=['SN'])
count = gender_demo['Gender'].value_counts()
percentage = count/player_count *100

#create DataFrame to dispay summary of gender demographics
gender_demo_summary = pd.DataFrame({'Total Count': count, 'Percentage of Players': percentage})

#Display summary table
gender_demo_summary

Unnamed: 0,Total Count,Percentage of Players
Male,484,84.027778
Female,81,14.0625
Other / Non-Disclosed,11,1.909722


In [66]:
#Gender Purchasing Analysis
#Run calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender
gender_purchases = heroes_of_pymoli.groupby(['Gender']).count()['Purchase ID']
gender_avg_purchase = heroes_of_pymoli.groupby(['Gender']).mean()['Price']
gender_total_purchase = (gender_purchases * gender_avg_purchase)
gender_avg_per_person = (gender_total_purchase / count)

#Create a summary data frame to hold the results
gender_purchase_analysis = pd.DataFrame({'Purchase Count':gender_purchases, 'Average Purchase Price':gender_avg_purchase, 'Total Purchase Value':gender_total_purchase, 'Average Total Purchase Per Person':gender_avg_per_person})

#Optional: give the displayed data cleaner formatting
#Display the summary data frame
gender_purchase_analysis


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Average Total Purchase Per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,3.203009,361.94,4.468395
Male,652,3.017853,1967.64,4.065372
Other / Non-Disclosed,15,3.346,50.19,4.562727


In [79]:
#Age Demographics
#Establish bins for ages
bins = [0, 9, 14, 19, 24, 29, 34, 39, 100]
group_names = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '>40']

#Categorize the existing players using the age bins. Hint: use pd.cut()
age_demo = heroes_of_pymoli.drop_duplicates(subset=['SN'])
age_demo['Age Group'] = pd.cut(age_demo['Age'], bins, labels=group_names, include_lowest=True)

#Calculate the numbers and percentages by age group
count = age_demo['Age Group'].value_counts()
percentage = count/player_count *100
percentage

#Create a summary data frame to hold the results
age_demographics = pd.DataFrame({'Total Count': count, 'Percentage of Players': percentage})

#Optional: round the percentage column to two decimal points
#Display Age Demographics Table
age_demographics

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,Total Count,Percentage of Players
20-24,258,44.791667
15-19,107,18.576389
25-29,77,13.368056
30-34,52,9.027778
35-39,31,5.381944
10-14,22,3.819444
<10,17,2.951389
>40,12,2.083333


In [88]:
#Purchasing Analysis (Age)
#Bin the purchase_data data frame by age
bins = [0, 9, 14, 19, 24, 29, 34, 39, 100]
group_names = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '>40']

age_group_purchases = heroes_of_pymoli
age_group_purchases['Age Group'] = pd.cut(age_group_purchases['Age'], bins, labels=group_names, include_lowest=True)

#Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below
age_purchases = age_group_purchases.groupby(['Age Group']).count()['Purchase ID']
age_avg_purchase = age_group_purchases.groupby(['Age Group']).mean()['Price']
age_total_purchase = (age_purchases * age_avg_purchase)
age_avg_per_person = (age_total_purchase / count)
age_avg_per_person

#Create a summary data frame to hold the results
age_group_analysis = pd.DataFrame({'Purchase Count':age_purchases, 'Average Purchase Price':age_avg_purchase, 'Total Purchase Value':age_total_purchase, 'Avg Total Purchase per Person':age_avg_per_person})

#Optional: give the displayed data cleaner formatting
#Display the summary data frame
age_group_analysis

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
<10,23,3.353478,77.13,4.537059
10-14,28,2.956429,82.78,3.762727
15-19,136,3.035956,412.89,3.858785
20-24,365,3.052219,1114.06,4.318062
25-29,101,2.90099,293.0,3.805195
30-34,73,2.931507,214.0,4.115385
35-39,41,3.601707,147.67,4.763548
>40,13,2.941538,38.24,3.186667


In [96]:
#Top Spenders
#Run basic calculations to obtain purchase count, average purchase price, and Total Purchase Price per SN
sn_purchases = heroes_of_pymoli.groupby('SN').count()['Purchase ID']
sn_avg_purchase = heroes_of_pymoli.groupby('SN').mean()['Price']
sn_total_purchase = sn_purchases * sn_avg_purchase

#Create a summary data frame to hold the results
sn_summary = pd.DataFrame({'Purchase Count':sn_purchases, 'Average Purchase Price':sn_avg_purchase, 'Total Purchase Value':sn_total_purchase})

#Sort the total purchase value column in descending order
sn_summary_sorted = sn_summary.sort_values('Total Purchase Value', ascending=False)
#Optional: give the displayed data cleaner formatting
#Display a preview of the summary data frame
sn_summary_sorted.head()

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,3.792,18.96
Idastidru52,4,3.8625,15.45
Chamjask73,3,4.61,13.83
Iral74,4,3.405,13.62
Iskadarya95,3,4.366667,13.1
