In [30]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)

In [38]:
#Viewing the head of the data
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [39]:
#identify names of unique players
purchase_data.SN.unique()
#finding the sum of unique players
total=len(purchase_data.SN.unique())

#Summary table
summary_total_players = pd.DataFrame({"Total Players":[total]})
summary_total_players


Unnamed: 0,Total Players
0,576


In [40]:
#Finding total of unique items
purchase_data["Item ID"].unique()
No_Unique_Items=len(purchase_data["Item ID"].unique())

#finding average price and roundoff to 2 decimal places
Average_price=round(purchase_data["Price"].mean(),2)

#Number of purchases
No_of_purchases=len(purchase_data)

#Total Revenue
Total_Revenue=round(purchase_data["Price"].sum(),2)

#Summary table
purchasing_analysis=pd.DataFrame({"Number of Unique Items": [No_Unique_Items], "Average Price":[Average_price], "Number of Purchases": [No_of_purchases], "Total Revenue": [Total_Revenue]})
purchasing_analysis

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,3.05,780,2379.77


In [41]:
#Use groupby to group data into gender while counting unique values 
genders=purchase_data.groupby('Gender')['SN'].nunique().reset_index()

#Finding the percentage for each gender
genders['Percentage']=round(100*genders['SN']/genders['SN'].sum(),2)

#Renaming columns
genders.columns=['Gender', 'Total Count', 'Percentage of Player']
genders


Unnamed: 0,Gender,Total Count,Percentage of Player
0,Female,81,14.06
1,Male,484,84.03
2,Other / Non-Disclosed,11,1.91


In [58]:
purchasing_analysis=purchase_data.groupby(['Gender']).agg({'SN':['count'],'Price':['mean','sum']})
#purchasing_analysis['Mean']=purchasing_analysis['Price'].sum()/purchasing_analysis['SN'].sum()

purchasing_analysis.columns=['Purchase Count', 'Average Purchase Price', 'Total Purchase Value']


#Purchase per person
purchasing_analysis['Purchase per Person']=purchasing_analysis['Total Purchase Value']/purchasing_analysis['Purchase Count']
purchasing_analysis

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,3.203009,361.94,3.203009
Male,652,3.017853,1967.64,3.017853
Other / Non-Disclosed,15,3.346,50.19,3.346


In [59]:
#Look fro max and min ages to make sure they are included in bins
print(purchase_data['Age'].max())
print(purchase_data['Age'].min())



45
7


In [60]:
#Create Bins for ages
bins=[0,9,14,19,24,29,34,39,49]

#Create labels for bins
age_labels=["<10", "10-14", "15-19","20-24","25-29","30-34","35-39","40+",]

In [61]:
# Slice the data and place it into bins
pd.cut(purchase_data["Age"], bins, labels=age_labels).head()

0    20-24
1      40+
2    20-24
3    20-24
4    20-24
Name: Age, dtype: category
Categories (8, object): [<10 < 10-14 < 15-19 < 20-24 < 25-29 < 30-34 < 35-39 < 40+]

In [66]:
# Place the data series into a new column inside of the DataFrame
purchase_data["Age Group"] = pd.cut(purchase_data["Age"], bins, labels=age_labels)
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price,Age Group
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53,20-24
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56,40+
2,2,Ithergue48,24,Male,92,Final Critic,4.88,20-24
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27,20-24
4,4,Iskosia90,23,Male,131,Fury,1.44,20-24


In [67]:
# Create a GroupBy object based upon "Age Group" counting only unique players
Age_group = purchase_data.groupby("Age Group")["SN"].nunique().reset_index()

# Finding the percentage of players
Age_group['Percentage']=round(100*Age_group['SN']/Age_group['SN'].sum(),2)

#Rename the columns in data frame
Age_group.columns=['Age Group', 'Total Count','Percentage of Players in %']
Age_group

Unnamed: 0,Age Group,Total Count,Percentage of Players in %
0,<10,17,2.95
1,10-14,22,3.82
2,15-19,107,18.58
3,20-24,258,44.79
4,25-29,77,13.37
5,30-34,52,9.03
6,35-39,31,5.38
7,40+,12,2.08


In [68]:
#purchase analysis by Age Group
purchasing_analysis_age=purchase_data.groupby(['Age Group']).agg({'SN':['count'],'Price':['mean','sum']})
purchasing_analysis_age.columns=['Purchase Count','Average Purchase Price','Total Purchase Value']
purchasing_analysis_age

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
<10,23,3.353478,77.13
10-14,28,2.956429,82.78
15-19,136,3.035956,412.89
20-24,365,3.052219,1114.06
25-29,101,2.90099,293.0
30-34,73,2.931507,214.0
35-39,41,3.601707,147.67
40+,13,2.941538,38.24


In [69]:
#Finding top spenders
top_spenders=purchase_data.groupby(['SN']).agg({'Item ID':['count'],'Price':['mean','sum']})
top_spenders.columns=['Total Count','Average Purchase Price','Total Purchase Value']
top_spenders.sort_values('Total Purchase Value',ascending=False)

Unnamed: 0_level_0,Total Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,3.792000,18.96
Idastidru52,4,3.862500,15.45
Chamjask73,3,4.610000,13.83
Iral74,4,3.405000,13.62
Iskadarya95,3,4.366667,13.10
...,...,...,...
Ililsasya43,1,1.020000,1.02
Irilis75,1,1.020000,1.02
Aidai61,1,1.010000,1.01
Chanirra79,1,1.010000,1.01


In [70]:
#Grouping by Item ID and Item name, finding Purchase count per item, item price and its total purchase value
popular_items=purchase_data.groupby(['Item ID', 'Item Name']).agg({'Item ID':['count'],'Price':['mean','sum']})

#renaming the columns
popular_items.columns=['Purchase Count','Item Price','Total Purchase Value']

#Finding popula items by descending purchase count column
popular_items.sort_values('Purchase Count',ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,4.614615,59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,4.230000,50.76
145,Fiery Glass Crusader,9,4.580000,41.22
132,Persuasion,9,3.221111,28.99
108,"Extraction, Quickblade Of Trembling Hands",9,3.530000,31.77
...,...,...,...,...
42,The Decapitator,1,1.750000,1.75
51,Endbringer,1,4.660000,4.66
118,"Ghost Reaver, Longsword of Magic",1,2.170000,2.17
104,Gladiator's Glaive,1,1.930000,1.93


In [71]:
#Most profitable items by arranging the total purchase value in descending order
popular_items.sort_values('Total Purchase Value',ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,4.614615,59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,4.230000,50.76
82,Nirvana,9,4.900000,44.10
145,Fiery Glass Crusader,9,4.580000,41.22
103,Singed Scalpel,8,4.350000,34.80
...,...,...,...,...
28,"Flux, Destroyer of Due Diligence",2,1.060000,2.12
125,Whistling Mithril Warblade,2,1.000000,2.00
126,Exiled Mithril Longsword,1,2.000000,2.00
104,Gladiator's Glaive,1,1.930000,1.93
