In [1]:
#import Dependencies
import pandas as pd
import numpy as np

In [2]:
#Load resources used
file_to_load = "Resources/purchase_data.csv"

#read the data
purchase_data = pd.read_csv(file_to_load)

In [3]:
#show dataset being used
purchase_data

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46
778,778,Sisur91,7,Male,92,Final Critic,4.19


In [4]:
#find all unique values in SN
unique_players = purchase_data["SN"].unique()

In [5]:
#determine the total player count, Create dataframe of total players with column
total_players = pd.DataFrame(unique_players).count()
total_players = pd.DataFrame(total_players, columns = ["Total Players"])
total_players

Unnamed: 0,Total Players
0,576


In [6]:
#Find all unique items in data
unique_items = purchase_data["Item Name"].unique()
unique_items = pd.DataFrame(unique_items).count()
unique_items

0    179
dtype: int64

In [7]:
#determine average price of each item
avg_price = "${0:.2f}".format(purchase_data["Price"].mean())
avg_price

'$3.05'

In [8]:
#Determine the total amount of purchases
number_of_purchases = purchase_data["Purchase ID"].count()
number_of_purchases

780

In [9]:
#Calculate total revenue using sum function of data
revenue = "${0:.2f}".format(purchase_data["Price"].sum())
revenue

'$2379.77'

In [10]:
#Create new dataframe using above information using dictionary.
purchasing_analysis = {"Number of Unique Items": unique_items, 
                       "Number of Purchase": number_of_purchases,
                      "Average Price": avg_price,
                      "Total Revenue": revenue}
purchasing_analysis = pd.DataFrame(purchasing_analysis)
purchasing_analysis

Unnamed: 0,Number of Unique Items,Number of Purchase,Average Price,Total Revenue
0,179,780,$3.05,$2379.77


In [11]:
#Determine the total amount of male, female and other players
total_players = purchase_data["Gender"].count()
male_players = (purchase_data["Gender"]=="Male").sum()
female_players =(purchase_data["Gender"]=="Female").sum()
other_players = (purchase_data["Gender"]=="Other / Non-Disclosed").sum()

In [12]:
#Calculate the percentage of male, female, and other players. Use format function to create percentages
perc_male = "{0:.2f}%".format((male_players / total_players)*100)
perc_female = "{0:.2f}%".format((female_players / total_players)*100)
perc_other = "{0:.2f}%".format((other_players / total_players)*100)

In [13]:
#Create dataframe of above calculated data using dictionaries and libraries
gender_demo = {"Gender": ["Male", "Female", "Other / Non-Disclosed"], 
               "Total Count": [male_players, female_players, other_players], 
               "Percentage of Players": [perc_male, perc_female, perc_other]}
gender_demo = pd.DataFrame(gender_demo)

gender_demo

Unnamed: 0,Gender,Total Count,Percentage of Players
0,Male,652,83.59%
1,Female,113,14.49%
2,Other / Non-Disclosed,15,1.92%


In [14]:
#Create a bin of Age ranges for analysis, add Age range category to data
age_bins = [0,10,14,19,24,29,34,39,40]
group_names = ["<10","10-14","15-19","20-24","25-29","30-34","35-39","40+"]

purchase_data["Age Ranges"] = pd.cut(purchase_data["Age"], age_bins, labels=group_names)
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price,Age Ranges
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53,20-24
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56,40+
2,2,Ithergue48,24,Male,92,Final Critic,4.88,20-24
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27,20-24
4,4,Iskosia90,23,Male,131,Fury,1.44,20-24


In [15]:
# Calculate the total amount and percentage of players in each age range,
age_count = purchase_data["Age Ranges"].value_counts()
players_total = purchase_data["Age"].count()
age_perc = (age_count / players_total)*100

In [16]:
#Create dictionary to make dataframe of above data
age_demo = {"Count Total": age_count, "Percentage of Player": age_perc}

age_demo = pd.DataFrame(age_demo)

#Clean up data with added percent sign
age_demo["Percentage of Player"] = age_demo["Percentage of Player"].map("{0:.2f}%".format)
age_demo = age_demo.sort_index()

age_demo

Unnamed: 0,Count Total,Percentage of Player
<10,32,4.10%
10-14,19,2.44%
15-19,136,17.44%
20-24,365,46.79%
25-29,101,12.95%
30-34,73,9.36%
35-39,41,5.26%
40+,6,0.77%


In [17]:
#Create a groupby with bins to calculate age purchase analysis
age_group = purchase_data.groupby("Age Ranges")

#Calculate total players,Average price paid, sum of all purchases, and average purchase price per person in each age range
purchase_count = age_group["Purchase ID"].count()
price_average = age_group["Price"].mean()
total_price = age_group["Price"].sum()
avg_purchase_price = total_price/ purchase_count

In [18]:
#Create dictionary of purchase analysis
purchase_analysis = {"Purchase Count": purchase_count, 
                     "Average Purchase Price": price_average, 
                     "Total Purchase Value": total_price,
                    "Average Total Purchase Per Person": avg_purchase_price}

purchase_analysis = pd.DataFrame(purchase_analysis)

#Clean up dataframe adding dollar values
purchase_analysis["Average Purchase Price"] = purchase_analysis["Average Purchase Price"].map("${0:.2f}".format)
purchase_analysis["Average Total Purchase Per Person"] = purchase_analysis["Average Total Purchase Per Person"].map("${0:.2f}".format)
purchase_analysis["Total Purchase Value"] = purchase_analysis["Total Purchase Value"].map("${0:.2f}".format)

purchase_analysis

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Average Total Purchase Per Person
Age Ranges,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,32,$3.40,$108.96,$3.40
10-14,19,$2.68,$50.95,$2.68
15-19,136,$3.04,$412.89,$3.04
20-24,365,$3.05,$1114.06,$3.05
25-29,101,$2.90,$293.00,$2.90
30-34,73,$2.93,$214.00,$2.93
35-39,41,$3.60,$147.67,$3.60
40+,6,$2.79,$16.71,$2.79


In [19]:
#Group data by SN
id_group = purchase_data.groupby("SN")

#Calculate the purchase amounts, average price, and total spent per each player
count_purchase = id_group["Purchase ID"].count()
average_price = id_group["Price"].mean()
price_total = id_group["Price"].sum()

In [20]:
#Create dictionary for top spenders
top_spenders = {"Purchase Count": count_purchase, 
               "Average Purchase Price": average_price,
               "Total Purchase Value": price_total}

top_spenders = pd.DataFrame(top_spenders)

#Clean up dataframe adding dollar values
top_spenders["Average Purchase Price"] = top_spenders["Average Purchase Price"].map("${0:.2f}".format)
top_spenders["Total Purchase Value"] = top_spenders["Total Purchase Value"].map("${0:.2f}".format)

#Make dataframe looking at the users who made the most purchases
top_spenders.sort_values("Purchase Count", ascending=False).head(5)

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,$3.79,$18.96
Iral74,4,$3.40,$13.62
Idastidru52,4,$3.86,$15.45
Asur53,3,$2.48,$7.44
Inguron55,3,$3.70,$11.11


In [21]:
#Group data by Item ID and Item Name
item_group = purchase_data.groupby(["Item ID", "Item Name"])

#Calculate the total purchases, average price, and total purchase value
purchases = item_group["Item ID"].count()
prices = item_group["Price"].mean()
total_value = item_group["Price"].sum()

In [25]:
#Create dictionary for popular items
popular_items = {"Purchase Count": purchases, 
                 "Item Price": prices,
                 "Total Purchase Value": total_value}

popular_items = pd.DataFrame(popular_items)

#Clean up the data with adding dollar values
popular_items["Item Price"] = popular_items["Item Price"].map("${0:.2f}".format)


#Make dataframe looking at the users who made the most popular items
popular_items.sort_values("Purchase Count", ascending=False).head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,50.76
145,Fiery Glass Crusader,9,$4.58,41.22
132,Persuasion,9,$3.22,28.99
108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,31.77


In [26]:
# Switch sort values to Total Purchase value in descending order
popular_items_value = popular_items.sort_values("Total Purchase Value", ascending=False)

popular_items_value.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,50.76
82,Nirvana,9,$4.90,44.1
145,Fiery Glass Crusader,9,$4.58,41.22
103,Singed Scalpel,8,$4.35,34.8


In [24]:
# 3 observable trends in the data

# There are several observable trends that can be observed in the purchase data of Heroes of Pymoli. First, the majority of players that play the game is male(~84%).
# Second, more people between the ages of 15-30 play the game than any other ages(77.18%). Almost half of all players are between 20-24 (46.79%).
# Third, the most popular item to purchase is the Final Critic. It has the most purchases(13), as well as, the highest total purchase value ($59.99).