In [1]:
#Downloading the Dependencies

import pandas as pd
import numpy as np


In [2]:
#The file path for the data
heroes_pymoli_df = pd.read_csv("Resources/purchase_data.csv")
heroes_pymoli_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


# Player Count

In [3]:
#Display the total number of players

players = len(heroes_pymoli_df["SN"].value_counts())
print(f"Total Players: {players}")


Total Players: 576


# Purchase Analysis

In [4]:
#unique items
u_items = len(heroes_pymoli_df["Item ID"].value_counts())
u_items

183

In [5]:
#average purchase price
avg_price = round(heroes_pymoli_df["Price"].mean(), 2)
avg_price

3.05

In [6]:
#total number of purchases
num_purchases = heroes_pymoli_df["Price"].count()
num_purchases

780

In [7]:
#total revenue
revenue_total = round(heroes_pymoli_df["Price"].sum(), 2)
revenue_total

2379.77

In [8]:
purchase_analysis = []
purchase_analysis.append(u_items)
purchase_analysis.append("$" + str(avg_price))
purchase_analysis.append(num_purchases)
purchase_analysis.append("$" + str(revenue_total))


In [9]:
#Putting into a dataframe
pd.DataFrame([purchase_analysis], columns=["Unique Items", "Average Price", "Number of Purchases", "Total Revenue"])

Unnamed: 0,Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,$3.05,780,$2379.77


# Gender Demograpics

In [10]:
gender =heroes_pymoli_df[["SN", "Gender"]]
gender.head()

Unnamed: 0,SN,Gender
0,Lisim78,Male
1,Lisovynya38,Male
2,Ithergue48,Male
3,Chamassasya86,Male
4,Iskosia90,Male


In [11]:
g_counts = gender["Gender"].value_counts()
g_counts

Male                     652
Female                   113
Other / Non-Disclosed     15
Name: Gender, dtype: int64

In [12]:

(heroes_pymoli_df.groupby("Gender").size()/heroes_pymoli_df["Gender"].count())*100
percent_h = (heroes_pymoli_df.groupby("Gender").size()/heroes_pymoli_df["Gender"].count())*100
percent_h

Gender
Female                   14.487179
Male                     83.589744
Other / Non-Disclosed     1.923077
dtype: float64

In [13]:
#Making dataframe

gender_df = pd.DataFrame({"Total Counts": g_counts, "Percentage of Players": percent_h})
gender_df.index = (["Female", "Male", "Other/Non-Disclosed"])
gender_df

Unnamed: 0,Total Counts,Percentage of Players
Female,113,14.487179
Male,652,83.589744
Other/Non-Disclosed,15,1.923077


# Purchase Analysis: Gender

In [14]:
total_spent = heroes_pymoli_df[["SN", "Gender", "Price"]]
counts_g = total_spent["Gender"].value_counts()
purchase_counts = counts_g[0],counts_g[1],counts_g[2]
total_spent = total_spent.groupby("Gender")
amount_spent = total_spent.sum()
amount_spent

Unnamed: 0_level_0,Price
Gender,Unnamed: 1_level_1
Female,361.94
Male,1967.64
Other / Non-Disclosed,50.19


In [15]:
total_spent.mean

<bound method GroupBy.mean of <pandas.core.groupby.groupby.DataFrameGroupBy object at 0x1159e4b00>>

# Age Demographics

In [16]:
age_demo = heroes_pymoli_df[["SN", "Age"]]
age_demo = age_demo.drop_duplicates()

In [17]:
#ages 
age_10 = age_demo[age_demo["Age"] < 10].count()[0]
age_14 = age_demo[(age_demo["Age"] >= 10) & (age_demo["Age"] <=14)].count()[0]
age_19 = age_demo[(age_demo["Age"] >= 15) & (age_demo["Age"] <=19)].count()[0]
age_24 = age_demo[(age_demo["Age"] >= 20) & (age_demo["Age"] <=24)].count()[0]
age_29 = age_demo[(age_demo["Age"] >= 25) & (age_demo["Age"] <=29)].count()[0]
age_34 = age_demo[(age_demo["Age"] >= 30) & (age_demo["Age"] <=34)].count()[0]
age_39 = age_demo[(age_demo["Age"] >= 35) & (age_demo["Age"] <=39)].count()[0]
age_40 = age_demo[age_demo["Age"] >= 40].count()[0]
ages = [age_10, age_14, age_19, age_24, age_29, age_34, age_39, age_40]


In [18]:
#percentages
percent_10 = round((age_10/players)*100,2)
percent_14 = round((age_14/players)*100,2)
percent_19 = round((age_19/players)*100,2)
percent_24 = round((age_24/players)*100,2)
percent_29 = round((age_29/players)*100,2)
percent_34 = round((age_34/players)*100,2)
percent_39 = round((age_39/players)*100,2)
percent_40 = round((age_40/players)*100,2)
percents_p = [percent_10,percent_14,percent_19,percent_24,percent_29,percent_34,percent_39,percent_40]
percents_p


[2.95, 3.82, 18.58, 44.79, 13.37, 9.03, 5.38, 2.08]

In [19]:
#dictionary
age_dict = {"Percent of Players": percents_p, "Totals": ages}

In [20]:
#dataframe
age_dict_df = pd.DataFrame(age_dict)
age_dict_df.index = (["<10", "10-14", "15-19","20-24","25-29", "30-34","35-39", "40+"])
age_dict_df


Unnamed: 0,Percent of Players,Totals
<10,2.95,17
10-14,3.82,22
15-19,18.58,107
20-24,44.79,258
25-29,13.37,77
30-34,9.03,52
35-39,5.38,31
40+,2.08,12


# Top Spenders

In [21]:
spenders = heroes_pymoli_df[["SN", "Price", "Item Name"]]
total_spend = spenders.groupby("SN").sum()