In [1]:
# Dependencies and Setup
import pandas as pd
import csv
from pathlib import Path

In [2]:
# File to Load (Remember to Change These)
file_to_load = Path("Resources/purchase_data.csv")

In [3]:
# Read Purchasing File and store into Pandas data frame
purchase_data_df = pd.read_csv(file_to_load)
purchase_data_df

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46
778,778,Sisur91,7,Male,92,Final Critic,4.19


In [4]:
# Player Count
player_count = len(purchase_data_df["SN"].unique())
player_count

576

In [5]:
# Total Number of Players
player_df = pd.DataFrame({"Total Players":[player_count]})                         
player_df

Unnamed: 0,Total Players
0,576


In [6]:
# Purchasing Analysis (Total)
purchase_total = purchase_data_df["Price"].sum()
purchase_total

2379.77

In [7]:
# Number of Unique Items
unique_items = len(purchase_data_df["Item Name"].unique())
unique_items

179

In [8]:
# Average Purchase Price
average_price = purchase_data_df["Price"].mean()
average_price.round(2)

3.05

In [9]:
# Total Number of Purchases
purchase_total_count = purchase_data_df["Price"].count()
purchase_total_count

780

In [10]:
# Total Revenue
total_revenue = purchase_data_df["Price"].sum()
total_revenue

2379.77

In [11]:
# Most Common Item
most_common = purchase_data_df["Item Name"].max()
most_common

'Yearning Mageblade'

In [12]:
# Least Common Item
least_common = purchase_data_df["Item Name"].min()
least_common

'Abyssal Shard'

In [13]:
#Create a summary data frame to hold the results
#Optional: give the displayed data cleaner formatting
#Display the summary data frame

purchase_analysis_summary_df = pd.DataFrame({"Number of Unique Items": [unique_items],
                                             "Average Price": [average_price],
                                             "Number of Purchases": [purchase_total_count],
                                             "Total Revenue": [total_revenue],
                                             "Most Common Item": most_common,
                                             "Least Common Item": least_common})

purchase_analysis_summary_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue,Most Common Item,Least Common Item
0,179,3.050987,780,2379.77,Yearning Mageblade,Abyssal Shard


In [14]:
# Gender Demographics (have to drop the duplicates)
gender_count_df = purchase_data_df[["Gender", "SN"]].drop_duplicates(subset = "SN", keep = 'first')

In [15]:
gender_count = gender_count_df.drop(columns=["SN"])

In [16]:
gender_percentage = gender_count.value_counts(normalize=True) * 100
gender_percentage.round(2)

Gender               
Male                     84.03
Female                   14.06
Other / Non-Disclosed     1.91
dtype: float64

In [17]:
# Percentage and Count of Male Players
# Percentage and Count of Female Players
# Percentage and Count of Other / Non-Disclosed

gender_demographics_summary = pd.DataFrame({"Total Count": gender_count,
                                            "Percentage of Players": gender_percentage})

gender_demographics_summary

ValueError: Shape of passed values is (1, 2), indices imply (3, 2)

In [22]:
# Purchasing Analysis (Gender)
#The below each broken by gender
purchase_gender_df = pd.DataFrame(purchase_data_df)
purchase_gender = purchase_gender_df.groupby(["Gender"])

In [23]:
# Purchase Count
purchase_count = purchase_gender["Price"].count()
purchase_count

Gender
Female                   113
Male                     652
Other / Non-Disclosed     15
Name: Price, dtype: int64

In [24]:
# Average Purchase Price
average_price = purchase_gender["Price"].mean()
average_price.round(2)

Gender
Female                   3.20
Male                     3.02
Other / Non-Disclosed    3.35
Name: Price, dtype: float64

In [25]:
# Total Purchase Value
total_purchase_value = purchase_gender["Price"].sum()
total_purchase_value

Gender
Female                    361.94
Male                     1967.64
Other / Non-Disclosed      50.19
Name: Price, dtype: float64

In [36]:
# Average Purchase Total per Person by Gender
avg_purchase_total_gender = total_purchase_value/gender_count
avg_purchase_total_gender

SyntaxError: invalid syntax (<ipython-input-36-82c5b7b4e5b2>, line 2)

In [37]:
# summary table
gender_purchase_summary = pd.DataFrame({"Purchase Count": purchase_count],
                                        "Average Purchase Price": average_price,
                                        "Total Purchase Value": total_purchase_value,
                                        "Avg Total Purchase per Person": avg_purchase_total_gender})

gender_purchase_summary

SyntaxError: invalid syntax (<ipython-input-37-7d8f49e2009b>, line 2)

In [None]:
# Age Demographics
# The below each broken into bins of 4 years (i.e. <10, 10-14, 15-19, etc.)

In [None]:
# Purchase Count

In [None]:
# Average Purchase Price

In [None]:
# Total Purchase Value

In [None]:
# Average Purchase Total per Person by Age Group

In [None]:
# Purchasing Analysis (age)
# The below each broken into bins of 4 years (i.e. <10, 10-14, 15-19, etc.)

In [None]:
# Purchase Count

In [None]:
# Average Purchase Price

In [None]:
# Total Purchase Value

In [None]:
# Average Purchase Total per Person by Age

In [None]:
# Top Spenders
# Identify the the top 5 spenders in the game by total purchase value, then list (in a table):


In [None]:
# SN

In [None]:
# Purchase Count

In [None]:
# Average Purchase Price


In [None]:
# Total Purchase Value

In [None]:
# Most Popular Items
# Identify the 5 most popular items by purchase count, then list (in a table):

In [None]:
# Item ID

In [None]:
# Item Name

In [None]:
# Purchase Count


In [None]:
# Item Price

In [None]:
# Total Purchase Value

In [None]:
# Most Profitable Items
# Identify the 5 most profitable items by total purchase value, then list (in a table):

In [None]:
# Item ID


In [None]:
#Item Name

In [None]:
# Purchase Count


In [None]:
# Item Price


In [None]:
# Total Purchase Value