In [1]:
# import relevant modules
import pandas as pd

# path to the purchase records CSV (relative to the working directory)
file = "Resources/purchase_data.csv"
# load the CSV into the DataFrame used by the cells below
purchase_data = pd.read_csv(filepath_or_buffer=file)

# show the frame so its columns can be checked by eye
purchase_data

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46
778,778,Sisur91,7,Male,92,Final Critic,4.19


In [2]:
# check data cleanliness: tally the non-null entries in every column;
# equal totals across columns mean no column has missing values
purchase_data.notna().sum()

Purchase ID    780
SN             780
Age            780
Gender         780
Item ID        780
Item Name      780
Price          780
dtype: int64

In [35]:
# every distinct screen name (SN) is treated as one player
# array holding each SN once
uniqueSN = purchase_data["SN"].unique()
# its length is the number of players
totalUsers = len(uniqueSN)

print("There are a total of {} players playing the game.".format(totalUsers))

There are a total of 576 players playing the game.


In [7]:
# complete purchasing analysis calculations
# number of unique items (distinct Item IDs)
uniqueItems = purchase_data["Item ID"].unique()
uniqueItemCount = len(uniqueItems)
# average purchase price, rounded to cents
averagePurchasePrice = round(purchase_data["Price"].mean(),2)
# total count of purchases (one Purchase ID per purchase row)
uniquePurchases = purchase_data["Purchase ID"].count()
# total revenue
totalRevenue = purchase_data["Price"].sum()

# display these results
# the first figure is the count of distinct Item IDs, so it is labelled as items,
# not purchases (the purchase count is reported separately below)
print("There have been a total of {} distinct items purchased in-game.".format(uniqueItemCount))
# money is formatted with exactly two decimals so values such as 3.1 print as 3.10
# and float summation noise never reaches the output
print("In-game transactions are for an average of ${:.2f} per purchase.".format(averagePurchasePrice))
print("There have been a total of {} purchases within the game.".format(uniquePurchases))
print("A total of ${:.2f} has been spent in-game.".format(totalRevenue))

There have been a total of 179 distinct purchases in-game.
In-game transactions are for an average of $3.05 per purchase.
There have been a total of 780 purchases within the game.
A total of $2379.77 has been spent in-game.


In [23]:
# gender demographic analysis
# build a per-player table: keep only SN and Gender, collapse repeated SNs
# to a single row, then index the result by SN
genderCount_df = (
    purchase_data[["SN","Gender"]]
    .drop_duplicates(subset="SN")
    .set_index("SN")
)

# tally of players for each gender value
genderCount_df["Gender"].value_counts()

Male                     484
Female                    81
Other / Non-Disclosed     11
Name: Gender, dtype: int64

In [33]:
# gender demographic analysis (cont'd)
# count total players, males, females, other
# (each row of genderCount_df is one SN, so row counts are player counts)
totalCount = len(genderCount_df)
maleCount = len(genderCount_df.loc[genderCount_df["Gender"] == "Male",:])
femaleCount = len(genderCount_df.loc[genderCount_df["Gender"] == "Female",:])
# the filter uses the spelling found in the data ("Other / Non-Disclosed");
# the display label below is spelled without the spaces around the slash
otherCount = len(genderCount_df.loc[genderCount_df["Gender"] == "Other / Non-Disclosed",:])

# display the results
genderAnalysis_df = pd.DataFrame(
    {"Gender": ["Male","Female","Other/Non-Disclosed"],
    "Count": [maleCount, femaleCount, otherCount]})

# share of all players per gender, rendered as a percentage string with two decimals
genderAnalysis_df["Percentage of Players"] = (
    (genderAnalysis_df["Count"]/totalCount)*100
).map("{:.2f}%".format)

# the dangling `genderAnalysis_df.style.` line was removed: it is an incomplete
# attribute access and a SyntaxError that kept this cell from running
genderAnalysis_df

Unnamed: 0,Gender,Count,Percentage of Players
0,Male,484,84.03%
1,Female,81,14.06%
2,Other/Non-Disclosed,11,1.91%


In [34]:
# group the purchase rows by screen name (SN); despite the variable's name,
# the grouping key is SN, not Gender
genderGrouped = purchase_data.groupby(by=["SN"])
# per-SN counts for every remaining column, previewing the first five groups
genderGrouped.count().head(n=5)

Unnamed: 0_level_0,Purchase ID,Age,Gender,Item ID,Item Name,Price
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adairialis76,1,1,1,1,1,1
Adastirin33,1,1,1,1,1,1
Aeda94,1,1,1,1,1,1
Aela59,1,1,1,1,1,1
Aelaria33,1,1,1,1,1,1
