In [1]:
# Import libraries and data file, create player DataFrame
import pandas as pd 

data_file = 'Resources/purchase_data.csv'
player_df = pd.read_csv(data_file)

<h1><b>Player Count</b></h1>

In [2]:
#Get unique players and put it in a new Data Frame
totalPlayers = len(player_df["SN"].unique())
printTotalPlayer = pd.DataFrame({"Total Number of Players": [totalPlayers]})
printTotalPlayer

Unnamed: 0,Total Number of Players
0,576


<h1><b>Purchasing Analysis (Total)</b></h1>

In [3]:
#Find that stats about Purchases in game, Create new Dataframe to hold data and format
uniqueItems = len(player_df["Item ID"].unique())
averagePurch = player_df["Price"].mean()
totalPurch = player_df["Purchase ID"].count()
totalRev = player_df["Price"].sum()

print_df = pd.DataFrame({"Number of Unique Items": [uniqueItems],"Average Purchase Price": [averagePurch], 
                         "Total Number of Purchases": [totalPurch], "Total Revenue": [totalRev]})
print_df.style.format({'Average Purchase Price': "${:,.2f}",
                       "Total Revenue": "${:,.2f}"})

Unnamed: 0,Number of Unique Items,Average Purchase Price,Total Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


<h1><b>Gender Demographics</b></h1>

In [4]:
#Find that stats about players gender in game, Create new Dataframe to hold data and format
genderBreakdown = player_df.groupby("Gender")
totalGenderCount = genderBreakdown.nunique()["SN"]
playerPercent = totalGenderCount / totalPlayers * 100

printGender_df = pd.DataFrame({"Total Count" : totalGenderCount,"Player Percentage": playerPercent })
printGender_df.style.format({"Player Percentage" : "{:.2f}%"})


Unnamed: 0_level_0,Total Count,Player Percentage
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,81,14.06%
Male,484,84.03%
Other / Non-Disclosed,11,1.91%


<h1><b>Purchasing Analysis (Gender)</b></h1>

In [5]:
#Analyis perchases by gender, 
totalCountGender = genderBreakdown["Purchase ID"].count()
avgPurchPriceGender = genderBreakdown["Price"].mean()
totalPurchaseGender = genderBreakdown["Price"].sum() 
avgPerPlayerGender = totalPurchaseGender/totalGenderCount

printGenPurch_df = pd.DataFrame({"Purchase Count" : totalCountGender, "Average Purchase Price" : avgPurchPriceGender,
                                  "Total Purchase Value" : totalPurchaseGender, "Average Purchase Total per Person": avgPerPlayerGender}
                               )
printGenPurch_df.style.format({"Average Purchase Price": "${:,.2f}",
                               "Total Purchase Value": "${:,.2f}",
                               "Average Purchase Total per Person": "${:,.2f}"})


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Average Purchase Total per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,"$1,967.64",$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


<h1><b>Age Demographics</b></h1>

In [6]:
#Create age bins, break up by total count and percentage
bin_labels = ["Less than 10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-40", "40+"]
bins = [0,9.99,14.99,19.99,24.99,29.99,34.99,39.99,200]
player_df["Age Range"] = pd.cut(player_df['Age'], bins=bins, labels = bin_labels)

ageRange = player_df.groupby("Age Range")
totalAgeCount = ageRange.nunique()["SN"]
percentAge = totalAgeCount/totalPlayers * 100

printAgeStats_df = pd.DataFrame({"Total Count" : totalAgeCount, "Percentage of Players" : percentAge})
printAgeStats_df.style.format({"Percentage of Players": "{:.2f}%"})

Unnamed: 0_level_0,Total Count,Percentage of Players
Age Range,Unnamed: 1_level_1,Unnamed: 2_level_1
Less than 10,17,2.95%
10-14,22,3.82%
15-19,107,18.58%
20-24,258,44.79%
25-29,77,13.37%
30-34,52,9.03%
35-40,31,5.38%
40+,12,2.08%


<h1><b>Purchasing Analysis (Age)</b></h1>

In [7]:
#Using bins from above break down purchase data
totalCountAge = ageRange["Purchase ID"].count()
avgPurchPriceAge = ageRange["Price"].mean()
totalPurchAge = ageRange["Price"].sum() 
avgPerPersonAge = totalPurchAge/totalAgeCount

printAge_df = pd.DataFrame({"Purchase Count" : totalCountAge, "Average Purchase Price" : avgPurchPriceAge,
                            "Total Purchase Value" : totalPurchAge, "Average Purchase Total per Person": avgPerPersonAge}
                          )
print("### Age Demographics")
printAge_df.style.format({"Average Purchase Price": "${:,.2f}",
                          "Total Purchase Value": "${:,.2f}",
                          "Average Purchase Total per Person": "${:,.2f}"})

### Age Demographics


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Average Purchase Total per Person
Age Range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Less than 10,23,$3.35,$77.13,$4.54
10-14,28,$2.96,$82.78,$3.76
15-19,136,$3.04,$412.89,$3.86
20-24,365,$3.05,"$1,114.06",$4.32
25-29,101,$2.90,$293.00,$3.81
30-34,73,$2.93,$214.00,$4.12
35-40,41,$3.60,$147.67,$4.76
40+,13,$2.94,$38.24,$3.19


<h1><b>Top Spenders</b></h1>

In [18]:
#Find top spenders and break down purchase count, avg purchase and total value
topSpenders = player_df.groupby("SN")
totalPerPerson = topSpenders["Purchase ID"].count()
avgPerPerson = topSpenders["Price"].mean()
totalPerPerson = topSpenders["Price"].sum()

printTopSpender_df = pd.DataFrame({"Purchase Count" : totalPerPerson, "Average Purchase Price" : avgPerPerson,
                                    "Total Purchase Value" : totalPerPerson})
top5 = printTopSpender_df.sort_values(["Total Purchase Value"], ascending=False).head()
top5.style.format({"Average Purchase Price": "${:,.2f}",
                    "Total Purchase Value": "${:,.2f}",
                    "Purchase Count": "{:,.2f}"})

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,18.96,$3.79,$18.96
Idastidru52,15.45,$3.86,$15.45
Chamjask73,13.83,$4.61,$13.83
Iral74,13.62,$3.40,$13.62
Iskadarya95,13.1,$4.37,$13.10


<h1><b>Most Popular Items</b></h1>

In [13]:
#Find the most popular items and total Value, display top 5
newItems = player_df[["Item ID", "Item Name", "Price"]]
groupedItems = newItems.groupby(["Item ID", "Item Name"])
purchCount = groupedItems["Price"].count()
totalValue = (groupedItems["Price"].sum())
itemPrice = totalValue/purchCount

printTopItem_df = pd.DataFrame({"Purchase Count" : purchCount, "Item Price" : itemPrice, "Total Value" : totalValue})
topItems = printTopItem_df.sort_values(["Purchase Count"], ascending=False).head()
topItems.style.format({"Item Price": "${:,.2f}",
                       "Total Value": "${:,.2f}"})


Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
145,Fiery Glass Crusader,9,$4.58,$41.22
132,Persuasion,9,$3.22,$28.99
108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77


<h1><b>Most Profitable Items</b></h1>

In [10]:
#Find the most profitable items, display top 5
top5 = printTopItem_df.sort_values(["Total Value"], ascending =False).head()
top5.style.format({"Item Price": "${:,.2f}",
                   "Total Value": "${:,.2f}"})


Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22
103,Singed Scalpel,8,$4.35,$34.80


<h1><b1>Analysis</b></h1>
<p>The Heroes of Pymoli game seems to be more geared to the young male demographic. Males make up about 84% of
   the users in the game, with its biggest group in the early 20s. They are also the largest spenders in the game, with 
   the teenage group in second.</p>

<p>The most popular items also seem to be the most profitable items, these must be some of the best items in the game 
   as most people who purchases items seems go for the same items. There is only 1 item that is in the top of popular items
   but does not crack the top 5 of Most Profitable items</p>

<p>The top spenders dont seem to always be buying the most expensive items, for the most part at the average purchase price 
   is in the mid 3 dollar range. Even though they are the most popular items, maybe these items are some of more well designed
    which and the top spenders have more disposable income to buy better looking items, even if they are not the most powerful.</p>