In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# Location of the purchase-records CSV (update this if the data file moves)
file_to_load = "gameData.csv"

# Parse the CSV into the DataFrame that the rest of the notebook works from
purchaseData = pd.read_csv(filepath_or_buffer=file_to_load)


In [2]:
# Preview the first ten purchase rows to check that the file parsed as expected
purchaseData.head(n=10)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
5,5,Yalae81,22,Male,81,Dreamkiss,3.61
6,6,Itheria73,36,Male,169,"Interrogator, Blood Blade of the Queen",2.18
7,7,Iskjaskst81,20,Male,162,Abyssal Shard,2.67
8,8,Undjask33,22,Male,21,Souleater,1.1
9,9,Chanosian48,35,Other / Non-Disclosed,136,Ghastly Adamantite Protector,3.58


In [3]:
# Number of distinct screen names (SN) appearing in the purchase rows
totalPlayers = purchaseData.loc[:, "SN"].nunique(dropna=True)
totalPlayers

576

In [4]:
# Number of distinct Item ID values appearing in the purchase rows
numberUniqueItems = purchaseData.loc[:, "Item ID"].nunique(dropna=True)

In [5]:
# Mean of the Price column over every purchase row, rounded to two decimals
averagePurchaseCost = round(purchaseData.loc[:, "Price"].mean(), 2)
averagePurchaseCost

3.05

In [6]:
# Number of distinct Purchase ID values in the data
totalPurchases = purchaseData.loc[:, "Purchase ID"].nunique(dropna=True)
totalPurchases

780

In [7]:
# Sum of the Price column across all purchase rows
totalRevenue = purchaseData.loc[:, "Price"].sum()
totalRevenue

2379.77

In [8]:
# One-row overview table. Each value is wrapped in a single-element list so
# that pandas builds a frame with exactly one row.
summaryDataFrame = pd.DataFrame(
    data={
        "Number of Unique Items": [numberUniqueItems],
        "Average Price": [averagePurchaseCost],
        "Number of Purchases": [totalPurchases],
        "Total Revenue": [totalRevenue],
    }
)
summaryDataFrame

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,3.05,780,2379.77


Gender Demographics

Percentage and Count of Male Players
Percentage and Count of Female Players
Percentage and Count of Other / Non-Disclosed

In [9]:
# Keep one row per player (the first row for each screen name) so that every
# person is counted once. keep="first" retains a single row per SN, whereas
# keep=False would discard every player who has more than one purchase row.
# .copy() detaches the result from purchaseData so that columns can be added
# to it later without triggering SettingWithCopyWarning.
uniqueDataFrame = purchaseData.drop_duplicates(subset="SN", keep="first").copy()

genderBreakdown = uniqueDataFrame["Gender"].value_counts()
print(genderBreakdown)

# Look each count up by its label rather than by position: value_counts()
# orders rows by frequency, so positional access silently depends on the data,
# and integer-position lookups on a label-indexed Series are deprecated.
# A gender that is absent from the data counts as 0 instead of raising.
malePlayers = genderBreakdown.get("Male", 0)
femalePlayers = genderBreakdown.get("Female", 0)
otherPlayers = genderBreakdown.get("Other / Non-Disclosed", 0)

# Share of all unique players, expressed as a fraction in [0, 1]
malePercent = malePlayers / totalPlayers
femalePercent = femalePlayers / totalPlayers
otherPercent = otherPlayers / totalPlayers

genderSummary = pd.DataFrame({
    "Gender": ["Male", "Female", "Other / Non-Disclosed"],
    "Number of Players": [malePlayers, femalePlayers, otherPlayers],
    "Percent of Players": [malePercent, femalePercent, otherPercent]
})
genderSummary

Male                     484
Female                    81
Other / Non-Disclosed     11
Name: Gender, dtype: int64


Unnamed: 0,Gender,Number of Players,Percent of Players
0,Male,484,0.840278
1,Female,81,0.140625
2,Other / Non-Disclosed,11,0.019097


In [10]:
# Preview the first five rows of the one-row-per-player frame
uniqueDataFrame.head(n=5)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [11]:
def _gender_purchase_stats(purchases, gender, player_count):
    """Summarise the purchases made by one gender.

    Parameters
    ----------
    purchases : DataFrame with at least "Gender" and "Price" columns.
    gender : value of the "Gender" column to select.
    player_count : number of unique players of that gender, used as the
        divisor for the per-person average.

    Returns
    -------
    tuple of (matching rows, total price, mean price, row count,
    total price divided by player_count).
    """
    rows = purchases.loc[purchases["Gender"] == gender]
    total = rows["Price"].sum()
    average_price = rows["Price"].mean()
    purchase_count = rows["Gender"].count()
    return rows, total, average_price, purchase_count, total / player_count


# The same summary is needed for each gender, so it is computed by one helper
# rather than three copy-pasted stanzas. The per-person divisors are the
# unique-player counts computed in the gender demographics cell.
(malePurchaseData,
 malePurchaseAmount,
 malePurchaseAveragePrice,
 malePurchaseCount,
 maleAveragePerPerson) = _gender_purchase_stats(purchaseData, "Male", malePlayers)

(femalePurchaseData,
 femalePurchaseAmount,
 femalePurchaseAveragePrice,
 femalePurchaseCount,
 femaleAveragePerPerson) = _gender_purchase_stats(purchaseData, "Female", femalePlayers)

(otherPurchaseData,
 otherPurchaseAmount,
 otherPurchaseAveragePrice,
 otherPurchaseCount,
 otherAveragePerPerson) = _gender_purchase_stats(
    purchaseData, "Other / Non-Disclosed", otherPlayers
)

In [12]:
# Per-gender purchasing table. Every column lists its values in the same
# order: Male, Female, Other / Non-Disclosed.
genderPurchaseSummary = pd.DataFrame(
    data={
        "Gender": [
            "Male",
            "Female",
            "Other / Non-Disclosed",
        ],
        "Purchase Count": [
            malePurchaseCount,
            femalePurchaseCount,
            otherPurchaseCount,
        ],
        "Average Purchase Price": [
            malePurchaseAveragePrice,
            femalePurchaseAveragePrice,
            otherPurchaseAveragePrice,
        ],
        "Total Purchase Value": [
            malePurchaseAmount,
            femalePurchaseAmount,
            otherPurchaseAmount,
        ],
        "Avg Total Purchase per Person": [
            maleAveragePerPerson,
            femaleAveragePerPerson,
            otherAveragePerPerson,
        ],
    }
)
genderPurchaseSummary

Unnamed: 0,Gender,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
0,Male,652,3.017853,1967.64,4.065372
1,Female,113,3.203009,361.94,4.468395
2,Other / Non-Disclosed,15,3.346,50.19,4.562727


In [13]:
# Age bracket edges and their labels. pd.cut uses right-closed intervals by
# default, so these edges give (-1, 9], (9, 14], ..., (39, 1000]; the final
# edge of 1000 only serves as an upper bound for the "40+" bracket.
bins = [-1, 9, 14, 19, 24, 29, 34, 39, 1000]
binLabels = ["<10","10-14","15-19","20-24","25-29","30-34","35-39","40+"]

# Rebind to a new frame instead of assigning a column onto the de-duplicated
# frame in place; the in-place assignment raised SettingWithCopyWarning.
# Re-running this cell simply recomputes the same column.
uniqueDataFrame = uniqueDataFrame.assign(
    **{"Age Group": pd.cut(uniqueDataFrame["Age"], bins, labels=binLabels)}
)

# sort_index() orders the counts by age bracket (the categorical order produced
# by pd.cut) instead of by frequency, so the rows line up with binLabels.
agePurchaseData = uniqueDataFrame["Age Group"].value_counts().sort_index()

# Spot-check one bracket by label; an integer key here would be ambiguous
# between position and category.
agePurchaseData["10-14"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


22

In [14]:
# Player count per age bracket, in binLabels order. Each count is fetched by
# its bracket label so the result does not depend on how agePurchaseData is
# sorted or on how pandas interprets integer keys on a categorical index.
ageList = [agePurchaseData[label] for label in binLabels]

# Percentage of all unique players in each bracket. Scaling to percent before
# rounding avoids float artefacts such as 2.9499999999999997.
agePercent = [round(100 * count / totalPlayers, 2) for count in ageList]

In [15]:
# Age demographics table: one row per bracket, pairing each label with the
# count and percentage computed for it.
ageDataFrame = pd.DataFrame(
    data={
        "Age": binLabels,
        "Count": ageList,
        "Percent": agePercent,
    }
)
ageDataFrame

Unnamed: 0,Age,Count,Percent
0,<10,17,2.95
1,10-14,22,3.82
2,15-19,107,18.58
3,20-24,258,44.79
4,25-29,77,13.37
5,30-34,52,9.03
6,35-39,31,5.38
7,40+,12,2.08


In [18]:
# Tag every purchase row with its age bracket, using the same bins and labels
# as the player-level age breakdown.
purchaseData["Age Group"] = pd.cut(x=purchaseData["Age"], bins=bins, labels=binLabels)
purchaseData.head(n=5)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price,Age Group
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53,20-24
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56,40+
2,2,Ithergue48,24,Male,92,Final Critic,4.88,20-24
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27,20-24
4,4,Iskosia90,23,Male,131,Fury,1.44,20-24
