Skip to content

mamclaughlin/python-gamingData

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

14 Commits
 
 
 
 
 
 

Repository files navigation

Dependencies & Reading File

#import dependencies
import pandas as pd
import numpy as np

#import file
file = "Resources/purchase_data.json"

#read file
df = pd.read_json(file)
df.head()
# df.style.format({"Price": "${:.2f}"})
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
Age Gender Item ID Item Name Price SN
0 38 Male 165 Bone Crushing Silver Skewer 3.37 Aelalis34
1 21 Male 119 Stormbringer, Dark Blade of Ending Misery 2.32 Eolo46
2 34 Male 174 Primitive Blade 2.46 Assastnya25
3 21 Male 92 Final Critic 1.36 Pheusrical25
4 23 Male 63 Stormfury Mace 1.27 Aela59

Number of Players

#Player Count
total_players = pd.DataFrame(
    np.array(
        [
            [len(df["SN"].unique())], 
        ]))

total_players.columns = ["Total Players"]
total_players
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
Total Players
0 573

Purchasing Analysis (Total)

total_purchase_amount_sum = df["Price"].sum()
average_price = total_purchase_amount_sum/len(df.index)
total_num_purchases = len(df.index)
total_rev = df["Price"].sum()


purchasing_analysis = pd.DataFrame(
    np.array(
        [
            [len(df["Item ID"].unique()), 
             average_price,
             total_num_purchases,
             total_rev
            ]
            
        ]))
purchasing_analysis.columns = ["Number of Unique Items", "Average Price", "Number of Purchases", "Total Revenue"]
purchasing_analysis.style.format({"Average Price": "${:.2f}", "Total Revenue":"${:.2f}"})
purchasing_analysis
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
Number of Unique Items Average Price Number of Purchases Total Revenue
0 183.0 2.931192 780.0 2286.33

Gender Demographics

# Count
gender = df["Gender"].value_counts()

#more precise, by SN 
new_gender = df.groupby("Gender")["SN"].unique()
len(new_gender["Female"])
len(new_gender["Male"])
len(new_gender["Other / Non-Disclosed"])


# Percentage
female_gender_percent = len(new_gender["Female"])/len(df["SN"].unique()) * 100
male_gender_percent = len(new_gender["Male"])/len(df["SN"].unique()) * 100
non_disclosed_gender = len(new_gender["Other / Non-Disclosed"])/len(df["SN"].unique()) * 100

percentage = pd.DataFrame(
        {   "Gender": ["Female", "Male", "Non Disclosed Gender"],
            "Percentage of Players": [female_gender_percent,male_gender_percent,non_disclosed_gender],
            "Total Count": [len(new_gender["Female"]),len(new_gender["Male"]),len(new_gender["Other / Non-Disclosed"])],
        }
    )
percentage[["Gender","Total Count","Percentage of Players"]]
new_percentage = percentage.set_index(['Gender'])

del new_percentage.index.name
new_percentage
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
Percentage of Players Total Count
Female 17.452007 100
Male 81.151832 465
Non Disclosed Gender 1.396161 8

Purchasing Analysis (Gender)

#Purchase Count Variables
filtered_f_purchase_price = df.loc[df["Gender"] == "Female"]
filtered_m_purchase_price = df.loc[df["Gender"] == "Male"]
filtered_n_purchase_price = df.loc[df["Gender"] == "Other / Non-Disclosed"]

# Purchase total
filtered_f_price_total = filtered_f_purchase_price["Price"].sum()
filtered_m_price_total = filtered_m_purchase_price["Price"].sum()
filtered_n_price_total = filtered_n_purchase_price["Price"].sum()

# Average price 
avg_purchase_f_price = filtered_f_price_total/gender["Female"]
avg_purchase_m_price = filtered_m_price_total/gender["Male"]
avg_purchase_n_price = filtered_n_price_total/gender["Other / Non-Disclosed"]

# normalized purchase
normalized_f_purchase = filtered_f_price_total/len(filtered_f_purchase_price["SN"].value_counts())
normalized_m_purchase = filtered_m_price_total/len(filtered_m_purchase_price["SN"].value_counts())
normalized_n_purchase = filtered_n_price_total/len(filtered_n_purchase_price["SN"].value_counts())


purchase_analysis = pd.DataFrame(
        {   "Gender": ["Female", "Male", "Non Disclosed Gender"],
            "Purchase Count": [gender["Female"],gender["Male"],gender["Other / Non-Disclosed"]],
            "Average Purchase Price": [avg_purchase_f_price,avg_purchase_m_price,avg_purchase_n_price],
            "Total Purchase Value": [filtered_f_price_total,filtered_m_price_total,filtered_n_price_total],
            "Normalized Totals": [normalized_f_purchase,normalized_m_purchase,normalized_n_purchase],
        }
    )
purchase_analysis[["Gender","Purchase Count","Average Purchase Price", "Total Purchase Value", "Normalized Totals"]]
new_purchase_analysis = purchase_analysis.set_index(['Gender'])

del new_purchase_analysis.index.name
new_purchase_analysis
---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

<ipython-input-152-17120ae36a90> in <module>()
      1 #Purchase Count Variables
----> 2 filtered_f_purchase_price = df.loc[df["Gender"] == "Female" and df["SN"].unique()]
      3 filtered_m_purchase_price = df.loc[df["Gender"] == "Male"]
      4 filtered_n_purchase_price = df.loc[df["Gender"] == "Other / Non-Disclosed"]
      5 


/Users/Mariah/anaconda/lib/python3.6/site-packages/pandas/core/generic.py in __nonzero__(self)
    951         raise ValueError("The truth value of a {0} is ambiguous. "
    952                          "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
--> 953                          .format(self.__class__.__name__))
    954 
    955     __bool__ = __nonzero__


ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

Age Demographics

Count and Percentage

#Count
under_ten = len(df.loc[df["Age"] <= 10]) 
ten_fourteen = len(df.loc[(df["Age"] >= 10) & (df["Age"] <= 14)])
fift_nint = len(df.loc[(df["Age"] >= 15) & (df["Age"] <= 19)]) 
twent_twentfour = len(df.loc[(df["Age"] >= 20) & (df["Age"] <= 24)])
twentfive_twentnine = len(df.loc[(df["Age"] >= 25) & (df["Age"] <= 29)])
thirty_thirtyfour = len(df.loc[(df["Age"] >= 30) & (df["Age"] <= 34)])
thirtyfi_thirtynine = len(df.loc[(df["Age"] >= 35) & (df["Age"] <= 39)])
fourty_plus = len(df.loc[(df["Age"] >= 40)])

# Percentage
p_under_ten = under_ten/len(df.index) * 100
p_ten_fourteen = ten_fourteen/len(df.index) * 100
p_fift_nint = fift_nint/len(df.index) * 100
p_twent_twentfour = twent_twentfour/len(df.index) * 100
p_twentfive_twentnine = twentfive_twentnine/len(df.index) * 100
p_thirty_thirtyfour = thirty_thirtyfour/len(df.index) * 100
p_thirtyfi_thirtynine = thirtyfi_thirtynine/len(df.index) * 100
p_fourty_plus = fourty_plus/len(df.index) * 100

age_count_analysis = pd.DataFrame(
        {   "Age": ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"],
            "Percentage of Players": [p_under_ten, p_ten_fourteen, p_fift_nint, p_twent_twentfour, p_twentfive_twentnine, p_thirty_thirtyfour, p_thirtyfi_thirtynine, p_fourty_plus],
            "Total Count": [under_ten, ten_fourteen, fift_nint, twent_twentfour, twentfive_twentnine, thirty_thirtyfour,thirtyfi_thirtynine,fourty_plus],
        }
    )
age_count_analysis[["Age","Percentage of Players", "Total Count"]]
new_age_count_analysis = age_count_analysis.set_index(["Age"])

del new_age_count_analysis.index.name
new_age_count_analysis
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
Percentage of Players Total Count
<10 4.102564 32
10-14 4.487179 35
15-19 17.051282 133
20-24 43.076923 336
25-29 16.025641 125
30-34 8.205128 64
35-39 5.384615 42
40+ 2.179487 17

Purchase Count, Average Purchase Price, Total Purchase Value, Normalized Totals

# average purchase price --> use len() w/o unique()
# general values to work with
ap_under_ten = df.loc[df["Age"] <= 10] 
ap_ten_fourteen = df.loc[(df["Age"] >= 10) & (df["Age"] <= 14)]
ap_fift_nint = df.loc[(df["Age"] >= 15) & (df["Age"] <= 19)]
ap_twent_twentfour = df.loc[(df["Age"] >= 20) & (df["Age"] <= 24)]
ap_twentfive_twentnine = df.loc[(df["Age"] >= 25) & (df["Age"] <= 29)]
ap_thirty_thirtyfour = df.loc[(df["Age"] >= 30) & (df["Age"] <= 34)]
ap_thirtyfi_thirtynine = df.loc[(df["Age"] >= 35) & (df["Age"] <= 39)]
ap_fourty_plus = df.loc[(df["Age"] >= 40)]

# normalized 
n_under_ten = len(ap_under_ten["SN"].unique())
n_ten_fourteen = len(ap_ten_fourteen["SN"].unique())
n_fift_nint = len(ap_fift_nint["SN"].unique())
n_twent_twentfour = len(ap_twent_twentfour["SN"].unique())
n_twentfive_twentnine = len(ap_twentfive_twentnine["SN"].unique())
n_thirty_thirtyfour = len(ap_thirty_thirtyfour["SN"].unique())
n_thirtyfi_thirtynine = len(ap_thirtyfi_thirtynine["SN"].unique())
n_fourty_plus = len(ap_fourty_plus["SN"].unique())


# averages
av_under_ten = ap_under_ten["Price"].sum()/under_ten
av_ten_fourteen = ap_ten_fourteen["Price"].sum()/ten_fourteen
av_fift_nint = ap_fift_nint["Price"].sum()/fift_nint
av_twent_twentfour = ap_twent_twentfour["Price"].sum()/twent_twentfour
av_twentfive_twentnine = ap_twentfive_twentnine["Price"].sum()/twentfive_twentnine
av_thirty_thirtyfour = ap_thirty_thirtyfour["Price"].sum()/thirty_thirtyfour
av_thirtyfi_thirtynine = ap_thirtyfi_thirtynine["Price"].sum()/thirtyfi_thirtynine
av_fourty_plus = ap_fourty_plus["Price"].sum()/fourty_plus

# total purchase value
t_under_ten = ap_under_ten["Price"].sum()
t_ten_fourteen = ap_ten_fourteen["Price"].sum()
t_fift_nint = ap_fift_nint["Price"].sum()
t_twent_twentfour = ap_twent_twentfour["Price"].sum()
t_twentfive_twentnine = ap_twentfive_twentnine["Price"].sum()
t_thirty_thirtyfour = ap_thirty_thirtyfour["Price"].sum()
t_thirtyfi_thirtynine = ap_thirtyfi_thirtynine["Price"].sum()
t_fourty_plus = ap_fourty_plus["Price"].sum()


ap_analysis = pd.DataFrame(
        {   "Age": ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"],
            "Purchase Count": [under_ten, ten_fourteen, fift_nint, twent_twentfour, twentfive_twentnine,thirty_thirtyfour,thirtyfi_thirtynine,fourty_plus],
            "Average Purchase Price": [av_under_ten,av_ten_fourteen,av_fift_nint,av_twent_twentfour,av_twentfive_twentnine,av_thirty_thirtyfour,av_thirtyfi_thirtynine,av_fourty_plus],
           "Total Purchase Value": [t_under_ten,t_ten_fourteen,t_fift_nint,t_twent_twentfour,t_twentfive_twentnine,t_thirty_thirtyfour,t_thirtyfi_thirtynine, t_fourty_plus],
            "Normalized Totals": [n_under_ten,n_ten_fourteen,n_fift_nint,n_twent_twentfour,n_twentfive_twentnine,n_thirty_thirtyfour,n_thirtyfi_thirtynine,n_fourty_plus]
        }
    )
ap_analysis[["Age","Purchase Count", "Average Purchase Price"]]
new_ap_analysis = ap_analysis.set_index(['Age'])

del new_ap_analysis.index.name
new_ap_analysis
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
Average Purchase Price Normalized Totals Purchase Count Total Purchase Value
<10 3.019375 22 32 96.62
10-14 2.770000 23 35 96.95
15-19 2.905414 100 133 386.42
20-24 2.913006 259 336 978.77
25-29 2.962640 87 125 370.33
30-34 3.082031 47 64 197.25
35-39 2.842857 27 42 119.40
40+ 3.161765 11 17 53.75

Top Spenders

df2 = df.groupby('SN').sum()
df4 = df2.sort_values(["Price"], ascending=False)

# get presence of each SN
df3 = df["SN"].value_counts()


users = []
total_purchase_value = []
total_counts = []
average_purchase = []
df3.loc[users]

for x in range(5):
    users.append(df4.index[x])
    total_purchase_value.append(df4["Price"][x])
    total_counts.append(df3.loc[users][x])
    average_purchase.append(total_purchase_value[x]/total_counts[x])


top_analysis = pd.DataFrame(
        {   "SN": [x for x in users],
            "Purchase Count": [x for x in total_counts],
            "Average Purchase Price": [x for x in average_purchase],
            "Total Purchase Value": [x for x in total_purchase_value],
        }
    )
top_analysis[["SN","Purchase Count", "Average Purchase Price", "Total Purchase Value"]]

new_top_analysis = top_analysis.set_index(['SN'])

del new_top_analysis.index.name
new_top_analysis
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
Average Purchase Price Purchase Count Total Purchase Value
Undirrala66 3.412000 5 17.06
Saedue76 3.390000 4 13.56
Mindimnya67 3.185000 4 12.74
Haellysu29 4.243333 3 12.73
Eoda93 3.860000 3 11.58

Most Popular Items

# Most Popular Items
item_id = []
item_purch_count = []

# most popular item - find most popular items
items = df["Item ID"].value_counts()

#used to test information
df.loc[df["Item ID"] == 84][["Item Name","Price"]]

#get Item IDs
for i in range(5):
    item_id.append(items.index[i])

# find price of items
for i in range(5):
    counts = df.loc[df["Item ID"].unique() == item_id[i]]
    item_purch_count.append(counts["Price"])


#filters item id's that match the array of popular items
matches = df.loc[df["Item ID"].isin(item_id)][["Item Name","Price", "Item ID"]]

#gets purchase count and sum
grouped_matches = matches.groupby(["Item Name"]).count()
matches_price_sum = matches.groupby(["Item Name"]).sum()

popular_items = pd.DataFrame(
        {   "Item ID": [x for x in item_id],
            "Item Name": [x for x in grouped_matches.index],
            "Purchase Count": [x for x in grouped_matches["Price"]],
            "Item Price": [x for x in matches_price_sum["Price"]/grouped_matches["Price"]],
            "Total Purchase Value": [x for x in matches_price_sum["Price"]]
        }
    )
popular_items[["Item ID", "Item Name", "Purchase Count", "Item Price", "Total Purchase Value"]]

new_popular_items = popular_items.set_index(['Item Name'])

del new_popular_items.index.name
new_popular_items
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
Item ID Item Price Purchase Count Total Purchase Value
Arcane Gem 84 2.23 11 24.53
Betrayal, Whisper of Grieving Widows 39 2.35 11 25.85
Retribution Axe 31 4.14 9 37.26
Trickster 34 2.07 9 18.63
Woeful Adamantite Claymore 175 1.24 9 11.16

Most Profitable Items

p_item_id = []
p_item_purch_total = []

# most profitable item - find most popular items
p_items = df.groupby("Item ID").sum(ascending=True)
p_items_totals = p_items.sort_values(["Price"], ascending=False)

p_items_totals.iloc[0:5]["Price"].to_frame()

# #used to test information
# df.loc[df["Item ID"] == 84][["Item Name","Price"]]

#get Item IDs & purchase total
for i in range(5):
    p_item_id.append(p_items_totals.index[i])
    p_item_purch_total.append(p_items_totals["Price"].head(i))

p_item_id
p_item_purch_total


# #filters item id's that match the array of popular items
p_matches = df.loc[df["Item ID"].isin(p_item_id)][["Item Name","Price", "Item ID"]]


# #gets purchase count and sum
p_grouped_matches = p_matches.groupby(["Item Name"]).count()
p_matches_price_sum = p_matches.groupby(["Item Name"]).sum()

profitable_items = pd.DataFrame(
        {   "Item ID": [x for x in p_item_id],
            "Item Name": [x for x in p_grouped_matches.index],
            "Purchase Count": [x for x in p_grouped_matches["Price"]],
            "Item Price": [x for x in p_matches_price_sum["Price"]/p_grouped_matches["Price"]],
            "Total Purchase Value": [x for x in p_matches_price_sum["Price"]]
        }
    )
profitable_items[["Item ID", "Item Name", "Purchase Count", "Item Price", "Total Purchase Value"]]

new_profitable_items = profitable_items.set_index(['Item Name'])

del new_profitable_items.index.name
new_profitable_items
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
Item ID Item Price Purchase Count Total Purchase Value
Orenmir 34 4.95 6 29.70
Retribution Axe 115 4.14 9 37.26
Singed Scalpel 32 4.87 6 29.22
Spectral Diamond Doomblade 103 4.25 7 29.75
Splitter, Foe Of Subtlety 107 3.61 8 28.88

About

No description, website, or topics provided.

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

 
 
 

Contributors