Dependencies & Reading File

#import dependencies
import pandas as pd
import numpy as np

#import file
file = "Resources/purchase_data.json"

#read file
df = pd.read_json(file)
df.head()
# df.style.format({"Price": "${:.2f}"})

.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}

</style>

	Age	Gender	Item ID	Item Name	Price	SN
0	38	Male	165	Bone Crushing Silver Skewer	3.37	Aelalis34
1	21	Male	119	Stormbringer, Dark Blade of Ending Misery	2.32	Eolo46
2	34	Male	174	Primitive Blade	2.46	Assastnya25
3	21	Male	92	Final Critic	1.36	Pheusrical25
4	23	Male	63	Stormfury Mace	1.27	Aela59

Number of Players

#Player Count
total_players = pd.DataFrame(
    np.array(
        [
            [len(df["SN"].unique())], 
        ]))

total_players.columns = ["Total Players"]
total_players

.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}

</style>

	Total Players
0	573

Purchasing Analysis (Total)

total_purchase_amount_sum = df["Price"].sum()
average_price = total_purchase_amount_sum/len(df.index)
total_num_purchases = len(df.index)
total_rev = df["Price"].sum()


purchasing_analysis = pd.DataFrame(
    np.array(
        [
            [len(df["Item ID"].unique()), 
             average_price,
             total_num_purchases,
             total_rev
            ]
            
        ]))
purchasing_analysis.columns = ["Number of Unique Items", "Average Price", "Number of Purchases", "Total Revenue"]
purchasing_analysis.style.format({"Average Price": "${:.2f}", "Total Revenue":"${:.2f}"})
purchasing_analysis

.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}

</style>

	Number of Unique Items	Average Price	Number of Purchases	Total Revenue
0	183.0	2.931192	780.0	2286.33

Gender Demographics

# Count
gender = df["Gender"].value_counts()

#more precise, by SN 
new_gender = df.groupby("Gender")["SN"].unique()
len(new_gender["Female"])
len(new_gender["Male"])
len(new_gender["Other / Non-Disclosed"])


# Percentage
female_gender_percent = len(new_gender["Female"])/len(df["SN"].unique()) * 100
male_gender_percent = len(new_gender["Male"])/len(df["SN"].unique()) * 100
non_disclosed_gender = len(new_gender["Other / Non-Disclosed"])/len(df["SN"].unique()) * 100

percentage = pd.DataFrame(
        {   "Gender": ["Female", "Male", "Non Disclosed Gender"],
            "Percentage of Players": [female_gender_percent,male_gender_percent,non_disclosed_gender],
            "Total Count": [len(new_gender["Female"]),len(new_gender["Male"]),len(new_gender["Other / Non-Disclosed"])],
        }
    )
percentage[["Gender","Total Count","Percentage of Players"]]
new_percentage = percentage.set_index(['Gender'])

del new_percentage.index.name
new_percentage

.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}

</style>

	Percentage of Players	Total Count
Female	17.452007	100
Male	81.151832	465
Non Disclosed Gender	1.396161	8

Purchasing Analysis (Gender)

#Purchase Count Variables
filtered_f_purchase_price = df.loc[df["Gender"] == "Female"]
filtered_m_purchase_price = df.loc[df["Gender"] == "Male"]
filtered_n_purchase_price = df.loc[df["Gender"] == "Other / Non-Disclosed"]

# Purchase total
filtered_f_price_total = filtered_f_purchase_price["Price"].sum()
filtered_m_price_total = filtered_m_purchase_price["Price"].sum()
filtered_n_price_total = filtered_n_purchase_price["Price"].sum()

# Average price 
avg_purchase_f_price = filtered_f_price_total/gender["Female"]
avg_purchase_m_price = filtered_m_price_total/gender["Male"]
avg_purchase_n_price = filtered_n_price_total/gender["Other / Non-Disclosed"]

# normalized purchase
normalized_f_purchase = filtered_f_price_total/len(filtered_f_purchase_price["SN"].value_counts())
normalized_m_purchase = filtered_m_price_total/len(filtered_m_purchase_price["SN"].value_counts())
normalized_n_purchase = filtered_n_price_total/len(filtered_n_purchase_price["SN"].value_counts())


purchase_analysis = pd.DataFrame(
        {   "Gender": ["Female", "Male", "Non Disclosed Gender"],
            "Purchase Count": [gender["Female"],gender["Male"],gender["Other / Non-Disclosed"]],
            "Average Purchase Price": [avg_purchase_f_price,avg_purchase_m_price,avg_purchase_n_price],
            "Total Purchase Value": [filtered_f_price_total,filtered_m_price_total,filtered_n_price_total],
            "Normalized Totals": [normalized_f_purchase,normalized_m_purchase,normalized_n_purchase],
        }
    )
purchase_analysis[["Gender","Purchase Count","Average Purchase Price", "Total Purchase Value", "Normalized Totals"]]
new_purchase_analysis = purchase_analysis.set_index(['Gender'])

del new_purchase_analysis.index.name
new_purchase_analysis

---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

<ipython-input-152-17120ae36a90> in <module>()
      1 #Purchase Count Variables
----> 2 filtered_f_purchase_price = df.loc[df["Gender"] == "Female" and df["SN"].unique()]
      3 filtered_m_purchase_price = df.loc[df["Gender"] == "Male"]
      4 filtered_n_purchase_price = df.loc[df["Gender"] == "Other / Non-Disclosed"]
      5 


/Users/Mariah/anaconda/lib/python3.6/site-packages/pandas/core/generic.py in __nonzero__(self)
    951         raise ValueError("The truth value of a {0} is ambiguous. "
    952                          "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
--> 953                          .format(self.__class__.__name__))
    954 
    955     __bool__ = __nonzero__


ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

Age Demographics

Count and Percentage

#Count
under_ten = len(df.loc[df["Age"] <= 10]) 
ten_fourteen = len(df.loc[(df["Age"] >= 10) & (df["Age"] <= 14)])
fift_nint = len(df.loc[(df["Age"] >= 15) & (df["Age"] <= 19)]) 
twent_twentfour = len(df.loc[(df["Age"] >= 20) & (df["Age"] <= 24)])
twentfive_twentnine = len(df.loc[(df["Age"] >= 25) & (df["Age"] <= 29)])
thirty_thirtyfour = len(df.loc[(df["Age"] >= 30) & (df["Age"] <= 34)])
thirtyfi_thirtynine = len(df.loc[(df["Age"] >= 35) & (df["Age"] <= 39)])
fourty_plus = len(df.loc[(df["Age"] >= 40)])

# Percentage
p_under_ten = under_ten/len(df.index) * 100
p_ten_fourteen = ten_fourteen/len(df.index) * 100
p_fift_nint = fift_nint/len(df.index) * 100
p_twent_twentfour = twent_twentfour/len(df.index) * 100
p_twentfive_twentnine = twentfive_twentnine/len(df.index) * 100
p_thirty_thirtyfour = thirty_thirtyfour/len(df.index) * 100
p_thirtyfi_thirtynine = thirtyfi_thirtynine/len(df.index) * 100
p_fourty_plus = fourty_plus/len(df.index) * 100

age_count_analysis = pd.DataFrame(
        {   "Age": ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"],
            "Percentage of Players": [p_under_ten, p_ten_fourteen, p_fift_nint, p_twent_twentfour, p_twentfive_twentnine, p_thirty_thirtyfour, p_thirtyfi_thirtynine, p_fourty_plus],
            "Total Count": [under_ten, ten_fourteen, fift_nint, twent_twentfour, twentfive_twentnine, thirty_thirtyfour,thirtyfi_thirtynine,fourty_plus],
        }
    )
age_count_analysis[["Age","Percentage of Players", "Total Count"]]
new_age_count_analysis = age_count_analysis.set_index(["Age"])

del new_age_count_analysis.index.name
new_age_count_analysis

.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}

</style>

	Percentage of Players	Total Count
<10	4.102564	32
10-14	4.487179	35
15-19	17.051282	133
20-24	43.076923	336
25-29	16.025641	125
30-34	8.205128	64
35-39	5.384615	42
40+	2.179487	17

Purchase Count, Average Purchase Price, Total Purchase Value, Normalized Totals

# average purchase price --> use len() w/o unique()
# general values to work with
ap_under_ten = df.loc[df["Age"] <= 10] 
ap_ten_fourteen = df.loc[(df["Age"] >= 10) & (df["Age"] <= 14)]
ap_fift_nint = df.loc[(df["Age"] >= 15) & (df["Age"] <= 19)]
ap_twent_twentfour = df.loc[(df["Age"] >= 20) & (df["Age"] <= 24)]
ap_twentfive_twentnine = df.loc[(df["Age"] >= 25) & (df["Age"] <= 29)]
ap_thirty_thirtyfour = df.loc[(df["Age"] >= 30) & (df["Age"] <= 34)]
ap_thirtyfi_thirtynine = df.loc[(df["Age"] >= 35) & (df["Age"] <= 39)]
ap_fourty_plus = df.loc[(df["Age"] >= 40)]

# normalized 
n_under_ten = len(ap_under_ten["SN"].unique())
n_ten_fourteen = len(ap_ten_fourteen["SN"].unique())
n_fift_nint = len(ap_fift_nint["SN"].unique())
n_twent_twentfour = len(ap_twent_twentfour["SN"].unique())
n_twentfive_twentnine = len(ap_twentfive_twentnine["SN"].unique())
n_thirty_thirtyfour = len(ap_thirty_thirtyfour["SN"].unique())
n_thirtyfi_thirtynine = len(ap_thirtyfi_thirtynine["SN"].unique())
n_fourty_plus = len(ap_fourty_plus["SN"].unique())


# averages
av_under_ten = ap_under_ten["Price"].sum()/under_ten
av_ten_fourteen = ap_ten_fourteen["Price"].sum()/ten_fourteen
av_fift_nint = ap_fift_nint["Price"].sum()/fift_nint
av_twent_twentfour = ap_twent_twentfour["Price"].sum()/twent_twentfour
av_twentfive_twentnine = ap_twentfive_twentnine["Price"].sum()/twentfive_twentnine
av_thirty_thirtyfour = ap_thirty_thirtyfour["Price"].sum()/thirty_thirtyfour
av_thirtyfi_thirtynine = ap_thirtyfi_thirtynine["Price"].sum()/thirtyfi_thirtynine
av_fourty_plus = ap_fourty_plus["Price"].sum()/fourty_plus

# total purchase value
t_under_ten = ap_under_ten["Price"].sum()
t_ten_fourteen = ap_ten_fourteen["Price"].sum()
t_fift_nint = ap_fift_nint["Price"].sum()
t_twent_twentfour = ap_twent_twentfour["Price"].sum()
t_twentfive_twentnine = ap_twentfive_twentnine["Price"].sum()
t_thirty_thirtyfour = ap_thirty_thirtyfour["Price"].sum()
t_thirtyfi_thirtynine = ap_thirtyfi_thirtynine["Price"].sum()
t_fourty_plus = ap_fourty_plus["Price"].sum()


ap_analysis = pd.DataFrame(
        {   "Age": ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"],
            "Purchase Count": [under_ten, ten_fourteen, fift_nint, twent_twentfour, twentfive_twentnine,thirty_thirtyfour,thirtyfi_thirtynine,fourty_plus],
            "Average Purchase Price": [av_under_ten,av_ten_fourteen,av_fift_nint,av_twent_twentfour,av_twentfive_twentnine,av_thirty_thirtyfour,av_thirtyfi_thirtynine,av_fourty_plus],
           "Total Purchase Value": [t_under_ten,t_ten_fourteen,t_fift_nint,t_twent_twentfour,t_twentfive_twentnine,t_thirty_thirtyfour,t_thirtyfi_thirtynine, t_fourty_plus],
            "Normalized Totals": [n_under_ten,n_ten_fourteen,n_fift_nint,n_twent_twentfour,n_twentfive_twentnine,n_thirty_thirtyfour,n_thirtyfi_thirtynine,n_fourty_plus]
        }
    )
ap_analysis[["Age","Purchase Count", "Average Purchase Price"]]
new_ap_analysis = ap_analysis.set_index(['Age'])

del new_ap_analysis.index.name
new_ap_analysis

.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}

</style>

	Average Purchase Price	Normalized Totals	Purchase Count	Total Purchase Value
<10	3.019375	22	32	96.62
10-14	2.770000	23	35	96.95
15-19	2.905414	100	133	386.42
20-24	2.913006	259	336	978.77
25-29	2.962640	87	125	370.33
30-34	3.082031	47	64	197.25
35-39	2.842857	27	42	119.40
40+	3.161765	11	17	53.75

Top Spenders

df2 = df.groupby('SN').sum()
df4 = df2.sort_values(["Price"], ascending=False)

# get presence of each SN
df3 = df["SN"].value_counts()


users = []
total_purchase_value = []
total_counts = []
average_purchase = []
df3.loc[users]

for x in range(5):
    users.append(df4.index[x])
    total_purchase_value.append(df4["Price"][x])
    total_counts.append(df3.loc[users][x])
    average_purchase.append(total_purchase_value[x]/total_counts[x])


top_analysis = pd.DataFrame(
        {   "SN": [x for x in users],
            "Purchase Count": [x for x in total_counts],
            "Average Purchase Price": [x for x in average_purchase],
            "Total Purchase Value": [x for x in total_purchase_value],
        }
    )
top_analysis[["SN","Purchase Count", "Average Purchase Price", "Total Purchase Value"]]

new_top_analysis = top_analysis.set_index(['SN'])

del new_top_analysis.index.name
new_top_analysis

.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}

</style>

	Average Purchase Price	Purchase Count	Total Purchase Value
Undirrala66	3.412000	5	17.06
Saedue76	3.390000	4	13.56
Mindimnya67	3.185000	4	12.74
Haellysu29	4.243333	3	12.73
Eoda93	3.860000	3	11.58

Most Popular Items

# Most Popular Items
item_id = []
item_purch_count = []

# most popular item - find most popular items
items = df["Item ID"].value_counts()

#used to test information
df.loc[df["Item ID"] == 84][["Item Name","Price"]]

#get Item IDs
for i in range(5):
    item_id.append(items.index[i])

# find price of items
for i in range(5):
    counts = df.loc[df["Item ID"].unique() == item_id[i]]
    item_purch_count.append(counts["Price"])


#filters item id's that match the array of popular items
matches = df.loc[df["Item ID"].isin(item_id)][["Item Name","Price", "Item ID"]]

#gets purchase count and sum
grouped_matches = matches.groupby(["Item Name"]).count()
matches_price_sum = matches.groupby(["Item Name"]).sum()

popular_items = pd.DataFrame(
        {   "Item ID": [x for x in item_id],
            "Item Name": [x for x in grouped_matches.index],
            "Purchase Count": [x for x in grouped_matches["Price"]],
            "Item Price": [x for x in matches_price_sum["Price"]/grouped_matches["Price"]],
            "Total Purchase Value": [x for x in matches_price_sum["Price"]]
        }
    )
popular_items[["Item ID", "Item Name", "Purchase Count", "Item Price", "Total Purchase Value"]]

new_popular_items = popular_items.set_index(['Item Name'])

del new_popular_items.index.name
new_popular_items

.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}

</style>

	Item ID	Item Price	Purchase Count	Total Purchase Value
Arcane Gem	84	2.23	11	24.53
Betrayal, Whisper of Grieving Widows	39	2.35	11	25.85
Retribution Axe	31	4.14	9	37.26
Trickster	34	2.07	9	18.63
Woeful Adamantite Claymore	175	1.24	9	11.16

Most Profitable Items

p_item_id = []
p_item_purch_total = []

# most profitable item - find most popular items
p_items = df.groupby("Item ID").sum(ascending=True)
p_items_totals = p_items.sort_values(["Price"], ascending=False)

p_items_totals.iloc[0:5]["Price"].to_frame()

# #used to test information
# df.loc[df["Item ID"] == 84][["Item Name","Price"]]

#get Item IDs & purchase total
for i in range(5):
    p_item_id.append(p_items_totals.index[i])
    p_item_purch_total.append(p_items_totals["Price"].head(i))

p_item_id
p_item_purch_total


# #filters item id's that match the array of popular items
p_matches = df.loc[df["Item ID"].isin(p_item_id)][["Item Name","Price", "Item ID"]]


# #gets purchase count and sum
p_grouped_matches = p_matches.groupby(["Item Name"]).count()
p_matches_price_sum = p_matches.groupby(["Item Name"]).sum()

profitable_items = pd.DataFrame(
        {   "Item ID": [x for x in p_item_id],
            "Item Name": [x for x in p_grouped_matches.index],
            "Purchase Count": [x for x in p_grouped_matches["Price"]],
            "Item Price": [x for x in p_matches_price_sum["Price"]/p_grouped_matches["Price"]],
            "Total Purchase Value": [x for x in p_matches_price_sum["Price"]]
        }
    )
profitable_items[["Item ID", "Item Name", "Purchase Count", "Item Price", "Total Purchase Value"]]

new_profitable_items = profitable_items.set_index(['Item Name'])

del new_profitable_items.index.name
new_profitable_items

.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}

</style>

	Item ID	Item Price	Purchase Count	Total Purchase Value
Orenmir	34	4.95	6	29.70
Retribution Axe	115	4.14	9	37.26
Singed Scalpel	32	4.87	6	29.22
Spectral Diamond Doomblade	103	4.25	7	29.75
Splitter, Foe Of Subtlety	107	3.61	8	28.88

Name		Name	Last commit message	Last commit date
Latest commit History 14 Commits
GamingData		GamingData
README.md		README.md
Trends.txt		Trends.txt

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Repository files navigation

Dependencies & Reading File

Number of Players

Purchasing Analysis (Total)

Gender Demographics

Purchasing Analysis (Gender)

Age Demographics

Count and Percentage

Purchase Count, Average Purchase Price, Total Purchase Value, Normalized Totals

Top Spenders

Most Popular Items

Most Profitable Items

About

Uh oh!

Releases

Packages

Uh oh!

Contributors

Uh oh!

Languages

Folders and files

Latest commit

History

Repository files navigation

Dependencies & Reading File

Number of Players

Purchasing Analysis (Total)

Gender Demographics

Purchasing Analysis (Gender)

Age Demographics

Count and Percentage

Purchase Count, Average Purchase Price, Total Purchase Value, Normalized Totals

Top Spenders

Most Popular Items

Most Profitable Items

About

Resources

Uh oh!

Stars

Watchers

Forks

Releases

Packages 0

Uh oh!

Contributors

Uh oh!

Languages

Packages