# Heroes of Pymoli Data Analysis
* Observed Trend1
* Observed Trend2
* Observed Trend3

In [81]:
# Import Dependencies
import pandas as pd
import os
import mimetypes
from functools import reduce

# Path to resource files
path = "resources"

# Get a file name list
fileList = [f for f in os.listdir(path) if (mimetypes.guess_type(f)[0] == "application/json")]
# print(fileList)

def mergeDF(directory, fileArr):
    dfArr = []    
    if len(fileArr) > 1:
        for file in fileArr:
            dfArr.append(pd.read_json(os.path.join(directory, file)))
        df = reduce(lambda  left,right: pd.merge(left,right,how='outer'), dfArr)
    else:
        df = pd.read_json(os.path.join(directory, fileArr[0]))
    return df

# Create references to the json files and import it into a marged Pandas DataFrame
purchase_df = mergeDF(path, fileList)
# purchase_df

## Player Count

In [8]:
# Count the number of total players
totalCount = purchase_df["SN"].value_counts().count()
pd.DataFrame({"Total Players": [totalCount]})

Unnamed: 0,Total Players
0,612


## Purchasing Analysis (Total)

In [9]:
# Create a new df with formatting
purchasingAnalysis_df = pd.DataFrame({
    "Number of Unique Items":[purchase_df["Item ID"].value_counts().count()],
    "Average Purchase Price":["$" + str(round(purchase_df["Price"].mean(),2))],
    "Total Number of Purchases":[len(purchase_df.index)],
    "Total Revenue":["$" + str(round(purchase_df["Price"].sum(),2))]
})

# Output
purchasingAnalysis_df[[
    "Number of Unique Items",
    "Average Purchase Price",
    "Total Number of Purchases",
    "Total Revenue"
]]

Unnamed: 0,Number of Unique Items,Average Purchase Price,Total Number of Purchases,Total Revenue
0,184,$2.93,858,$2514.43


## Gender Demographics

In [10]:
# Create a new df removed duplicated "SN"
uniquePlayers_df = purchase_df[~purchase_df["SN"].duplicated()]
# uniquePlayers_df

# Group by gender
gender = uniquePlayers_df.groupby("Gender")

# Create a new df
gender_df = pd.DataFrame({
    "Percentage of Players": gender["Age"].count()/totalCount,
    "Total Count":gender["Age"].count()
})

# Format
gender_df["Percentage of Players"] = gender_df["Percentage of Players"].map("{:.2%}".format)

# Sort and Output
gender_df.sort_values(by="Total Count",ascending=False)

Unnamed: 0_level_0,Percentage of Players,Total Count
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,80.88%,495
Female,17.65%,108
Other / Non-Disclosed,1.47%,9


## Purchasing Analysis (Gender)

In [11]:
# Create a new df
purchasing_gender_df = pd.DataFrame({
    "Purchase Count": gender_df["Total Count"],
    "Average Purchase Price":gender["Price"].mean(),
    "Total Purchase Value":gender["Price"].sum(),
    "Normalized Totals":gender["Price"].mean()*gender["Price"].std()
})

#Format and Output
purchasing_gender_df[[
    "Purchase Count",
    "Average Purchase Price",
    "Total Purchase Value",
    "Normalized Totals"
]].style.format({
    "Average Purchase Price":"${:.2f}",
    "Total Purchase Value":"${:.2f}",
    "Normalized Totals":"${:.2f}"
})

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,108,$2.93,$316.62,$3.30
Male,495,$3.00,$1486.37,$3.31
Other / Non-Disclosed,9,$3.30,$29.70,$2.79


## Age Demographics

In [13]:
# Set bins and labels
# print(purchase_df["Age"].min())
# print(purchase_df["Age"].max())
bins = [0,10,15,20,25,30,35,40,45]
group_labels = ["< 10","10 - 14","15 - 19","20 - 24","25 - 29","30 - 34","35 - 39","40 - 45"]
uniquePlayers_df["ageRange"] = pd.cut(uniquePlayers_df["Age"],bins,labels=group_labels)
# uniquePlayers_df.head()

# Group by ageRange
uniquePlayers_ageRange = uniquePlayers_df.groupby("ageRange")

# Format and Output
pd.DataFrame({
    "Percentage of Players":uniquePlayers_ageRange["Age"].count()/totalCount,
    "Total Count": uniquePlayers_ageRange["Age"].count()
}).style.format({
    "Percentage of Players": "{:.2%}"
})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0_level_0,Percentage of Players,Total Count
ageRange,Unnamed: 1_level_1,Unnamed: 2_level_1
< 10,3.92%,24
10 - 14,9.31%,57
15 - 19,24.51%,150
20 - 24,40.20%,246
25 - 29,8.99%,55
30 - 34,7.84%,48
35 - 39,4.74%,29
40 - 45,0.49%,3


## Purchasing Analysis (Age)

In [14]:
# Create a new df
purchaseAnal_ageRange = pd.DataFrame({
    "Purchase Count": uniquePlayers_ageRange["Age"].count(),
    "Average Purchase Price": uniquePlayers_ageRange["Price"].mean(),
    "Total Purchase Value": uniquePlayers_ageRange["Price"].sum(),
    "Normalized Totals": uniquePlayers_ageRange["Price"].mean()*uniquePlayers_ageRange["Price"].std()
})

# Format and Output
purchaseAnal_ageRange[[
    "Purchase Count",
    "Average Purchase Price",
    "Total Purchase Value",
    "Normalized Totals"
]].style.format({
    "Average Purchase Price":"${:.2f}",
    "Total Purchase Value":"${:.2f}",
    "Normalized Totals":"${:.2f}"
})

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
ageRange,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
< 10,24,$3.27,$78.39,$3.67
10 - 14,57,$2.90,$165.16,$3.12
15 - 19,150,$2.92,$437.66,$3.30
20 - 24,246,$3.01,$740.78,$3.36
25 - 29,55,$2.90,$159.72,$3.10
30 - 34,48,$3.24,$155.63,$3.44
35 - 39,29,$2.99,$86.71,$3.16
40 - 45,3,$2.88,$8.64,$2.48


## Top Spenders

In [15]:
# Group by SN
purchase_bySN = purchase_df.groupby("SN")

# Create a new df
topSpenders_df = pd.DataFrame({
    "Purchase Count" : purchase_bySN["Item ID"].count(),
    "Average Purchase Price": purchase_bySN["Price"].mean(),
    "Total Purchase Value": purchase_bySN["Price"].sum()
})

# Sort, Format, and Output
topSpenders_df[[
    "Purchase Count",
    "Average Purchase Price",
    "Total Purchase Value"
]].sort_values(
    by=["Purchase Count", "Average Purchase Price"],
    ascending=False
).head().style.format({
    "Average Purchase Price":"${:.2f}",
    "Total Purchase Value":"${:.2f}"
})

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,5,$3.41,$17.06
Aerithllora36,4,$3.77,$15.10
Saedue76,4,$3.39,$13.56
Sondim43,4,$3.25,$13.02
Mindimnya67,4,$3.18,$12.74


## Most Popular Items

In [16]:
# Group by Item ID and Item Name
purchase_byItem = purchase_df.groupby(["Item ID","Item Name"])

# Create a new df
topItem_df = pd.DataFrame({
    "Purchase Count":purchase_byItem["SN"].count(),
    "Item Price":purchase_byItem["Price"].mean(),
    "Total Purchase Value":purchase_byItem["Price"].sum()
})

# Sort by Purchase Count, Format, and Output
topItem_df[[
    "Purchase Count",
    "Item Price","Total Purchase Value"
]].sort_values(
    by=["Purchase Count", "Total Purchase Value"],
    ascending=False
).head().style.format({
    "Item Price": "${:.2f}",
    "Total Purchase Value": "${:.2f}"
})

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
84,Arcane Gem,12,$2.45,$29.34
39,"Betrayal, Whisper of Grieving Widows",11,$2.35,$25.85
31,Trickster,10,$2.32,$23.22
34,Retribution Axe,9,$4.14,$37.26
107,"Splitter, Foe Of Subtlety",9,$3.67,$33.03


## Most Profitable Items

In [17]:
# Sort by Total Purchase Value, Format and Output
topItem_df[[
    "Purchase Count",
    "Item Price",
    "Total Purchase Value"
]].sort_values(
    by=["Total Purchase Value", "Item Price"],
    ascending=False
).head().style.format({
    "Item Price": "${:.2f}",
    "Total Purchase Value": "${:.2f}"
})

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,9,$4.14,$37.26
107,"Splitter, Foe Of Subtlety",9,$3.67,$33.03
115,Spectral Diamond Doomblade,7,$4.25,$29.75
32,Orenmir,6,$4.95,$29.70
84,Arcane Gem,12,$2.45,$29.34
