### Heroes Of Pymoli Data Analysis
* Of the 1163 active players, the vast majority are male (84%). There also exists, a smaller, but notable proportion of female players (14%).

* Our peak age demographic falls between 20-24 (44.8%) with secondary groups falling between 15-19 (18.60%) and 25-29 (13.4%).  
-----

In [1]:
# importing dependencies
import pandas as pd

#reading csv file with pandas
path = "Resources/purchase_data.csv"
purchases_df = pd.read_csv(path)
purchases_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## Player Count

* Total number of players

In [10]:
purchase_per_player = purchases_df["SN"].value_counts()
total_player_count = purchase_per_player.count()
total_player_count

576

## Purchasing Analysis (Total)

* Number of unique items
* Average price
* Number of purchases
* Total revenue

In [116]:
# calculating stats
unique_item_count = len(purchases_df["Item Name"].unique())
average_price = purchases_df["Price"].mean()
purchase_count = purchases_df["Purchase ID"].count()
total_revenue = purchases_df["Price"].sum()

# creating dataframe
purchasing_analysis = pd.DataFrame({
    "Item Count":[unique_item_count],
    "Average Price": [average_price],
    "Number of Purchases":[purchase_count],
    "Total Revenue":[total_revenue]
})

# formatting
purchasing_analysis["Average Price"] = purchasing_analysis["Average Price"].map("${:.2f}".format)
purchasing_analysis["Total Revenue"] = purchasing_analysis["Total Revenue"].map("${:,.2f}".format)

# showing dataframe
purchasing_analysis

Unnamed: 0,Item Count,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


## Gender Demographics

* Percentage and count of male players
* Percentage and count of female players
* Percentage and count of other / non-disclosed

In [113]:
# calculating percentage and count by gender
player_demographics = purchases_df.loc[:, ["SN", "Gender", "Age"]]
player_demographics = player_demographics.drop_duplicates()
player_count = player_demographics["SN"].count()

gender_count = player_demographics["Gender"].value_counts().rename("Total Count")
percentage_gender = round(gender_count/player_demographics["SN"].count()*100,2).rename("% of Total Players")
merged_gender_demo = pd.concat([gender_count, percentage_gender], axis=1)

#formatting
merged_gender_demo["% of Total Players"] = merged_gender_demo["% of Total Players"].map("{:.2f}%".format)

# showing dataframe
merged_gender_demo

Unnamed: 0,Total Count,% of Total Players
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


## Purchasing Analysis (Gender)

* Purchase count by gender
* Average purchase price by gender
* Total purchase value by gender
* Average purchase total per person by gender

In [173]:
# calculating values
groupby_gender = purchases_df.groupby("Gender")

purchase_count = groupby_gender["Price"].count().rename("Purchase Count")
avg_purchase_price = groupby_gender["Price"].mean().rename("Average Purchase Price")
total_purchase = groupby_gender["Price"].sum().rename("Total Revenue")

avg_total_perperson = total_purchase / merged_gender_demo["Total Count"]

# Creating df
gender_purchase_analysis = pd.DataFrame({"Purchase Count":purchase_count,
                                        "Average Purchase Price": avg_purchase_price,
                                        "Total Revenue":total_purchase,
                                        "Normalized Average Purchase Total":avg_total_perperson})

#formatting
gender_purchase_analysis["Average Purchase Price"] = gender_purchase_analysis["Average Purchase Price"].map("${:.2f}".format)
gender_purchase_analysis["Normalized Average Purchase Total"] = gender_purchase_analysis["Normalized Average Purchase Total"].map("${:.2f}".format)
gender_purchase_analysis["Total Revenue"] =gender_purchase_analysis["Total Revenue"].map("${:.2f}".format)

# display dataframe
gender_purchase_analysis

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Revenue,Normalized Average Purchase Total
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,$1967.64,$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


## Age Demographics

* Numbers and percentages for each age group

In [120]:
#age statistics
player_demographics["Age"].describe()

count    576.000000
mean      22.741319
std        6.838568
min        7.000000
25%       19.000000
50%       22.000000
75%       25.000000
max       45.000000
Name: Age, dtype: float64

In [157]:
# Age bins
age_bins = [0, 9.9, 14.9, 19.9, 24.9, 29.9, 39.9, 100]
age_labels = ["Children (0-9)", "Adolescents (10-14)", "Teens (15-19)", "Young Adults (20-24)", "Almost Adults (25-29)", "Adults (30-39)", "Elder Adults (40+)"]

# Categorize players using age bins
age_demographics = player_demographics
age_demographics["Age Group"] = pd.cut(age_demographics["Age"], age_bins, labels=age_labels)

# calculating number and percentages of each group
grouped_age_demo = age_demographics.groupby("Age Group")

count_age_group = grouped_age_demo["Age"].count().rename("Total Players")
percentage_age_group = count_age_group/player_count*100

# creating dataframe
age_analysis = pd.DataFrame({"Total Players":count_age_group,
                           "% of Players":percentage_age_group})

# formatting
age_analysis["% of Players"] = age_analysis["% of Players"].map("{:.2f}%".format)


# display dataframe
age_analysis


Unnamed: 0_level_0,Total Players,% of Players
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1
Children (0-9),17,2.95%
Adolescents (10-14),22,3.82%
Teens (15-19),107,18.58%
Young Adults (20-24),258,44.79%
Almost Adults (25-29),77,13.37%
Adults (30-39),83,14.41%
Elder Adults (40+),12,2.08%


## Purchasing Analysis (Age)

* Total purchase count by age
* Total purchase value by age
* Average purchase price by age
* Average purchase total per person by age group

In [165]:
# Purchase data into age bins
age_purchase_analysis = purchases_df
age_purchase_analysis["Age Ranges"] = pd.cut(age_purchase_analysis["Age"], age_bins, labels=age_labels)

# calculating
grouped_age_ranges = age_purchase_analysis.groupby("Age Ranges")

age_total_purchase_count = grouped_age_ranges["SN"].count().rename("Total Players")
age_total_purchase_value = grouped_age_ranges["Price"].sum().rename("Total Revenue")
age_avg_price = grouped_age_ranges["Price"].mean().rename("Average Purchase Total")

age_avg_purchase_perperson = age_total_purchase_value/age_analysis["Total Players"].rename("Normalized Average Purchase Total")

# creating dataframe
age_purchasing_analysis = pd.DataFrame({"Total Players": age_total_purchase_count,
                                       "Total Revenue": age_total_purchase_value,
                                        "Average Purchase Total": age_avg_price,
                                        "Normalized Average Purchase Total":age_avg_purchase_perperson
                                       })

# formating 
age_purchasing_analysis["Total Revenue"] = age_purchasing_analysis["Total Revenue"].map("${:.2f}".format)
age_purchasing_analysis["Average Purchase Total"] = age_purchasing_analysis["Average Purchase Total"].map("${:.2f}".format)
age_purchasing_analysis["Normalized Average Purchase Total"] = age_purchasing_analysis["Normalized Average Purchase Total"].map("${:.2f}".format)

# displaying dataframe
age_purchasing_analysis

Unnamed: 0_level_0,Total Players,Total Revenue,Average Purchase Total,Normalized Average Purchase Total
Age Ranges,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Children (0-9),23,$77.13,$3.35,$4.54
Adolescents (10-14),28,$82.78,$2.96,$3.76
Teens (15-19),136,$412.89,$3.04,$3.86
Young Adults (20-24),365,$1114.06,$3.05,$4.32
Almost Adults (25-29),101,$293.00,$2.90,$3.81
Adults (30-39),114,$361.67,$3.17,$4.36
Elder Adults (40+),13,$38.24,$2.94,$3.19


## Top Spenders

* Purchase count
* Average purchase price
* Total purchase value

In [189]:
# Calculations
groupby_sn = purchases_df.groupby("SN")

top_purchase_count = purchases_df["SN"].value_counts()
top_average_price = groupby_sn["Price"].mean()
top_total_purchase = groupby_sn["Price"].sum()

# creating dataframe
top_spenders = pd.DataFrame({"Purchase Count":top_purchase_count,
                            "Average Purchase Price":top_average_price,
                            "Total Purchases":top_total_purchase})

# Sorting total purchase value (descending order)
top_spenders = top_spenders.sort_values("Total Purchases", ascending=False)

# formatting
top_spenders["Average Purchase Price"] = top_spenders["Average Purchase Price"].map("${:.2f}".format)
top_spenders["Total Purchases"] = top_spenders["Total Purchases"].map("${:.2f}".format)

# displaying dataframe
top_spenders

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchases
Lisosia93,5,$3.79,$18.96
Idastidru52,4,$3.86,$15.45
Chamjask73,3,$4.61,$13.83
Iral74,4,$3.40,$13.62
Iskadarya95,3,$4.37,$13.10
...,...,...,...
Ililsasya43,1,$1.02,$1.02
Irilis75,1,$1.02,$1.02
Aidai61,1,$1.01,$1.01
Chanirra79,1,$1.01,$1.01


## Most Popular Items

* Purchase count
* Item price
* Total purchase value

In [190]:
purchases_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price,Age Ranges
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53,Young Adults (20-24)
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56,Elder Adults (40+)
2,2,Ithergue48,24,Male,92,Final Critic,4.88,Young Adults (20-24)
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27,Young Adults (20-24)
4,4,Iskosia90,23,Male,131,Fury,1.44,Young Adults (20-24)


In [236]:
# from purchase dataframe retrieving the Item ID, Item Name, and Item Price columns
items_df = purchases_df[["Item ID", "Item Name", "Price"]]

# calculating and grouping
groupby_items = items_df.groupby("Item Name")

item_purchase_count = items_df["Item Name"].value_counts()
item_prices = groupby_items["Price"].mean()
item_total_purchase_value = groupby_items["Price"].sum()


# creating dataframe
popular_items = pd.DataFrame({"Purchase Count":item_purchase_count,
                             "Price":item_prices,
                              "Total Purchase Value":item_total_purchase_value})

# sorting purchase count (descending order)
sorted_popular_items = popular_items.sort_values("Purchase Count", ascending=False)

# formatting
sorted_popular_items["Price"] = sorted_popular_items["Price"].map("${:.2f}".format)
sorted_popular_items["Total Purchase Value"] = sorted_popular_items["Total Purchase Value"].map("${:.2f}".format)



# displaying dataframe
sorted_popular_items

Unnamed: 0,Purchase Count,Price,Total Purchase Value
Final Critic,13,$4.61,$59.99
"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
Persuasion,9,$3.22,$28.99
Nirvana,9,$4.90,$44.10
"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77
...,...,...,...
"Riddle, Tribute of Ended Dreams",1,$3.30,$3.30
Betrayer,1,$2.94,$2.94
The Decapitator,1,$1.75,$1.75
Gladiator's Glaive,1,$1.93,$1.93


## Most Profitable Items

* Purchase count
* Item price
* Total purchase value

In [239]:
# sorting above df by total purchase value (descending order)
most_profitable = popular_items.sort_values("Total Purchase Value", ascending=False)

# formatting
most_profitable["Price"] = most_profitable["Price"].map("${:.2f}".format)
most_profitable["Total Purchase Value"] = most_profitable["Total Purchase Value"].map("${:.2f}".format)

# displaying dataframe
most_profitable

Unnamed: 0,Purchase Count,Price,Total Purchase Value
Final Critic,13,$4.61,$59.99
"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
Nirvana,9,$4.90,$44.10
Fiery Glass Crusader,9,$4.58,$41.22
Singed Scalpel,8,$4.35,$34.80
...,...,...,...
"Flux, Destroyer of Due Diligence",2,$1.06,$2.12
Whistling Mithril Warblade,2,$1.00,$2.00
Exiled Mithril Longsword,1,$2.00,$2.00
Gladiator's Glaive,1,$1.93,$1.93
