# Heroes of PyMoli

## Summary of Analysis
    1. The typical customer/player is a male, 21-30 years old. Total revenue from this demographic was 896.33 USD
    
    2. This demographic was the only cohort to purchase the most expensive game at 4.99 USD
    
    3. The most profitable game was "Final Critic" (Item ID: 92).
       It was purchased 13 times, for a total revenue of 59.99 USD.
       
    4. Female players spent more money on average per purchase
    
<img src="files/fantasy.png" style="width: 9000px">

In [None]:
#Import Dependencies
import pandas as pd

In [None]:
#Relative path of the purchase-data CSV that the analysis reads
file = "Resources/purchase_data.csv"

In [None]:
#Load the purchase records into a DataFrame, decoding the file as ISO-8859-1
data = pd.read_csv(file, encoding="ISO-8859-1")

# Game Analysis

In [None]:
#Number of distinct games, identified by their Item ID
unique_item_ids = data["Item ID"].unique()
games = len(unique_item_ids)

#One row per (Item ID, Item Name, Price) combination; the remaining columns hold row counts
item_keys = ["Item ID", "Item Name", "Price"]
games_df = data.groupby(item_keys, as_index=False).count()

#Price statistics taken over those rows
item_prices = games_df["Price"]
g_avg, g_max, g_min = item_prices.mean(), item_prices.max(), item_prices.min()

#Single-row summary table
game_sum_table = pd.DataFrame(
    {
        "Number of Games": [games],
        "Average Price": [g_avg],
        "Highest Price": [g_max],
        "Lowest Price": [g_min],
    }
)

#Render the three price columns as dollar strings with two decimals
as_dollars = "${:.2f}".format
for price_column in ("Average Price", "Highest Price", "Lowest Price"):
    game_sum_table[price_column] = game_sum_table[price_column].map(as_dollars)

game_sum_table

# Player Analysis


In [None]:
#Count the distinct screen names (SN column) and report the result
screen_names = data["SN"].unique()
players = len(screen_names)
print(f"Total number of players: {players}")

## Gender Demographics

In [None]:
#One row per (SN, Gender) pair; the numeric columns hold that player's averages.
#numeric_only=True keeps the text columns (e.g. "Item Name") out of the mean:
#pandas >= 2.0 raises TypeError when asked to average them, while older
#versions silently skipped them, so the result is the same on both.
player_df = data.groupby(["SN", "Gender"], as_index=False).mean(numeric_only=True)

#Head-count overall and per gender
total = len(player_df["Gender"])
male = len(player_df.loc[player_df["Gender"] == "Male"])
female = len(player_df.loc[player_df["Gender"] == "Female"])
other = len(player_df.loc[player_df["Gender"] == "Other / Non-Disclosed"])

#Each gender's share of all players (fractions, formatted as percentages below)
male_per = male / total
female_per = female / total
other_per = other / total

#Create DataFrame for Gender Statistics
player_per_df = pd.DataFrame({"Gender": ["Male", "Female", "Other / Non-Disclosed"],
                              "Total": [male, female, other],
                              "Percentage": [male_per, female_per, other_per]})

#Show the shares as percentages with two decimals
player_per_df["Percentage"] = player_per_df["Percentage"].map("{:,.2%}".format)

player_per_df

## Purchasing Analysis (Gender)

In [None]:
#Split the per-player table into one frame per gender
gender_of_player = player_df["Gender"]
male_df = player_df[gender_of_player == "Male"]
female_df = player_df[gender_of_player == "Female"]
other_df = player_df[gender_of_player == "Other / Non-Disclosed"]

In [None]:
#Split the full purchase log (every purchase row) into one frame per gender
m_df, f_df, o_df = (
    data[data["Gender"] == gender_label]
    for gender_label in ("Male", "Female", "Other / Non-Disclosed")
)

In [None]:
#Purchase statistics for male players
m_total = m_df["Purchase ID"].count()   #number of purchases
m_sum = m_df["Price"].sum()             #total amount spent
#Average number of purchases per player: count each player's purchases, then
#average those counts. (Previously this held the mean of per-player average
#prices, which is a price and not a purchase count.)
m_avg_no = m_df.groupby("SN")["Purchase ID"].count().mean()
m_avg = m_df["Price"].mean()            #average price paid per purchase

In [None]:
#Purchase statistics for female players
f_total = f_df["Purchase ID"].count()   #number of purchases
f_sum = f_df["Price"].sum()             #total amount spent
#Average number of purchases per player: count each player's purchases, then
#average those counts. (Previously this held the mean of per-player average
#prices, which is a price and not a purchase count.)
f_avg_no = f_df.groupby("SN")["Purchase ID"].count().mean()
f_avg = f_df["Price"].mean()            #average price paid per purchase

In [None]:
#Purchase statistics for players listed as "Other / Non-Disclosed"
o_total = o_df["Purchase ID"].count()   #number of purchases
o_sum = o_df["Price"].sum()             #total amount spent
#Average number of purchases per player: count each player's purchases, then
#average those counts. (Previously this held the mean of per-player average
#prices, which is a price and not a purchase count.)
o_avg_no = o_df.groupby("SN")["Purchase ID"].count().mean()
o_avg = o_df["Price"].mean()            #average price paid per purchase

In [None]:
#Assemble the per-gender purchasing summary, one row per gender
gender_sum_table = pd.DataFrame(
    {
        "Gender": ["Male", "Female", "Other / Non-Disclosed"],
        "Total Purchases": [m_total, f_total, o_total],
        "Avg No. of Purchases": [m_avg_no, f_avg_no, o_avg_no],
        "Total Spent": [m_sum, f_sum, o_sum],
        "Average Spent": [m_avg, f_avg, o_avg],
    }
)

#Money columns become "$x.xx" strings; the purchase average is a plain two-decimal string
column_formats = {
    "Total Spent": "${:.2f}",
    "Average Spent": "${:.2f}",
    "Avg No. of Purchases": "{:.2f}",
}
for column_name, pattern in column_formats.items():
    gender_sum_table[column_name] = gender_sum_table[column_name].map(pattern.format)

gender_sum_table

## Age Demographics

In [None]:
#Age-bracket edges and their display labels (one label per interval).
#pd.cut is called with its default right-closed intervals, so the edges must be
#the LAST age of each bracket: (0, 10], (10, 20], (20, 30], (30, 40], (40, inf).
#The previous edges (11, 21, ...) put ages 11, 21, 31 and 41 one bracket too low.
#The top edge is infinite so that "40 +" is open-ended and no older age becomes NaN.
bins = [0, 10, 20, 30, 40, float("inf")]
labels = ["0 to 10", "11 to 20", "21 to 30", "31 to 40", "40 +"]

In [None]:
#Label every purchase row with its age bracket and store the label in a new column.
#The categorical result of pd.cut is converted to plain objects
#(presumably so later group-bys only yield brackets that actually occur - confirm).
age_brackets = pd.cut(data["Age"], bins=bins, labels=labels)
data["Age Group"] = age_brackets.astype(object)

In [None]:
#One row per (SN, Age Group) pair; the remaining columns hold row counts
age_df = data.groupby(["SN", "Age Group"], as_index=False).count()

#Head-count overall and per age bracket
total = len(age_df)
bracket_of_player = age_df["Age Group"]
u10, u20, u30, u40, o40 = (
    len(age_df[bracket_of_player == bracket_name])
    for bracket_name in ("0 to 10", "11 to 20", "21 to 30", "31 to 40", "40 +")
)

#Each bracket's share of all players
u10_p, u20_p, u30_p, u40_p, o40_p = (
    head_count / total for head_count in (u10, u20, u30, u40, o40)
)

#Create DataFrame for Age Statistics
age_per_df = pd.DataFrame(
    {
        "Age Group": labels,
        "Total": [u10, u20, u30, u40, o40],
        "Percentage": [u10_p, u20_p, u30_p, u40_p, o40_p],
    }
)

#Show the shares as percentages with two decimals
as_percent = "{:,.2%}".format
age_per_df["Percentage"] = age_per_df["Percentage"].map(as_percent)
age_per_df


## Purchasing Analysis (Age)

In [None]:
#Group the purchase rows by age bracket
age_df2 = data.groupby(["Age Group"])
price_by_bracket = age_df2["Price"]

#Per-bracket price statistics
age_count = price_by_bracket.count()
tot_spent = price_by_bracket.sum()
avg_spent = price_by_bracket.mean()
max_spent = price_by_bracket.max()
min_spent = price_by_bracket.min()

#Summary table indexed by age bracket (the purchase count is computed above but not shown)
age_sum_df = pd.DataFrame(
    {
        "Total Spent": tot_spent,
        "Average Spent": avg_spent,
        "Max Spent": max_spent,
        "Min Spent": min_spent,
    }
)

#Render every column as a dollar string with two decimals
to_dollars = "${:.2f}".format
for money_column in ("Total Spent", "Average Spent", "Max Spent", "Min Spent"):
    age_sum_df[money_column] = age_sum_df[money_column].map(to_dollars)
age_sum_df

# Top Spenders 

In [None]:
#Total spend for every (Age Group, Gender) combination, largest first.
#Only "Price" is aggregated: summing the whole frame also tried to add up the
#text columns, which were thrown away on the next step anyway.
top_df = (data.groupby(["Age Group", "Gender"], as_index=False)["Price"]
          .sum()
          .sort_values(by="Price", ascending=False)
          .rename(columns={"Price": "Total Spent"}))

#Render the totals as dollar strings with two decimals
top_df["Total Spent"] = top_df["Total Spent"].map("${:.2f}".format)

#Print dataframe
top_df

In [None]:
#Find the player who spent the most overall.
#numeric_only=True sums just the numeric columns, so the layout of top_player no
#longer depends on the pandas version (pandas >= 2.0 would otherwise also
#"sum" the text columns). Group keys are already unique, so no de-duplication
#is needed.
top_player = (data.groupby(["SN", "Gender", "Age"], as_index=False)
              .sum(numeric_only=True)
              .sort_values(by="Price", ascending=False))

#Read the winner's fields by column name instead of by position, so the report
#cannot silently pick up the wrong column.
biggest_spender = top_player.iloc[0]
print("Top Individual Spender: " + biggest_spender["SN"])
print(f"Demographics: {biggest_spender['Gender']}, {biggest_spender['Age']} years old.")
#Two fixed decimals avoid float noise such as 17.060000000000002
print(f"Total Spent: ${biggest_spender['Price']:.2f}")


# Most Popular Items

In [None]:
#Per-game row counts and per-game numeric sums, keyed on (Item ID, Item Name)
item_keys = ["Item ID", "Item Name"]
game_count = data.groupby(item_keys).count().reset_index()
game_sum = data.groupby(item_keys).sum(numeric_only=True)

#Join counts and sums on BOTH keys. Joining on "Item Name" alone pairs every
#Item ID with every other Item ID that shares its name, which produces extra
#rows whose purchase count and revenue belong to different items.
g_df = pd.merge(game_count, game_sum.reset_index(), on=item_keys)[
    ["Item ID", "Item Name", "Purchase ID_x", "Price_y"]]
g_df = g_df.rename(columns={"Purchase ID_x": "Number of Purchases",
                            "Price_y": "Total Revenue"})

#Game price = revenue divided by number of purchases (average price paid)
g_df["Game Price"] = g_df["Total Revenue"] / g_df["Number of Purchases"]

#g_df stays ordered by revenue, highest first; the column order is fixed because
#the "most profitable" report reads this table by position.
g_df = (g_df.sort_values(by="Total Revenue", ascending=False)
        .reset_index(drop=True)[["Item ID", "Item Name", "Game Price",
                                 "Number of Purchases", "Total Revenue"]])

#Render the money columns as dollar strings with two decimals
g_df["Total Revenue"] = g_df["Total Revenue"].map("${:.2f}".format)
g_df["Game Price"] = g_df["Game Price"].map("${:.2f}".format)

#Show the five most POPULAR games: rank by purchase count. The stable sort keeps
#the revenue order among games with the same number of purchases.
g_df.sort_values(by="Number of Purchases", ascending=False, kind="mergesort").head()

# Most Profitable Items

In [None]:
#Report the first row of g_df as the most profitable game; fields are read by column position
best_game = g_df.iloc[0]
print(f"Most Profitable Game: {best_game.iloc[1]} (Item ID: {best_game.iloc[0]})")
print(f"Number of Purchases: {best_game.iloc[3]}")
print(f"Total Revenue: {best_game.iloc[4]}")