In [1]:
#Import and read in the CSV file
import pandas as pd

csv_file='../Resources/purchase_data.csv'

pymoli_data_df=pd.read_csv(csv_file)

pymoli_data_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [2]:
#Display the total number of players
total_players=pymoli_data_df["SN"].value_counts()
total_players=(len(total_players))

#Create a dataframe to display the values
player_df=pd.DataFrame([{"Total Players": total_players}])
player_df

Unnamed: 0,Total Players
0,576


In [3]:
#Purchasing analysis

#Calculate total unique items
unique_items=pymoli_data_df["Item ID"].value_counts()
total_items=len(unique_items)

# Calculate average price
average_price=round(pymoli_data_df["Price"].mean(), 2)

# Calculate total purchases
number_purchases=pymoli_data_df["Purchase ID"].count()

# Calculate total revenue
total_revenue=round(pymoli_data_df["Price"].sum(), 2)


#Create summary dataframe to display values
summary_df=pd.DataFrame([{"Number of Unique Items": total_items, 
                         "Average Price": average_price, "Number of Purchases": number_purchases, 
                         "Total Revenue": total_revenue}])
summary_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,3.05,780,2379.77


In [4]:
#Gender Demographics

#Calculate totoal counts per Gender, remove duplicates
gender_df=pymoli_data_df[["Gender", "SN"]]
gender_df=gender_df.drop_duplicates(["Gender", "SN"])
gender_df=gender_df.groupby("Gender").count()

#Calculate percent of players by Gender and sort dataframe in decending order to match solution
gender_df["Percent of Players"]=round(gender_df["SN"]/total_players *100, 2)
gender_df=gender_df.sort_values("SN", ascending=False)
gender_df

Unnamed: 0_level_0,SN,Percent of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,484,84.03
Female,81,14.06
Other / Non-Disclosed,11,1.91


In [5]:
#Purchasing Analysis by Gender

#Retrieve columns for analysis
purchase_df=pymoli_data_df[["Gender","Purchase ID", "Price"]]

#Groupby Gender and calculate purchase total
purchase_total=purchase_df.groupby("Gender").count()["Purchase ID"]

#Calculate average purchase price per Gender
average_price=purchase_df.groupby("Gender").mean()["Price"]

#Calculate total purchase price per Gender
total_purchase=purchase_df.groupby("Gender").sum()["Price"]

#Calculate average total price per Gender
average_total=total_purchase/gender_df["SN"]

#Create dataframe to display values
new_df=pd.DataFrame({
    "Purchase Total": purchase_total, 
    "Average Purchase Price": average_price, 
    "Total Purchase Value": total_purchase,
    "Avg Total Per Person": average_total
})

#Print DataFrame
new_df

Unnamed: 0_level_0,Purchase Total,Average Purchase Price,Total Purchase Value,Avg Total Per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,3.203009,361.94,4.468395
Male,652,3.017853,1967.64,4.065372
Other / Non-Disclosed,15,3.346,50.19,4.562727


In [122]:
#Age Demographics

#Bin data by age-completed with help from tutor
bins=[0,9.5,14.5, 19.5, 24.5, 29.5, 34.5, 39.5, 40]
group_names=["<10", "10-14", "15-19", "20-24","25-29", "30-24", "35-39", "40+"]

#Remove duplicates for calculating age group counts and create column for age ranges
updated_df=pymoli_data_df.drop_duplicates(["SN"])
updated_df["Age Ranges"]=pd.cut(pymoli_data_df["Age"], bins, labels=group_names)

#Groupby new column to get age range values
age_groups=updated_df.groupby(["Age Ranges"])
age_group_totals=age_groups["SN"].count()

#calculate age range percents
age_range_percents=round(age_group_totals/total_players*100,2)


#Create new dataframe presenting the age demographics
age_demo_df=pd.DataFrame({"Total Count": age_group_totals,
                     "Percentage of Players": age_range_percents})

#Display DataFrame
age_demo_df




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


Unnamed: 0_level_0,Total Count,Percentage of Players
Age Ranges,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,17,2.95
10-14,22,3.82
15-19,107,18.58
20-24,258,44.79
25-29,77,13.37
30-24,52,9.03
35-39,31,5.38
40+,5,0.87


In [136]:
#Purchasing Analysis by age

#Bin data by age
bins=[0,9.5,14.5, 19.5, 24.5, 29.5, 34.5, 39.5, 40]
group_names=["<10", "10-14", "15-19", "20-24","25-29", "30-24", "35-39", "40+"]
pymoli_data_df["Age Ranges"]=pd.cut(pymoli_data_df["Age"], bins, labels=group_names)

#Calculate purchase count
purchase_count=pymoli_data_df.groupby(["Age Ranges"]).count()["Purchase ID"]
#print(purchase_count)

#Calculate average price
avg_price_age=round(pymoli_data_df.groupby(["Age Ranges"]).mean()["Price"],2)
#print(avg_price_age)

#Calculate total purchase price
total_price_age=round(pymoli_data_df.groupby(["Age Ranges"]).sum()["Price"],2)
#print(total_price_age)


#Create dataframe to display values
purchase_analysis_age_df=pd.DataFrame({"Purchase Count": purchase_count,
                                      "Average Purchase Price": avg_price_age,
                                      "Total Purchase Value": total_price_age})
#Display DataFrame
purchase_analysis_age_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
Age Ranges,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
<10,23,3.35,77.13
10-14,28,2.96,82.78
15-19,136,3.04,412.89
20-24,365,3.05,1114.06
25-29,101,2.9,293.0
30-24,73,2.93,214.0
35-39,41,3.6,147.67
40+,6,2.78,16.71


In [147]:
#Top Spenders

#Use groupby to obtain the top spenders
group_spenders=pymoli_data_df.groupby("SN")

#Calculate purchase count per spender
spender_count=group_spenders.count()["Purchase ID"]

#Calculate average purchase price per spender
spender_average=round(group_spenders.mean()["Price"],2)

#Calculate total purchase per spender
spender_total=round(group_spenders.sum()["Price"], 2)


#Create new dataframe
top_spender_df = pd.DataFrame({"Purchase Count":spender_count,
                            "Average Purchase Price": spender_average,
                            "Total Purchase Price": spender_total
                            })

#Sort values in descending order per Total Purchase Price
sorted_spender_df= top_spender_df.sort_values("Total Purchase Price", ascending=False)

#Display Dataframe
sorted_spender_df.head()






Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Price
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,3.79,18.96
Idastidru52,4,3.86,15.45
Chamjask73,3,4.61,13.83
Iral74,4,3.4,13.62
Iskadarya95,3,4.37,13.1


In [222]:
#Popular Items

#Retrieve the Item ID, Item Name, and Item Price columns
item_data=pymoli_data_df[["Item ID", "Item Name", "Price"]]

#Groupby Item ID and Item Name
grouped_item_data=item_data.groupby(["Item ID","Item Name"])

#Calculate purchase count per groups
item_purchase_count=grouped_item_data.count()["Price"]

#Calculate item price per groups
total_item_price=grouped_item_data.sum()["Price"]

item_price=round(total_item_price/item_purchase_count,2)

item_df=pd.DataFrame({"Purchase Count": item_purchase_count,
                      "Item Price": item_price,
                     "Total Purchase Value":total_item_price})


#Sort dataframe in descending order per Purchase Count
sorted_item_df= item_df.sort_values("Purchase Count", ascending=False)

#Display DataFrame
sorted_item_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,4.61,59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,4.23,50.76
145,Fiery Glass Crusader,9,4.58,41.22
132,Persuasion,9,3.22,28.99
108,"Extraction, Quickblade Of Trembling Hands",9,3.53,31.77


In [223]:
#Sort dataframe by Total Purchase price in descending order per Total Purchase Value
sorted_purchase_price_df=sorted_item_df.sort_values("Total Purchase Value", ascending=False)

#Display Dataframe
sorted_purchase_price_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,4.61,59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,4.23,50.76
82,Nirvana,9,4.9,44.1
145,Fiery Glass Crusader,9,4.58,41.22
103,Singed Scalpel,8,4.35,34.8


In [None]:
Wriiten Analysis:
Based on the analysis of the age demographics, we can conclude that most individuals who play Heroes of Pymoli fall between the age of 20-24.
Based on gender demographics, we can conclude that there are significantly more male players than female players playing Heroes of Pymoli.
Final Critic is the most bought item by players.
