In [5]:
import pandas as pd

In [6]:
load_file = "../Resources/purchase_data.csv"
purchase_data = pd.read_csv(load_file)

In [7]:
purchase_data_df = pd.DataFrame(purchase_data)
purchase_data_df
purchase_data_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [34]:
#total number of unique players
tot_player = purchase_data_df["SN"].count()
tot_player

780

In [9]:
#Purchasing Analysis (Total)
#Run basic calculations to obtain number of unique items, average price, etc.
#Create a summary data frame to hold the results
#Optional: give the displayed data cleaner formatting
#Display the summary data frame

In [10]:
#unique items using len
unique_items = len(purchase_data_df["Item ID"].unique())
unique_items

179

In [11]:
#total revenue
total_revenue = purchase_data_df["Price"].sum()
total_revenue

2379.77

In [12]:
#total purchases
total_purchase = purchase_data_df["Purchase ID"].count()
total_purchase

780

In [13]:
#average price = total revenue / total purchases
average_price = total_revenue / total_purchase
average_price

3.0509871794871795

In [14]:
#round to two decimal places
average_price2 = average_price.round(2)
average_price2

3.05

In [15]:
#purchase anaylsis dataframe
purchase_analysis_pd = pd.DataFrame([{"Number of Unique Items": unique_items, 
                                    "Average Price": average_price2, 
                                    "Number of Purchases": total_purchase,
                                    "Total Revenue": total_revenue}])
purchase_analysis_pd

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,3.05,780,2379.77


In [16]:
#Gender Demographics
#Percentage and Count of Male Players
#Percentage and Count of Female Players
#Percentage and Count of Other / Non-Disclosed

In [67]:
#gender dataframe
gen_demographics = purchase_data.loc[:, ["Gender", "SN", "Age"]]
gen_demographics.head()

Unnamed: 0,Gender,SN,Age
0,Male,Lisim78,20
1,Male,Lisovynya38,40
2,Male,Ithergue48,24
3,Male,Chamassasya86,24
4,Male,Iskosia90,23


In [68]:
gen_demographics2 = gen_demographics.drop_duplicates()
gen_players = gen_demographics2.count()[0]
gen_players

576

In [69]:
gen_total_df = gen_demographics2["Gender"].value_counts()
gen_total_df

Male                     484
Female                    81
Other / Non-Disclosed     11
Name: Gender, dtype: int64

In [70]:
purchase_data_df = gen_demographics.drop_duplicates()
purchase_data_df

Unnamed: 0,Gender,SN,Age
0,Male,Lisim78,20
1,Male,Lisovynya38,40
2,Male,Ithergue48,24
3,Male,Chamassasya86,24
4,Male,Iskosia90,23
...,...,...,...
773,Male,Hala31,21
774,Male,Jiskjask80,11
775,Female,Aethedru70,21
777,Male,Yathecal72,20


In [71]:
gender_percentage_df = gen_demographics2["Gender"].value_counts() / gen_players * 100
gender_percentage_df

Male                     84.027778
Female                   14.062500
Other / Non-Disclosed     1.909722
Name: Gender, dtype: float64

In [72]:
gender_percentage2_df = (gender_percentage_df.round(2).astype(str) + '%')
gender_percentage2_df

Male                     84.03%
Female                   14.06%
Other / Non-Disclosed     1.91%
Name: Gender, dtype: object

In [73]:
gender_demo_df = pd.DataFrame({"Total Count": gen_total_df,
                               "Percentage of Player": gender_percentage2_df})
gender_demo_df

Unnamed: 0,Total Count,Percentage of Player
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


In [74]:
#total count
tot_count = purchase_data.groupby(["Gender"]).count()["Price"]
tot_count

Gender
Female                   113
Male                     652
Other / Non-Disclosed     15
Name: Price, dtype: int64

In [75]:
#total purchase value
tot_purchase_total = purchase_data.groupby(["Gender"]).sum()["Price"]
tot_purchase_total

Gender
Female                    361.94
Male                     1967.64
Other / Non-Disclosed      50.19
Name: Price, dtype: float64

In [76]:
#average purchase price
avg_purchase_price = purchase_data.groupby(["Gender"]).mean()["Price"]
avg_purchase_price
avg_purchase_price = avg_purchase_price.round(2)
avg_purchase_price

Gender
Female                   3.20
Male                     3.02
Other / Non-Disclosed    3.35
Name: Price, dtype: float64

In [77]:
#avg Total Purchase per Person
avg_purchase_per_person = tot_purchase_total / gender_demo_df["Total Count"]
avg_purchase_per_person = avg_purchase_per_person.round(2)
avg_purchase_per_person

Female                   4.47
Male                     4.07
Other / Non-Disclosed    4.56
dtype: float64

In [78]:
#clean up data
clean_data = pd.DataFrame({"Purchase Count": tot_count,
                          "Average Purchase Price": avg_purchase_price,
                          "Total Purchase Price": tot_purchase_total,
                          "Averagre Total Purchase Per Person": avg_purchase_per_person})
clean_data

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Price,Averagre Total Purchase Per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,3.2,361.94,4.47
Male,652,3.02,1967.64,4.07
Other / Non-Disclosed,15,3.35,50.19,4.56


In [79]:
#age Demographics

In [80]:
#establish bins for ages
bins_age = [0, 9.90, 14.90, 19.90, 24.9, 29.9, 34.90, 39.90, 999]
group_names_range = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", ">40"]

#Use Cut to categorize players
purchase_data_df["Age"] = pd.cut(purchase_data_df["Age"], bins_age, labels=group_names_range)
purchase_data_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,Gender,SN,Age
0,Male,Lisim78,20-24
1,Male,Lisovynya38,>40
2,Male,Ithergue48,20-24
3,Male,Chamassasya86,20-24
4,Male,Iskosia90,20-24


In [87]:
#age range
age_totals = purchase_data_df["Age"].value_counts()
age_totals

20-24    258
15-19    107
25-29     77
30-34     52
35-39     31
10-14     22
<10       17
>40       12
Name: Age, dtype: int64

In [82]:
# percentage range
age_percents = age_totals / gen_players * 100
age_percents = age_percents.round(2)
age_percents

20-24    44.79
15-19    18.58
25-29    13.37
30-34     9.03
35-39     5.38
10-14     3.82
<10       2.95
>40       2.08
Name: Age Ranges, dtype: float64

In [83]:
#age data frame and sort 
age_and_percent_df = pd.DataFrame({"Total Count": age_totals,
                                  "Percentage of Players": age_percents})
age_and_percent_df = age_and_percent_df.sort_index()
age_and_percent_df

Unnamed: 0,Total Count,Percentage of Players
<10,17,2.95
10-14,22,3.82
15-19,107,18.58
20-24,258,44.79
25-29,77,13.37
30-34,52,9.03
35-39,31,5.38
>40,12,2.08


In [84]:
#purchasing Analysis (Age)
purchase_data_df

Unnamed: 0,Gender,SN,Age
0,Male,Lisim78,20-24
1,Male,Lisovynya38,>40
2,Male,Ithergue48,20-24
3,Male,Chamassasya86,20-24
4,Male,Iskosia90,20-24
...,...,...,...
773,Male,Hala31,20-24
774,Male,Jiskjask80,10-14
775,Female,Aethedru70,20-24
777,Male,Yathecal72,20-24
