In [56]:
import pandas as pd
import numpy as np

In [2]:
# LOAD FILE INTO PROGRAM " 'name' = 'LOC' "
file_to_load = "../Resources/purchase_data.csv"

In [103]:
# READ CSV FILE 
purchase_data_df = pd.read_csv(file_to_load)

#PRINT TABLE
purchase_data_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [104]:
# COUNT HOW MANY PLAYERS THERE ARE IN THE DATA
total_players = purchase_data_df.count()
total_players.head()

Purchase ID    780
SN             780
Age            780
Gender         780
Item ID        780
dtype: int64

In [105]:
# CREATE NEW TABLE while including data from 'purchase_data_df.count()'
list_players = pd.DataFrame({
    "Total of Players":[total_players[0]]
})

#PRINT TABLE
list_players.head()

Unnamed: 0,Total of Players
0,780


In [150]:
# FINDING THE NUMBER OF UNIQUE ITEMS

items = purchase_data_df['Item Name'].value_counts()
# Prints out full raw data (shows all rows) 
# np.savetxt('LOCATION TO SAVE plus name', 'Data Frame', FMT) # fmt = '%d' rounds up the values to integers
np.savetxt('../Resources/unique_items_data.text', items, fmt='%d')
number_of_unique_items = items.shape[0]
print("Number of Unique Items: " + str(number_of_unique_items) )

Number of Unique Items: 179


In [119]:
# FINDING THE AVERAGE PRICE

price_df = purchase_data_df['Price']
total_revenue = price_df.sum()                       # TOTAL REVENUE by using '.sum()'
price_average = ((total_revenue)/(total_players[0])) # COULD ADD 'round(decimals = 2)' to round it up, 
                                                     # but we use '.astype(float).map("${:,.2f}".format)' instead

print("Price Average: " + str(price_average))
print("")
print("Total Rev.: " + str(total_revenue))

Price Average: 3.0509871794871795

Total Rev.: 2379.77


In [96]:
# CREATE TABLE
purchasing_analysis_df = pd.DataFrame({
    "Number of Unique Items": [number_of_unique_items],
    "Average Price": [price_average],
    "Number of Purchases": [total_players[0]],
    "Total Revenue":[total_revenue]
})

#TO ROUND VALUES UP AND ADD '$'
purchasing_analysis_df["Average Price"] = purchasing_analysis_df["Average Price"].astype(float).map(
    "${:,.2f}".format)
purchasing_analysis_df["Total Revenue"] = purchasing_analysis_df["Total Revenue"].astype(float).map(
    "${:,.2f}".format)


# PRINT OUT TABLE
purchasing_analysis_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


In [189]:
# Gender Count in column of 'Gender' only
gender_count = purchase_data_df["Gender"].value_counts()
gender_count.head()

# CREATE TABLE COUNT of Genders
grouped_gender_count = purchase_data_df.groupby(['Gender'])

# The object returned is a "GroupBy" object and cannot be viewed normally...
print(grouped_gender_count)

# Data Frame broke into count only ('Total Count' = [Male, Female, Others])
gender_count_test = grouped_gender_count.count().head()

# delete the extra columns (NOT NEEDED)
del gender_count_test['Price']
del gender_count_test['SN']
del gender_count_test['Item ID']
del gender_count_test['Item Name']
del gender_count_test['Age']

# Keep Purchase ID, but convert 'Purchase ID' into 'Total Count'
gender_count_test = gender_count_test.rename(columns={"Purchase ID": "Total Count"})

# to find the amount of genders 
gender_total = gender_count_test.sum()

# the amount of males/females/others... found in 'gender_count_test' data frame... and divide it by the amount of genders
male_percentage = (652/gender_total)
female_percentage = (113/gender_total)
other_percentage = (15/gender_total)

# added a column with its values into 'gender_count_test' data frame
gender_count_test.loc[:, 'PercentageofPlayers'] = [male_percentage, female_percentage, other_percentage]

# convert the 'PercentageofPlayers' values into percetange and string ( example:  .8346121 to 83.46121  )
gender_count_test.PercentageofPlayers = (gender_count_test.PercentageofPlayers * 100).astype(str)

#TO ROUND VALUES UP AND ADD '%'
gender_count_test["Percentage of Players"] = gender_count_test["PercentageofPlayers"].astype(float).map("{:,.2f}%".format)

# DEL the extra COLUMN
del gender_count_test['PercentageofPlayers']

# print out table
gender_count_test.head()

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001AE9BEDCF98>


Unnamed: 0_level_0,Total Count,Percentage of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,113,83.59%
Male,652,14.49%
Other / Non-Disclosed,15,1.92%


In [291]:
# Filter the data so that only those sightings in the US are in a DataFrame

# MALE ONLY
male_gender_only = purchase_data_df.loc[purchase_data_df["Gender"] == "Male", :]
male_gender_only.head()

male_avg_price = male_gender_only["Price"].mean()
male_purchase_total = male_gender_only["Price"].sum()


# FEMALE ONLY
female_gender_only = purchase_data_df.loc[purchase_data_df["Gender"] == "Female", :]
female_gender_only.head()

female_avg_price = female_gender_only["Price"].mean()
female_purchase_total = female_gender_only["Price"].sum()


# OTHER ONLY
other_gender_only = purchase_data_df.loc[purchase_data_df["Gender"] == "Other / Non-Disclosed", :]
other_gender_only.head()

other_avg_price = other_gender_only["Price"].mean()
other_purchase_total = other_gender_only["Price"].sum()

In [293]:
# MALE ONLY 
grouped_male_gender_df = male_gender_only.groupby(['SN'])

    # Each Person who purchased more than one will add their prices... example Aeda94: 'Price' = [2.00, 3.00, 1.00]
    #                                                                          Yasur85: 'Price' = [1.00, .10]
    #                                                                  Outcome of code: Aeda 94: 6.00   Yasur85: 1.10
grp_male_sum = grouped_male_gender_df["Price"].sum() 

    # After group_by and sum of each 'SN', apply .mean() to caculate by adding each 'SN' then divide by the amount of 'SN'
    # example: 'Lism78' = 5.10, 'Lisovynya38' = 3.90... then Lism78 + Lisovyna38 = 9.00 then 9.00/2 = the avg total purchase per person
grp_male_avg_per_person = grp_male_sum.mean()

# FEMALE ONLY
grouped_female_gender_df = female_gender_only.groupby(['SN'])
grp_female_sum = grouped_female_gender_df["Price"].sum() 
grp_female_avg_per_person = grp_female_sum.mean()


# OTHER ONLY
grouped_other_gender_df = other_gender_only.groupby(['SN'])
grp_other_sum = grouped_other_gender_df["Price"].sum() 
grp_other_avg_per_person = grp_other_sum.mean()

In [294]:
gender_analysis_table_df = gender_analysis.drop(['SN', 'Age', 'Item ID', 'Item Name', 'Price'], axis=1)

gender_analysis_table_df = gender_analysis_table_df.rename(columns={"Purchase ID": "Purchase Count"})

gender_analysis_table_df.head()


# Add COLUMNS and VALUES into Table
gender_analysis_table_df.loc[:, 'Average Purchase of Price'] = [female_avg_price, male_avg_price, other_avg_price]
gender_analysis_table_df.loc[:, 'Total Purchase Value'] = [female_purchase_total, male_purchase_total, other_purchase_total]
gender_analysis_table_df.loc[:, 'Avg Total Purchase per Person'] = [grp_female_avg_per_person, grp_male_avg_per_person, grp_other_avg_per_person]


#TO ROUND VALUES UP AND ADD '%'
gender_analysis_table_df["Average Purchase of Price"] = gender_analysis_table_df["Average Purchase of Price"].astype(float).map("${:,.2f}".format)
gender_analysis_table_df["Total Purchase Value"] = gender_analysis_table_df["Total Purchase Value"].astype(float).map("${:,.2f}".format)
gender_analysis_table_df["Avg Total Purchase per Person"] = gender_analysis_table_df["Avg Total Purchase per Person"].astype(float).map("${:,.2f}".format)


gender_analysis_table_df.head()


Unnamed: 0_level_0,Purchase Count,Average Purchase of Price,Total Purchase Value,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,"$1,967.64",$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56
