In [88]:
 # Dependencies and Setup
import pandas as pd
import numpy as np

In [89]:
# Relative path of the purchases CSV file
dataFile = "purchase_data.csv"

# Parse the CSV into a pandas DataFrame used by every cell below
purchase_data_pd = pd.read_csv(filepath_or_buffer=dataFile)

In [90]:
# Preview the first five rows of the loaded purchases
purchase_data_pd.head(n=5)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,15,Lisassa64,21,Female,98,"Deadline, Voice Of Subtlety",2.89
1,18,Reunasu60,22,Female,82,Nirvana,4.9
2,38,Reulae52,10,Female,116,Renewed Skeletal Katana,4.18
3,41,Assosia88,20,Female,7,"Thorn, Satchel of Dark Souls",1.33
4,55,Phaelap26,25,Female,84,Arcane Gem,3.79


In [91]:
# Total players = number of distinct screen names ("SN"); a player who made
# several purchases is still counted once.
player_count = purchase_data_pd["SN"].nunique()
print("Total Number of Players: ", player_count, sep="")

Total Number of Players: 576


In [92]:
# Purchasing Analysis (Total)
# Run basic calculations to obtain number of unique items, average price, etc.
# Create a summary data frame to hold the results
# Optional: give the displayed data cleaner formatting
# Display the summary data frame

In [93]:
# Number of distinct items sold, identified by "Item ID"
unique_items = purchase_data_pd["Item ID"].nunique()
print("Number of unique purchase items: ", unique_items, sep="")

Number of unique purchase items: 183


In [94]:
# Mean of the "Price" column over all purchase rows, rounded to cents
mean_price_raw = purchase_data_pd["Price"].mean()
avg_price = round(mean_price_raw, 2)
print("Average price of items is: ", avg_price, sep="")

Average price of items is: 3.05


In [95]:
# Number of purchases, counted as distinct "Purchase ID" values
number_of_purchases = purchase_data_pd["Purchase ID"].nunique()
print("Number of items purchased: ", number_of_purchases, sep="")

Number of items purchased: 780


In [96]:
# Total revenue generated from all sales.
# total_revenue keeps the raw float sum because later cells reuse it in
# further arithmetic; only the printed text is formatted to two decimals,
# which hides binary floating-point noise such as 2379.7700000000004.
total_revenue = purchase_data_pd["Price"].sum()
print("Total revenue is: " + "{:.2f}".format(total_revenue))

Total revenue is: 2379.7700000000004


In [97]:
# Build a one-row summary DataFrame: number of unique items, average sale
# price, number of purchases, and total revenue.
#
# Currency values are formatted to exactly two decimals. Using str() on the
# raw floats leaks floating-point noise into the table (for example
# "$2379.7700000000004") and drops trailing zeros (3.10 would show as "$3.1").
purchase_summary = [
    unique_items,
    "${:.2f}".format(avg_price),
    number_of_purchases,
    "${:.2f}".format(total_revenue),
]

purchase_df = pd.DataFrame(
    [purchase_summary],
    columns=["Number of Unique Items", "Average Price",
             "Number of Purchases", "Total Revenue"],
)
purchase_df.head()

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,$3.05,780,$2379.7700000000004


In [98]:
# Gender Demographics
# Count and percentage of players who are Male, Female, or
# Other / Non-Disclosed.

# Keep one row per screen name ("SN") so that a player with several
# purchases is counted only once.
gender_dem_df = purchase_data_pd.drop_duplicates("SN")

# Count of male players
is_male = gender_dem_df["Gender"] == "Male"
males_df = gender_dem_df[is_male]
males_count = males_df.shape[0]

# Count of female players
is_female = gender_dem_df["Gender"] == "Female"
females_df = gender_dem_df[is_female]
females_count = females_df.shape[0]

# Everyone who is neither Male nor Female is Other / Non-Disclosed.
# player_count (total distinct players) comes from an earlier cell.
other_non_disclosed_count = player_count - (males_count + females_count)

# Share of all players in each group, as a percentage rounded to 2 decimals
males_percentage = round(((males_count / player_count) * 100), 2)
females_percentage = round(((females_count / player_count) * 100), 2)
other_non_disclosed_percentage = round(((other_non_disclosed_count / player_count) * 100), 2)

# Assemble the demographics table, one row per gender group
demographics_df = pd.DataFrame(
    {
        "Gender": ["Male", "Female", "Other / Non-Disclosed"],
        "Percentage of Players": [males_percentage, females_percentage,
                                  other_non_disclosed_percentage],
        "Total Count": [males_count, females_count, other_non_disclosed_count],
    },
    columns=["Gender", "Percentage of Players", "Total Count"],
)

demographics_df = demographics_df.set_index("Gender")
# Last expression of the cell: render the percentages with a trailing "%"
demographics_df.style.format({"Percentage of Players": "{:.2f}%"})

Unnamed: 0_level_0,Percentage of Players,Total Count
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,84.03%,484
Female,14.06%,81
Other / Non-Disclosed,1.91%,11


In [99]:
# Purchasing Analysis (Gender)
# Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender
# Create a summary data frame to hold the results
# Optional: give the displayed data cleaner formatting
# Display the summary data frame

In [100]:
# Purchasing analysis by gender: purchase count, average purchase price and
# total purchase value for Male, Female and Other / Non-Disclosed.
# Unlike the demographics cell, this works on every purchase row (no
# de-duplication by player).

# Purchases by male players
is_male = purchase_data_pd["Gender"] == "Male"
males_purchases_df = purchase_data_pd[is_male]
males_purchase_count = males_purchases_df.shape[0]
males_total_spend = round((males_purchases_df["Price"].sum()), 2)
males_avg_spend = round((males_total_spend / males_purchase_count), 2)

# Purchases by female players
is_female = purchase_data_pd["Gender"] == "Female"
females_purchases_df = purchase_data_pd[is_female]
females_purchase_count = females_purchases_df.shape[0]
females_total_spend = round((females_purchases_df["Price"].sum()), 2)
females_avg_spend = round((females_total_spend / females_purchase_count), 2)

# Purchases by Other / Non-Disclosed players: whatever remains after
# subtracting the male and female figures from the overall totals
# (number_of_purchases and total_revenue come from earlier cells).
other_non_disclosed_purchase_count = number_of_purchases - (males_purchase_count + females_purchase_count)
other_non_disclosed_total_spend = round((total_revenue - (males_total_spend + females_total_spend)), 2)
other_non_disclosed_avg_spend = round((other_non_disclosed_total_spend / other_non_disclosed_purchase_count), 2)

# Assemble the purchasing analysis table, one row per gender group
purchase_analysis_df = pd.DataFrame(
    {
        "Gender": ["Male", "Female", "Other / Non-Disclosed"],
        "Purchase Count": [males_purchase_count, females_purchase_count,
                           other_non_disclosed_purchase_count],
        "Average Purchase Price": [males_avg_spend, females_avg_spend,
                                   other_non_disclosed_avg_spend],
        "Total Purchase Value": [males_total_spend, females_total_spend,
                                 other_non_disclosed_total_spend],
    },
    columns=["Gender", "Purchase Count", "Average Purchase Price", "Total Purchase Value"],
)

purchase_analysis_df = purchase_analysis_df.set_index("Gender")
purchase_analysis_df


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Male,652,3.02,1967.64
Female,113,3.2,361.94
Other / Non-Disclosed,15,3.35,50.19


In [101]:
# Export this notebook to a plain Python script using nbconvert
export_command = 'jupyter nbconvert --to script Heroes-of-Pymoli.ipynb'
get_ipython().system(export_command)

[NbConvertApp] Converting notebook Heroes-of-Pymoli.ipynb to script
[NbConvertApp] Writing 5806 bytes to Heroes-of-Pymoli.py
