In [1]:
# import all necessary libraries
import json
import pandas as pd
import string

# Read the purchase records and load them into a DataFrame.
# The context manager closes the file handle as soon as parsing finishes;
# a bare open() inside json.load() leaves it open until garbage collection.
# JSON is UTF-8 by specification, so the encoding is stated explicitly
# instead of relying on the platform default.
with open('purchase_data.json', encoding='utf-8') as purchase_file:
    file = json.load(purchase_file)
pymoligame_df = pd.DataFrame(file)

# Preview the first rows to sanity-check the load
pymoligame_df.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


In [2]:
# Display the DataFrame's column labels (a pandas Index) so the names used
# by the later cells ("SN", "Price", "Gender", ...) can be checked at a glance
pymoligame_df.columns

Index(['Age', 'Gender', 'Item ID', 'Item Name', 'Price', 'SN'], dtype='object')

In [3]:
# Total number of players: every distinct value in the "SN" column is counted
# once (dropna=False keeps the count identical to len(Series.unique()))
player_count = pymoligame_df["SN"].nunique(dropna=False)

# Wrap the count in a one-entry Series so it renders as a labelled summary
player_count_table = pd.Series([player_count], index=["Total Players: "])
player_count_table


Total Players:     573
dtype: int64

In [4]:
# Purchasing Analysis (Total): headline figures over every purchase row
prices = pymoligame_df["Price"]

# Distinct item IDs (dropna=False matches len(Series.unique()))
unique_items = pymoligame_df["Item ID"].nunique(dropna=False)
# Mean and sum of the price column
average_price = prices.mean()
total_revenue = prices.sum()
# Number of rows with a non-null item name
number_purchases = pymoligame_df["Item Name"].count()

In [5]:
# Gather the Purchasing Analysis (Total) figures into a single-row DataFrame.
# Each scalar is wrapped in a one-element list so pandas builds exactly one row.
purchasing_totals = {
    "Number of Unique Items": unique_items,
    "Average Price": average_price,
    "Number of Purchases": number_purchases,
    "Total Revenue": total_revenue,
}
purchasing_analysis_table = pd.DataFrame(
    {label: [value] for label, value in purchasing_totals.items()}
)

In [6]:
def _as_currency(value):
    """Render a number as a dollar amount with two decimals.

    Values that are already strings are returned unchanged, so this cell can
    be re-run safely: applying the float format spec to an already-formatted
    string would otherwise raise ValueError.
    """
    if isinstance(value, str):
        return value
    return "${:.2f}".format(value)


# Reorganize the columns using double brackets
purchasing_analysis_table = purchasing_analysis_table[
    ["Number of Unique Items", "Average Price", "Number of Purchases", "Total Revenue"]
]

# Format the currency fields for display
for currency_column in ("Average Price", "Total Revenue"):
    purchasing_analysis_table[currency_column] = (
        purchasing_analysis_table[currency_column].map(_as_currency)
    )

purchasing_analysis_table

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,$2.93,780,$2286.33


In [7]:
# Gender Demographics Calculation

# Count gender distribution.
# value_counts() is computed once and read with .get(..., 0), so a dataset
# with no rows for a category yields 0 instead of raising KeyError.
# NOTE(review): these counts are taken over every purchase row (the saved
# output totals 780, the number of purchases), not over unique players (573).
# Confirm whether rows should be de-duplicated by "SN" before counting.
gender_counts = pymoligame_df["Gender"].value_counts()

total_gender_count = pymoligame_df["Gender"].count()
male_count = gender_counts.get('Male', 0)
female_count = gender_counts.get('Female', 0)
# Everything that is neither 'Male' nor 'Female' falls into the remainder
nongender_count = total_gender_count - male_count - female_count

print(f" Total: {total_gender_count}\n Male: {male_count}\n Female: {female_count}\n non_specfic: {nongender_count}")


 Total: 780
 Male: 633
 Female: 136
 non_specfic: 11


In [8]:
# Share of each gender group, expressed as a percentage of all counted rows
male_percent = 100 * (male_count / total_gender_count)
female_percent = 100 * (female_count / total_gender_count)
non_gender_percent = 100 * (nongender_count / total_gender_count)

# Report the raw (unrounded) percentages, one group per line
print(
    f" % Male: {male_percent}\n"
    f" % Female: {female_percent}\n"
    f" % non_specifc: {non_gender_percent}"
)


 % Male: 81.15384615384616
 % Female: 17.435897435897434
 % non_specifc: 1.4102564102564104


In [9]:
# Assemble the gender counts and percentages into a one-row summary DataFrame.
# The dict is written in the intended display order, and that order is also
# passed as `columns` so the layout does not depend on dict-ordering behavior.
gender_figures = {
    "Male": male_count,
    "Female": female_count,
    "Other/Non-Disclosure": nongender_count,
    "Percentage Male": male_percent,
    "Percentage Female": female_percent,
    "Percentage Non Gender": non_gender_percent,
}
gender_summary = pd.DataFrame(
    {label: [value] for label, value in gender_figures.items()},
    columns=list(gender_figures),
)

# Round the percentage columns to two decimals (integer counts are unaffected)
gender_summary = gender_summary.round(2)

gender_summary

Unnamed: 0,Male,Female,Other/Non-Disclosure,Percentage Male,Percentage Female,Percentage Non Gender
0,633,136,11,81.15,17.44,1.41


In [10]:
def _as_percent(value):
    """Render a number as a percentage string with two decimals.

    Values that are already strings are returned unchanged, so this cell can
    be re-run safely: applying the float format spec to an already-formatted
    string would otherwise raise ValueError.
    """
    if isinstance(value, str):
        return value
    return "{0:,.2f}%".format(value)


# Format the percentage fields
for percent_column in ("Percentage Male", "Percentage Female", "Percentage Non Gender"):
    gender_summary[percent_column] = gender_summary[percent_column].map(_as_percent)

gender_summary


Unnamed: 0,Male,Female,Other/Non-Disclosure,Percentage Male,Percentage Female,Percentage Non Gender
0,633,136,11,81.15%,17.44%,1.41%
