In [81]:
import pandas as pd
import matplotlib.pyplot as plt

In [219]:
data = 'purchase_data.json'
df = pd.read_json(data)

df.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


In [83]:
grouped_players = df.groupby('SN')
df1 = grouped_players.count()
total_p = df1['Age'].count()

total_players = pd.DataFrame({
    'Total Players': [total_p]
})

In [84]:
grouped_items = df.groupby('Item ID')
df1 = grouped_items.count()
unique_items = df1['Age'].count()

In [85]:
avg_price = round(df['Price'].mean(),2)
avg_price = '${:.2f}'.format(avg_price)

In [86]:
total_purchases = df['Item ID'].count()

In [124]:
total_revenue = df['Price'].sum()
total_revenue = '${:,.2f}'.format(total_revenue)

total_revenue

'$2,286.33'

In [88]:
purchasing_analysis_total = pd.DataFrame({
    'Number of Unique Items': [unique_items],
    'Average Price': [avg_price],
    'Number of Purchases': [total_purchases],
    'Total Revenue': [total_revenue]
})

purchasing_analysis_total = purchasing_analysis_total[
    [
        'Number of Unique Items', 
        'Average Price',
        'Number of Purchases',
        'Total Revenue'
    ]
]
purchasing_analysis_total

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,$2.93,780,"$2,286.33"


In [89]:
users = df.drop_duplicates('SN')

gender_counts = users['Gender'].value_counts()

male = gender_counts['Male']
female = gender_counts['Female']
other = gender_counts['Other / Non-Disclosed']

percent_male = round((male/total_p)*100,2)
percent_female = round((female/total_p)*100,2)
percent_other = round((other/total_p)*100,2)

In [90]:
gender_demographics = pd.DataFrame({
    'Gender': ['Male', 'Female', 'Other / Non-Disclosed'],
    'Percentage of Players': [percent_male, percent_female, percent_other],
    'Total Count': [male, female, other]
})

gender_demographics.set_index('Gender')

Unnamed: 0_level_0,Percentage of Players,Total Count
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,81.15,465
Female,17.45,100
Other / Non-Disclosed,1.4,8


In [196]:
gender_counts = df['Gender'].value_counts()
gender_counts_df = gender_counts.to_frame()
gender_counts_df = gender_counts_df.rename(columns={'Gender':'Purchase Count'})

gender_counts_df

Unnamed: 0,Purchase Count
Male,633
Female,136
Other / Non-Disclosed,11


In [197]:
gender = df.groupby('Gender')
gender_price = gender['Price'].mean()

gender_price_df = gender_price.to_frame()
gender_price_df['Price'] = gender_price_df['Price'].map('${:.2f}'.format)
gender_price_df = gender_price_df.rename(columns={'Price':'Average Purchase Price'})

gender_price_df

Unnamed: 0_level_0,Average Purchase Price
Gender,Unnamed: 1_level_1
Female,$2.82
Male,$2.95
Other / Non-Disclosed,$3.25


In [208]:
gender_value = gender['Price'].sum()

gender_value_df = gender_value.to_frame()
gender_value_df['Price'] = gender_value_df['Price'].map('${:,.2f}'.format)
gender_value_df = gender_value_df.rename(columns={'Price':'Total Purchase Value'})

gender_value_df

Unnamed: 0_level_0,Total Purchase Value
Gender,Unnamed: 1_level_1
Female,$382.91
Male,"$1,867.68"
Other / Non-Disclosed,$35.74


In [209]:
df_reset_index = df.set_index('Gender')

df_male = df_reset_index.loc['Male']

df_male_users = df_male.groupby('SN')

df_male_value = df_male_users.sum()

male_normalized_total = df_male_value['Price'].mean()
male_normalized_total = '${:,.2f}'.format(male_normalized_total)

male_normalized_total

'$4.02'

In [210]:
df_female = df_reset_index.loc['Female']

df_female_users = df_female.groupby('SN')

df_female_value = df_female_users.sum()

female_normalized_total = df_female_value['Price'].mean()
female_normalized_total = '${:,.2f}'.format(female_normalized_total)

female_normalized_total

'$3.83'

In [211]:
df_other = df_reset_index.loc['Other / Non-Disclosed']

df_other_users = df_other.groupby('SN')

df_other_value = df_other_users.sum()

other_normalized_total = df_other_value['Price'].mean()
other_normalized_total = '${:,.2f}'.format(other_normalized_total)

other_normalized_total

'$4.47'

In [212]:
normalized_totals = pd.DataFrame({
    'Gender': ['Male', 'Female', 'Other / Non-Disclosed'],
    'Normalized Totals': [male_normalized_total, female_normalized_total, other_normalized_total]
})

normalized_totals_df = normalized_totals.set_index('Gender')

normalized_totals_df

Unnamed: 0_level_0,Normalized Totals
Gender,Unnamed: 1_level_1
Male,$4.02
Female,$3.83
Other / Non-Disclosed,$4.47


In [213]:
merge1 = gender_price_df.merge(gender_value_df, left_index=True, right_index=True)
merge1

Unnamed: 0_level_0,Average Purchase Price,Total Purchase Value
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,$2.82,$382.91
Male,$2.95,"$1,867.68"
Other / Non-Disclosed,$3.25,$35.74


In [214]:
merge2 = merge1.merge(gender_counts_df, left_index=True, right_index=True)

merge3 = merge2.merge(normalized_totals_df, left_index=True, right_index=True)


In [269]:
purchasing_analysis_gender = merge3[
    [
        'Purchase Count', 
        'Average Purchase Price',
        'Total Purchase Value',
        'Normalized Totals'
    ]
]
purchasing_analysis_gender

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Female,136,$2.82,$382.91,$3.83
Male,633,$2.95,"$1,867.68",$4.02
Other / Non-Disclosed,11,$3.25,$35.74,$4.47


In [272]:
bins = [0, 9, 10, 14, 19, 24, 29, 34, 39]

labels = ['<10', '10-14', '15-19','20-24', '25-29', '30-34', '35-39', '40+']

#pd.cut()

SN = df.groupby('SN')
user_age = SN.max()

user_age

user_age['bin'] = pd.cut(user_age['Age'], bins, labels=labels )
user_age

bin_df = user_age['bin'].value_counts()
bin_df


25-29    259
20-24    100
30-34     87
35-39     47
40+       27
15-19     20
<10       19
10-14      3
Name: bin, dtype: int64