In [1]:
# Imports
import pandas as pd
import numpy as np

In [4]:
# Carrega o arquivo
load_file = 'dados_compras.json'
purchase_file = pd.read_json(load_file, orient = 'records')
purchase_file.head()

Unnamed: 0,Idade,Item ID,Login,Nome do Item,Sexo,Valor
0,38,165,Aelalis34,Bone Crushing Silver Skewer,Masculino,3.37
1,21,119,Eolo46,"Stormbringer, Dark Blade of Ending Misery",Masculino,2.32
2,34,174,Assastnya25,Primitive Blade,Masculino,2.46
3,21,92,Pheusrical25,Final Critic,Masculino,1.36
4,23,63,Aela59,Stormfury Mace,Masculino,1.27


In [7]:
customer_demographics = purchase_file.loc[:, ['Sexo', 'Login', 'Idade']]
customer_demographics.head()

Unnamed: 0,Sexo,Login,Idade
0,Masculino,Aelalis34,38
1,Masculino,Eolo46,21
2,Masculino,Assastnya25,34
3,Masculino,Pheusrical25,21
4,Masculino,Aela59,23


In [14]:
# Limpeza dos dados e remoção de duplicados
customer_demographics = customer_demographics.drop_duplicates()
customer_count = customer_demographics.count()[0]
customer_count

573

In [15]:
# Converter saída para DF para uso posterior em análise
pd.DataFrame({'Total de Compradores': [customer_count]})

Unnamed: 0,Total de Compradores
0,573


In [19]:
# Cáculos básicos
average_item_price = purchase_file['Valor'].mean()
total_item_price = purchase_file['Valor'].sum()
total_item_count = purchase_file['Valor'].count()
item_id = len(purchase_file['Item ID'].unique())

# Dataframe para os resultados
summary_calculations = pd.DataFrame({"Número de Itens Únicos" : item_id,
                                     "Número de Compras" : total_item_count,
                                     "Total de Vendas" : total_item_price,
                                     "Preço Médio" : [average_item_price]})

# Data Munging
summary_calculations = summary_calculations.round(2)
summary_calculations ['Preço Médio'] = summary_calculations['Preço Médio'].map('${:,.2f}'.format)
summary_calculations ['Total de Vendas'] = summary_calculations['Total de Vendas'].map('${:,.2f}'.format)
summary_calculations = summary_calculations.loc[:, ['Número de Itens Únicos', 'Preço Médio', 'Número de Compras', 'Total de Vendas']]

summary_calculations

Unnamed: 0,Número de Itens Únicos,Preço Médio,Número de Compras,Total de Vendas
0,183,$2.93,780,"$2,286.33"


In [20]:
purchase_file["Item ID"].unique()

array([165, 119, 174,  92,  63,  10, 153, 169, 118,  99,  57,  47,  81,
        76,  44,  96, 123,  59,  91, 177,  78,   3,  11, 183,  65, 132,
       106,  49,  45, 155,  37,  48,  90,  13, 171,  25,   7, 124,  68,
        85, 120,  17, 141,  73, 151,  32,  51, 101, 140,  31,  34,   2,
        86,  39,  28, 160, 134,  83,  38, 158, 110, 122,  54, 105,  87,
        23, 144, 128, 175,  46, 150, 152, 108, 172, 167, 181,  20, 130,
       111, 103,  30, 139, 173,  55, 115,  35,  42,   9,  84, 180, 102,
        53,  18,  74, 126,  50,  62, 125, 121, 129, 149,  12,  71,  14,
        58,  27,  52,  66, 100, 112,  24,  94, 107,   0, 182,  97,  70,
        89,   1, 170,  93, 179,  36,  75, 143, 137, 176, 148, 127, 147,
       161, 154, 157, 116,  61, 131,  41, 145,  60, 162, 135,   8,  40,
        15,  29,  72, 114,  77, 117,  79,  88, 104,  95,  64,  98,  33,
       146, 166,  56,  22,  21,  16,  67, 133,  69, 159,  82, 113, 164,
         6, 163,   5,  19, 168, 136,  80,  26, 142, 178, 156, 10

In [23]:
# Cálculos Básicos
gender_count = customer_demographics['Sexo'].value_counts()
gender_percent = (gender_count /customer_count) * 100

# Dataframe para os resultados
gender_demographics = pd.DataFrame({'Sexo' : gender_count,
                                   '%' : gender_percent})

# Data Munging
gender_demographics = gender_demographics.round(2)
gender_demographics ['%'] = gender_demographics['%'].map('{:,.1f}%'.format)

In [24]:
# Output Test
gender_count

Masculino                 465
Feminino                  100
Outro / NÃ£o Divulgado      8
Name: Sexo, dtype: int64

In [25]:
# Output Test
gender_percent

Masculino                 81.151832
Feminino                  17.452007
Outro / NÃ£o Divulgado     1.396161
Name: Sexo, dtype: float64

In [26]:
# Output Test
gender_demographics

Unnamed: 0,Sexo,%
Masculino,465,81.2%
Feminino,100,17.4%
Outro / NÃ£o Divulgado,8,1.4%


In [46]:
# Agrupamentos
gender_total_item_price = purchase_file.groupby(['Sexo']).sum()['Valor'].rename('Total de Vendas')
gender_average_item_price = purchase_file.groupby(['Sexo']).mean()['Valor'].rename('Average Price')
purchase_count = purchase_file.groupby(['Sexo']).count()['Valor'].rename('Número de Compras')
normalized_total = gender_total_item_price / gender_demographics['Sexo']

# Armazenando o resultado em um Dataframe
gender_purchasing_analysis = pd.DataFrame({'Número de Compras' : purchase_count,
                                           'Valor Médio Por Item' : gender_average_item_price,
                                           'Total de Vendas' : gender_total_item_price,
                                           'Total Normalizado' : normalized_total})

# Data Munging
gender_purchasing_analysis = gender_purchasing_analysis.round(2)
gender_purchasing_analysis ['Valor Médio Por Item'] = gender_purchasing_analysis['Valor Médio Por Item'].map('${:,.2f}'.format)
gender_purchasing_analysis ['Total de Vendas'] = gender_purchasing_analysis['Total de Vendas'].map('${:,.2f}'.format)
gender_purchasing_analysis ['Total Normalizado'] = gender_purchasing_analysis['Total Normalizado'].map('${:,.2f}'.format)


In [47]:
# Resultado
gender_total_item_price

Sexo
Feminino                   382.91
Masculino                 1867.68
Outro / NÃ£o Divulgado      35.74
Name: Total de Vendas, dtype: float64