## 1 Objetivo.
  
 - Neste notebook iremos explorar, analisar e preparar os dados para análise, visualização e modelagem.
 

 - Importando as bibliotecas.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
from datetime import date

## 2. Importando e fazendo um tratamento na base de dados

 - Importandos os dados do Studio.

In [2]:
url='https://raw.githubusercontent.com/robertferro/studio-estetica/main/studio_estetica.csv'

In [3]:
dados=pd.read_csv(url, sep=';',decimal=',')

In [4]:
dados.head()

Unnamed: 0,data_atendimento,design_de_sobrancelhas,design_coloracao,design_coloracao_buco,buco,brown_lamination,micropigmentacao,extensao_de_cilios,lash_lifting,limpeza_de_pele,microagulhamento,peeling_quimico,massagem_relaxante,pacote_massagem_5,drenagem_linfatica,striort,qtde_clientes,faturamento
0,,20.0,30.0,30.0,10.0,100.0,400.0,100.0,80.0,60.0,250.0,150.0,80.0,250.0,80.0,150.0,0.0,0.0
1,23/12/2018,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,270.0
2,24/12/2018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,25/12/2018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,26/12/2018,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,560.0


 - Excluindo os valores nulos

In [5]:
dados.isna().sum()

data_atendimento          2
design_de_sobrancelhas    0
design_coloracao          0
design_coloracao_buco     0
buco                      0
brown_lamination          0
micropigmentacao          0
extensao_de_cilios        0
lash_lifting              0
limpeza_de_pele           0
microagulhamento          0
peeling_quimico           0
massagem_relaxante        0
pacote_massagem_5         0
drenagem_linfatica        0
striort                   0
qtde_clientes             0
faturamento               0
dtype: int64

In [6]:
dados=dados.dropna()

In [7]:
dados.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 740 entries, 1 to 741
Data columns (total 18 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   data_atendimento        740 non-null    object 
 1   design_de_sobrancelhas  740 non-null    float64
 2   design_coloracao        740 non-null    float64
 3   design_coloracao_buco   740 non-null    float64
 4   buco                    740 non-null    float64
 5   brown_lamination        740 non-null    float64
 6   micropigmentacao        740 non-null    float64
 7   extensao_de_cilios      740 non-null    float64
 8   lash_lifting            740 non-null    float64
 9   limpeza_de_pele         740 non-null    float64
 10  microagulhamento        740 non-null    float64
 11  peeling_quimico         740 non-null    float64
 12  massagem_relaxante      740 non-null    float64
 13  pacote_massagem_5       740 non-null    float64
 14  drenagem_linfatica      740 non-null    fl

 - Convertendo a coluna "data_atendimento" para o formato datetime

In [8]:
# reorganizando os dados referentes a data 

def date_to_weekday(date):
    weekday_dict = {0:'segunda', 1:'terca', 2: 'quarta', 3: 'quinta', 4: 'sexta', 5: 'sabado', 6: 'domingo'}
    date_time_obj = datetime.datetime.strptime(date, '%d/%m/%Y')
    return weekday_dict[date_time_obj.weekday()]

date = dados['data_atendimento'].str.split("/", n = 3, expand = True)
dados['dia'] = date[0].astype('int32')
dados['ano'] = date[2].astype('int32')
dados['mes'] = date[1].astype('int32')
dados['dia_da_semana'] = dados['data_atendimento'].apply(date_to_weekday)

In [9]:
# corrigindo um pequeno erro de lançamento

dados.iloc[0,0] = '22/12/2018'
dados.iloc[0,21] = 'sabado'

In [10]:
dados.head()

Unnamed: 0,data_atendimento,design_de_sobrancelhas,design_coloracao,design_coloracao_buco,buco,brown_lamination,micropigmentacao,extensao_de_cilios,lash_lifting,limpeza_de_pele,...,massagem_relaxante,pacote_massagem_5,drenagem_linfatica,striort,qtde_clientes,faturamento,dia,ano,mes,dia_da_semana
1,22/12/2018,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,7.0,270.0,23,2018,12,sabado
2,24/12/2018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,24,2018,12,segunda
3,25/12/2018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,25,2018,12,terca
4,26/12/2018,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,4.0,560.0,26,2018,12,quarta
5,27/12/2018,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,2.0,90.0,27,2018,12,quinta


In [11]:
dados['data_atendimento'] = pd.to_datetime(dados['data_atendimento'],format=r"%d/%m/%Y")

In [12]:
dados.head()

Unnamed: 0,data_atendimento,design_de_sobrancelhas,design_coloracao,design_coloracao_buco,buco,brown_lamination,micropigmentacao,extensao_de_cilios,lash_lifting,limpeza_de_pele,...,massagem_relaxante,pacote_massagem_5,drenagem_linfatica,striort,qtde_clientes,faturamento,dia,ano,mes,dia_da_semana
1,2018-12-22,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,7.0,270.0,23,2018,12,sabado
2,2018-12-24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,24,2018,12,segunda
3,2018-12-25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,25,2018,12,terca
4,2018-12-26,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,4.0,560.0,26,2018,12,quarta
5,2018-12-27,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,2.0,90.0,27,2018,12,quinta


In [13]:
dados.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 740 entries, 1 to 741
Data columns (total 22 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   data_atendimento        740 non-null    datetime64[ns]
 1   design_de_sobrancelhas  740 non-null    float64       
 2   design_coloracao        740 non-null    float64       
 3   design_coloracao_buco   740 non-null    float64       
 4   buco                    740 non-null    float64       
 5   brown_lamination        740 non-null    float64       
 6   micropigmentacao        740 non-null    float64       
 7   extensao_de_cilios      740 non-null    float64       
 8   lash_lifting            740 non-null    float64       
 9   limpeza_de_pele         740 non-null    float64       
 10  microagulhamento        740 non-null    float64       
 11  peeling_quimico         740 non-null    float64       
 12  massagem_relaxante      740 non-null    float64   

In [14]:
dados.to_csv('dados_modelagem.csv', index=False)

In [15]:
pd.read_csv('dados_modelagem.csv').head()

Unnamed: 0,data_atendimento,design_de_sobrancelhas,design_coloracao,design_coloracao_buco,buco,brown_lamination,micropigmentacao,extensao_de_cilios,lash_lifting,limpeza_de_pele,...,massagem_relaxante,pacote_massagem_5,drenagem_linfatica,striort,qtde_clientes,faturamento,dia,ano,mes,dia_da_semana
0,2018-12-22,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,7.0,270.0,23,2018,12,sabado
1,2018-12-24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,24,2018,12,segunda
2,2018-12-25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,25,2018,12,terca
3,2018-12-26,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,4.0,560.0,26,2018,12,quarta
4,2018-12-27,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,2.0,90.0,27,2018,12,quinta
