# Trabalhando com datas 

In [1]:
#Importando a biblioteca pandas
import pandas as pd

In [2]:
# Read each city's sales workbook into its own DataFrame (df1..df5)
df1, df2, df3, df4, df5 = (
    pd.read_excel(caminho, engine='openpyxl')
    for caminho in (
        'datasets/Aracaju.xlsx',
        'datasets/Fortaleza.xlsx',
        'datasets/Natal.xlsx',
        'datasets/Recife.xlsx',
        'datasets/Salvador.xlsx',
    )
)

In [3]:
# Stack the five city frames vertically into a single DataFrame.
# ignore_index=False (the default) keeps each frame's own row labels,
# so index values repeat across cities.
df = pd.concat([df1, df2, df3, df4, df5], axis=0, ignore_index=False)

In [4]:
# Show the first 5 rows
df.head(n=5)

Unnamed: 0,Cidade,Data,Vendas,LojaID,Qtde
0,Aracaju,2018-01-01,142.0,1520,1
1,Aracaju,2018-01-01,14.21,1522,6
2,Aracaju,2018-01-01,71.55,1520,1
3,Aracaju,2018-01-01,3.01,1521,7
4,Aracaju,2018-01-01,24.51,1522,8


In [26]:
# Create the revenue column: Vendas multiplied element-wise by Qtde
df["Receita"] = df["Vendas"] * df["Qtde"]

In [27]:
# Cast the date column to a 64-bit integer dtype
# NOTE(review): for datetime values this presumably stores nanoseconds since the Unix epoch — confirm
df["Data"] = df["Data"].astype("int64")

In [28]:
# Check the data type of each column
df.dtypes

Cidade        object
Data           int64
Vendas       float64
LojaID         int64
Qtde           int64
Ano_Venda      int64
Receita      float64
dtype: object

In [29]:
# Convert the date column to a datetime dtype
df["Data"] = pd.to_datetime(df["Data"])

In [30]:
# Check the column dtypes again after the datetime conversion
df.dtypes

Cidade               object
Data         datetime64[ns]
Vendas              float64
LojaID                int64
Qtde                  int64
Ano_Venda             int64
Receita             float64
dtype: object

In [31]:
# Total revenue grouped by the calendar year of the sale date
df["Receita"].groupby(df["Data"].dt.year).sum()

Data
2018    118176.53
2019    228246.45
Name: Receita, dtype: float64

In [24]:
# Create a new column holding the year of each sale date
# NOTE(review): recorded dtypes outputs earlier in the notebook already list Ano_Venda,
# so this cell appears to have been executed before the cells above it — confirm the cell order
df["Ano_Venda"] = df["Data"].dt.year

In [25]:
# Display 5 randomly drawn rows (no seed is set, so the rows differ between runs)
df.sample(n=5)

Unnamed: 0,Cidade,Data,Vendas,LojaID,Qtde,Ano_Venda
24,Aracaju,2018-01-01,151.04,1522,6,2018
66,Salvador,2019-01-01,70.2,1035,3,2019
37,Natal,2018-10-03,726.0,852,2,2018
23,Salvador,2019-01-01,193.2,1036,3,2019
135,Fortaleza,2019-03-02,10.79,983,5,2019


In [38]:
# Row-wise ratio of the revenue column to the sale value column
df["Receita/Vendas"] = df["Receita"].div(df["Vendas"])

In [39]:
# Preview the first 5 rows
df.head(n=5)

Unnamed: 0,Cidade,Data,Vendas,LojaID,Qtde,Ano_Venda,Receita,mes_venda,dia_venda,diferenca_dias,Receita/Vendas
0,Aracaju,2018-01-01,142.0,1520,1,2018,142.0,1,1,0 days,1.0
1,Aracaju,2018-01-01,14.21,1522,6,2018,85.26,1,1,0 days,6.0
2,Aracaju,2018-01-01,71.55,1520,1,2018,71.55,1,1,0 days,1.0
3,Aracaju,2018-01-01,3.01,1521,7,2018,21.07,1,1,0 days,7.0
4,Aracaju,2018-01-01,24.51,1522,8,2018,196.08,1,1,0 days,8.0


In [33]:
# Extract the month and the day of each sale date into their own columns
df["mes_venda"] = df["Data"].dt.month
df["dia_venda"] = df["Data"].dt.day

In [34]:
# Display 5 randomly drawn rows (no seed is set, so the rows differ between runs)
df.sample(n=5)

Unnamed: 0,Cidade,Data,Vendas,LojaID,Qtde,Ano_Venda,Receita,mes_venda,dia_venda
175,Natal,2019-01-02,16.53,1036,2,2019,33.06,1,2
102,Recife,2019-01-01,96.5,982,7,2019,675.5,1,1
151,Natal,2019-01-02,14.63,1035,1,2019,14.63,1,2
6,Recife,2019-01-01,16.27,981,6,2019,97.62,1,1
92,Recife,2019-01-01,163.02,981,4,2019,652.08,1,1


In [35]:
# Return the earliest date in the Data column
df["Data"].min()

Timestamp('2018-01-01 00:00:00')

In [36]:
# Elapsed time between each sale date and the earliest sale date
df["diferenca_dias"] = df["Data"].sub(df["Data"].min())

In [37]:
# Display 5 randomly drawn rows (no seed is set, so the rows differ between runs)
df.sample(n=5)

Unnamed: 0,Cidade,Data,Vendas,LojaID,Qtde,Ano_Venda,Receita,mes_venda,dia_venda,diferenca_dias
94,Fortaleza,2019-01-01,150.49,1003,5,2019,752.45,1,1,365 days
6,Recife,2019-01-01,16.27,981,6,2019,97.62,1,1,365 days
53,Natal,2019-02-14,290.0,854,4,2019,1160.0,2,14,409 days
48,Natal,2018-09-20,676.0,852,4,2018,2704.0,9,20,262 days
1,Natal,2018-11-25,563.0,853,2,2018,1126.0,11,25,328 days


In [43]:
# Create the quarter column from the sale date
df["Trimestre_Venda"] = df["Data"].dt.quarter

In [44]:
# Preview the first 5 rows
# NOTE(review): the recorded output of this cell includes a Semestre_Venda column,
# but no cell visible here creates it — probably left over from a deleted cell; confirm
df.head(n=5)

Unnamed: 0,Cidade,Data,Vendas,LojaID,Qtde,Ano_Venda,Receita,mes_venda,dia_venda,diferenca_dias,Receita/Vendas,Semestre_Venda,Trimestre_Venda
0,Aracaju,2018-01-01,142.0,1520,1,2018,142.0,1,1,0 days,1.0,1,1
1,Aracaju,2018-01-01,14.21,1522,6,2018,85.26,1,1,0 days,6.0,1,1
2,Aracaju,2018-01-01,71.55,1520,1,2018,71.55,1,1,0 days,1.0,1,1
3,Aracaju,2018-01-01,3.01,1521,7,2018,21.07,1,1,0 days,7.0,1,1
4,Aracaju,2018-01-01,24.51,1522,8,2018,196.08,1,1,0 days,8.0,1,1


In [46]:
# Display 5 randomly drawn rows (no seed is set, so the rows differ between runs)
df.sample(n=5)

Unnamed: 0,Cidade,Data,Vendas,LojaID,Qtde,Ano_Venda,Receita,mes_venda,dia_venda,diferenca_dias,Receita/Vendas,Semestre_Venda,Trimestre_Venda
65,Salvador,2019-01-01,3.39,1034,2,2019,6.78,1,1,365 days,2.0,1,1
40,Salvador,2019-02-13,161.87,1036,3,2019,485.61,2,13,408 days,3.0,1,1
57,Recife,2019-01-01,201.41,980,3,2019,604.23,1,1,365 days,3.0,1,1
96,Fortaleza,2019-01-01,23.31,1004,4,2019,93.24,1,1,365 days,4.0,1,1
82,Salvador,2019-01-01,207.88,1035,1,2019,207.88,1,1,365 days,1.0,1,1


In [50]:
# Keep only the sales whose date falls in March 2019
vendas_marco_19 = df[df["Data"].dt.year.eq(2019) & df["Data"].dt.month.eq(3)]

In [52]:
# Display all sales from March 2019
vendas_marco_19

Unnamed: 0,Cidade,Data,Vendas,LojaID,Qtde,Ano_Venda,Receita,mes_venda,dia_venda,diferenca_dias,Receita/Vendas,Semestre_Venda,Trimestre_Venda
108,Fortaleza,2019-03-02,152.89,981,4,2019,611.56,3,2,425 days,4.0,1,1
109,Fortaleza,2019-03-02,18.90,982,6,2019,113.40,3,2,425 days,6.0,1,1
110,Fortaleza,2019-03-02,51.98,983,6,2019,311.88,3,2,425 days,6.0,1,1
111,Fortaleza,2019-03-02,8.00,981,3,2019,24.00,3,2,425 days,3.0,1,1
112,Fortaleza,2019-03-02,133.59,982,1,2019,133.59,3,2,425 days,1.0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
137,Salvador,2019-03-02,51.66,1036,3,2019,154.98,3,2,425 days,3.0,1,1
138,Salvador,2019-03-02,212.03,1037,3,2019,636.09,3,2,425 days,3.0,1,1
139,Salvador,2019-03-02,169.01,1036,1,2019,169.01,3,2,425 days,1.0,1,1
140,Salvador,2019-03-02,20.79,1036,2,2019,41.58,3,2,425 days,2.0,1,1
