# Trabalhando com Datas

In [1]:
# Importando a Biblioteca Pandas
import pandas as pd

In [4]:
# Load each city's sales spreadsheet into its own DataFrame.
# The workbooks are read one after another, in the order listed.
(df_01, df_02, df_03, df_04, df_05) = map(
    pd.read_excel,
    [
        "/content/Aracaju.xlsx",
        "/content/Fortaleza.xlsx",
        "/content/Natal.xlsx",
        "/content/Recife.xlsx",
        "/content/Salvador.xlsx",
    ],
)

In [5]:
# Stack the five city DataFrames into a single DataFrame.
# Each file keeps its own row labels, so index labels can repeat across cities.
city_frames = [df_01, df_02, df_03, df_04, df_05]
df = pd.concat(city_frames)

In [11]:
# Create the revenue column ("Receita") as "Vendas" multiplied by "Qtde", row by row
df["Receita"] = df["Vendas"] * df["Qtde"]

In [6]:
# Convert the date column to 64-bit integers.
# NOTE(review): presumably "Data" arrives from read_excel as datetime, in which
# case the integers are nanoseconds since the Unix epoch — confirm.
data_as_int = df["Data"].astype("int64")
df["Data"] = data_as_int

In [7]:
# Check the data type of each column (confirms "Data" is now int64)
df.dtypes

Cidade     object
Data        int64
Vendas    float64
LojaID      int64
Qtde        int64
dtype: object

In [8]:
# Convert the date column back into a datetime type
data_as_datetime = pd.to_datetime(df["Data"])
df["Data"] = data_as_datetime

In [9]:
# Check the data type of each column (confirms "Data" is now a datetime column)
df.dtypes

Cidade            object
Data      datetime64[ns]
Vendas           float64
LojaID             int64
Qtde               int64
dtype: object

In [12]:
# Total revenue per calendar year: group "Receita" by the year taken from "Data"
sale_year = df["Data"].dt.year
df["Receita"].groupby(sale_year).sum()

Data
2018    118176.53
2019    228246.45
Name: Receita, dtype: float64

In [13]:
# Add a new column holding the year of each sale
date_parts = df["Data"].dt
df["Ano_Venda"] = date_parts.year

In [14]:
# Take a random sample of 10 rows from the dataset
df.sample(10)

Unnamed: 0,Cidade,Data,Vendas,LojaID,Qtde,Receita,Ano_Venda
238,Natal,2019-01-03,139.64,1035,1,139.64,2019
173,Natal,2019-01-02,31.77,1034,2,63.54,2019
1,Natal,2018-11-25,563.0,853,2,1126.0,2018
32,Recife,2019-01-01,15.93,982,7,111.51,2019
77,Fortaleza,2019-01-01,167.64,1005,1,167.64,2019
58,Recife,2019-01-01,8.02,983,2,16.04,2019
103,Recife,2019-01-01,41.65,982,3,124.95,2019
54,Natal,2018-10-03,373.0,852,4,1492.0,2018
126,Natal,2019-01-02,41.69,1036,2,83.38,2019
198,Natal,2019-01-02,13.65,1037,3,40.95,2019


In [15]:
# Extract the month and the day of each sale into their own columns
df["mes_venda"] = df["Data"].dt.month
df["dia_venda"] = df["Data"].dt.day

In [16]:
# Take a random sample of 10 rows to inspect the new month and day columns.
# Only the last expression of a cell is displayed, so a single call is enough.
df.sample(10)

Unnamed: 0,Cidade,Data,Vendas,LojaID,Qtde,Receita,Ano_Venda,mes_venda,dia_venda
125,Fortaleza,2019-03-02,37.6,983,4,150.4,2019,3,2
22,Recife,2019-01-01,39.53,983,3,118.59,2019,1,1
31,Natal,2018-04-03,401.0,854,4,1604.0,2018,4,3
42,Fortaleza,2019-01-01,38.71,1004,5,193.55,2019,1,1
130,Natal,2019-01-02,59.78,1036,1,59.78,2019,1,2
26,Natal,2018-07-20,370.0,853,3,1110.0,2018,7,20
171,Natal,2019-01-02,136.45,1035,3,409.35,2019,1,2
239,Salvador,2019-01-03,161.41,1037,3,484.23,2019,1,3
46,Recife,2019-01-01,239.17,981,8,1913.36,2019,1,1
207,Salvador,2019-01-02,46.7,1036,2,93.4,2019,1,2


In [17]:
# Return the earliest date in the dataset
df["Data"].min()

Timestamp('2018-01-01 00:00:00')

In [18]:
# Time elapsed between each sale and the earliest sale date in the dataset
earliest_date = df["Data"].min()
df["diferenca_dias"] = df["Data"] - earliest_date

In [19]:
# Take a random sample of 10 rows from the dataset
df.sample(10)

Unnamed: 0,Cidade,Data,Vendas,LojaID,Qtde,Receita,Ano_Venda,mes_venda,dia_venda,diferenca_dias
139,Natal,2019-01-02,169.01,1036,1,169.01,2019,1,2,366 days
70,Fortaleza,2019-01-01,79.57,1002,5,397.85,2019,1,1,365 days
66,Salvador,2019-01-01,70.2,1035,3,210.6,2019,1,1,365 days
95,Aracaju,2018-01-01,127.43,1522,9,1146.87,2018,1,1,0 days
183,Salvador,2019-01-02,120.23,1037,2,240.46,2019,1,2,366 days
205,Natal,2019-01-02,196.09,1037,1,196.09,2019,1,2,366 days
53,Recife,2019-01-01,37.05,983,6,222.3,2019,1,1,365 days
12,Natal,2019-02-02,709.0,853,3,2127.0,2019,2,2,397 days
165,Natal,2019-01-02,11.54,1037,1,11.54,2019,1,2,366 days
83,Natal,2019-01-02,63.66,1036,2,127.32,2019,1,2,366 days


In [28]:
# Add the quarter (1-4) of each sale as a new column
sale_quarter = df["Data"].dt.quarter
df["trimestre_venda"] = sale_quarter

In [29]:
# Take a random sample of 5 rows from the dataset
df.sample(5)

Unnamed: 0,Cidade,Data,Vendas,LojaID,Qtde,Receita,Ano_Venda,mes_venda,dia_venda,diferenca_dias,semestre_venda,trimestre_venda
188,Salvador,2019-01-02,178.11,1035,3,534.33,2019,1,2,366 days,1,1
208,Natal,2019-01-02,39.4,1036,3,118.2,2019,1,2,366 days,1,1
190,Salvador,2019-01-02,78.78,1035,3,236.34,2019,1,2,366 days,1,1
205,Natal,2019-01-02,196.09,1037,1,196.09,2019,1,2,366 days,1,1
72,Natal,2018-05-02,612.0,854,4,2448.0,2018,5,2,121 days,2,2


In [26]:
# Keep only the sales made in March 2019
is_2019 = df["Data"].dt.year == 2019
is_march = df["Data"].dt.month == 3
vendas_marco_19 = df.loc[is_2019 & is_march]

In [27]:
# Display the filtered March 2019 sales
vendas_marco_19

Unnamed: 0,Cidade,Data,Vendas,LojaID,Qtde,Receita,Ano_Venda,mes_venda,dia_venda,diferenca_dias,semestre_venda
108,Fortaleza,2019-03-02,152.89,981,4,611.56,2019,3,2,425 days,1
109,Fortaleza,2019-03-02,18.90,982,6,113.40,2019,3,2,425 days,1
110,Fortaleza,2019-03-02,51.98,983,6,311.88,2019,3,2,425 days,1
111,Fortaleza,2019-03-02,8.00,981,3,24.00,2019,3,2,425 days,1
112,Fortaleza,2019-03-02,133.59,982,1,133.59,2019,3,2,425 days,1
...,...,...,...,...,...,...,...,...,...,...,...
137,Salvador,2019-03-02,51.66,1036,3,154.98,2019,3,2,425 days,1
138,Salvador,2019-03-02,212.03,1037,3,636.09,2019,3,2,425 days,1
139,Salvador,2019-03-02,169.01,1036,1,169.01,2019,3,2,425 days,1
140,Salvador,2019-03-02,20.79,1036,2,41.58,2019,3,2,425 days,1
