In [3]:
import pandas as pd
import datetime as dt

## **Review of Python datetime Module**

In [6]:
# Calendar date (no time component) for 18 July 2023.
someday = dt.date(year=2023, month=7, day=18)

In [7]:
# Year component of the date.
someday.year

2023

In [8]:
# Month component of the date, as an integer.
someday.month

7

In [9]:
# Day-of-month component of the date.
someday.day

18

In [19]:
# str() on a date gives the YYYY-MM-DD text form.
str(someday)

'2023-07-18'

In [18]:
# str() on a datetime gives the "YYYY-MM-DD HH:MM:SS" text form.
str(dt.datetime(year=2023, month=7, day=17, hour=17, minute=13, second=57))

'2023-07-17 17:13:57'

#### O construtor str() nos permite visualizar objetos de data em um formato legível.

In [20]:
# Date plus time of day: 17 July 2023 at 17:13:57.
sometime = dt.datetime(
    year=2023, month=7, day=17,
    hour=17, minute=13, second=57,
)

In [21]:
# Year component of the datetime.
sometime.year

2023

In [22]:
# Month component of the datetime, as an integer.
sometime.month

7

In [23]:
# Day-of-month component of the datetime.
sometime.day

17

In [24]:
# Hour component, on a 24-hour clock.
sometime.hour

17

In [25]:
# Minute component of the datetime.
sometime.minute

13

In [26]:
# Second component of the datetime.
sometime.second

57

In [34]:
# weekday() numbers the days from Monday = 0 to Sunday = 6; 17 July 2023 is a Monday.
sometime.weekday()

0

## **The pandas Timestamp Object**

In [36]:
# pd.Timestamp is the pandas scalar type for a single point in time.
type(pd.Timestamp("2023-07-18"))

pandas._libs.tslibs.timestamps.Timestamp

In [38]:
# A date-only string produces a Timestamp at midnight, so the text form includes 00:00:00.
str(pd.Timestamp("2023-07-18"))

'2023-07-18 00:00:00'

In [42]:
# All three strings are parsed, but only the value of the last expression
# is rendered as the cell output.
pd.Timestamp("2023/07/18")
pd.Timestamp("2023, 07, 18")
# Day-first input: it resolves to 18 July 2023, as the cell output shows.
pd.Timestamp("18/7/2023")

Timestamp('2023-07-18 00:00:00')

#### A ressalva aqui é que, dependendo do formato da data, às vezes o pandas consegue descobrir o formato correto da data (ano-mês-dia), mas às vezes o formato fornecido pode ser um pouco confuso, e o output retornado pode não estar conforme o esperado.

In [45]:
# A string holding both date and time keeps the time of day.
pd.Timestamp("2023-07-18 21:30:00")

Timestamp('2023-07-18 21:30:00')

In [46]:
# 12-hour input with an AM/PM marker is stored as 24-hour time (6:13:29 PM -> 18:13:29).
pd.Timestamp("2021-03-08 6:13:29 PM")

Timestamp('2021-03-08 18:13:29')

In [48]:
# A datetime.date has no time of day, so the resulting Timestamp is at midnight.
pd.Timestamp(dt.date(2023, 7, 18))

Timestamp('2023-07-18 00:00:00')

In [49]:
# A datetime.datetime converts with its time of day preserved.
pd.Timestamp(dt.datetime(2023, 7, 18, 21, 30, 10))

Timestamp('2023-07-18 21:30:10')

## **The pandas DatetimeIndex Object**

In [51]:
dates = ["2023-07-18", "1990-06-13", "2002-02-24"]

pd.DatetimeIndex(dates) # Stores the dates as datetime64 values

DatetimeIndex(['2023-07-18', '1990-06-13', '2002-02-24'], dtype='datetime64[ns]', freq=None)

In [52]:
# Three consecutive calendar days in July 2023.
dates = [dt.date(2023, 7, day_of_month) for day_of_month in (18, 19, 20)]

In [54]:
# Wrap the list of datetime.date objects in a DatetimeIndex.
dt_index = pd.DatetimeIndex(dates)

In [55]:
values = [100, 200, 300]

# Use the DatetimeIndex as the row labels, so each value is keyed by a date.
pd.Series(data = values, index = dt_index)

2023-07-18    100
2023-07-19    200
2023-07-20    300
dtype: int64

#### Em resumo, o construtor pd.DatetimeIndex() serve apenas como um container para armazenar dados no formato datetime, de modo que possam ser manipulados mais tarde em objetos Series ou DataFrames.

## The pd.to_datetime() Method

In [56]:
# A list of date strings converts to a DatetimeIndex.
pd.to_datetime(["2023-07-18", "2023-07-19", "2023-07-20"])

DatetimeIndex(['2023-07-18', '2023-07-19', '2023-07-20'], dtype='datetime64[ns]', freq=None)

In [57]:
# A single string converts to a scalar Timestamp.
pd.to_datetime("2023-07-18")

Timestamp('2023-07-18 00:00:00')

In [62]:
# Converts the current local date and time, so the output differs on every run.
pd.to_datetime(dt.datetime.now())

Timestamp('2023-07-18 21:40:31.593402')

In [63]:
# The two strings use different layouts, so each one is parsed on its own.
# pandas >= 2.0 infers a single format from the first element when given the
# whole list, and then rejects the second element; parsing element by element
# gives the same DatetimeIndex on old and new pandas releases.
pd.DatetimeIndex([pd.to_datetime(text) for text in ["2023", "July 17th, 2023"]])

DatetimeIndex(['2023-01-01', '2023-07-17'], dtype='datetime64[ns]', freq=None)

In [67]:
# Three strings that describe dates in three different layouts.
s = pd.Series(["2023", "July 17th, 2023", "2023, 7, 18"])
s

0               2023
1    July 17th, 2023
2        2023, 7, 18
dtype: object

In [68]:
# The strings in `s` use different layouts, so each value is parsed on its own.
# Passing the whole Series to pd.to_datetime makes pandas >= 2.0 infer a single
# format from the first value and raise on the others; element-wise parsing
# returns the same datetime64 Series on old and new pandas releases.
s.apply(pd.to_datetime)

0   2023-01-01
1   2023-07-17
2   2023-07-18
dtype: datetime64[ns]

In [70]:
# Mix of parseable dates, plain text ("Hello") and an impossible date (31 February).
dates = pd.Series(["July 4th, 1996", "10/04/1991", "Hello", "2015-02-31"])
dates

0    July 4th, 1996
1        10/04/1991
2             Hello
3        2015-02-31
dtype: object

In [73]:
# errors="coerce" converts every value that can be parsed and replaces the
# ones that cannot with NaT, instead of raising.
# Each string is parsed on its own: with the whole Series, pandas >= 2.0 infers
# one format from the first value and would coerce valid dates written in any
# other layout (such as "10/04/1991") to NaT as well.
dates.apply(pd.to_datetime, errors = "coerce")

# NaT == Not a Time

0   1996-07-04
1   1991-10-04
2          NaT
3          NaT
dtype: datetime64[ns]

In [75]:
pd.to_datetime([1349720105, 1349806505], unit = "s") # converts Unix timestamps given in seconds

DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05'], dtype='datetime64[ns]', freq=None)

#### Em síntese, aprendemos a converter datas nos mais diversos formatos com a função pd.to_datetime(), além de aprendermos a utilizar o parâmetro errors = "coerce" para converter os dados do nosso dataset que puderem ser convertidos para o formato de data, substituindo por NaT os que não puderem.

## **Create Range of Dates with the pd.date_range() Method, Part 1**

In [78]:
# Daily range covering 1 January through 10 January 2016, both ends included.
times = pd.date_range(
    start="2016-01-01",
    end="2016-01-10",
    freq="D",
)

In [79]:
# date_range() returns a DatetimeIndex.
type(times)

pandas.core.indexes.datetimes.DatetimeIndex

In [82]:
# First element of the range.
times[0]

Timestamp('2016-01-01 00:00:00', freq='D')

In [83]:
# Text form of the first element.
str(times[0])

'2016-01-01 00:00:00'

In [84]:
# Individual elements of a DatetimeIndex are pandas Timestamp objects.
type(times[0])

pandas._libs.tslibs.timestamps.Timestamp

In [89]:
# freq="2D" steps through the range two days at a time.
pd.date_range(start = "2016-01-01", end = "2016-01-10", freq = "2D")

DatetimeIndex(['2016-01-01', '2016-01-03', '2016-01-05', '2016-01-07',
               '2016-01-09'],
              dtype='datetime64[ns]', freq='2D')

In [90]:
pd.date_range(start = "2016-01-01", end = "2016-01-10", freq = "B") # business days only (Monday to Friday)

DatetimeIndex(['2016-01-01', '2016-01-04', '2016-01-05', '2016-01-06',
               '2016-01-07', '2016-01-08'],
              dtype='datetime64[ns]', freq='B')

In [92]:
pd.date_range(start = "2016-01-01", end = "2016-01-15", freq = "W") # one date per week; plain "W" means Sunday (W-SUN)

DatetimeIndex(['2016-01-03', '2016-01-10'], dtype='datetime64[ns]', freq='W-SUN')

In [94]:
pd.date_range(start = "2016-01-01", end = "2016-01-15", freq = "W-FRI") # the weekday to pick can be chosen, here Friday

DatetimeIndex(['2016-01-01', '2016-01-08', '2016-01-15'], dtype='datetime64[ns]', freq='W-FRI')

In [95]:
# Hourly range. The lower-case "h" alias is used because pandas 2.2 deprecated
# the upper-case "H" spelling; older pandas releases accept "h" as well.
pd.date_range(start = "2016-01-01", end = "2016-01-15", freq = "h")

DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 01:00:00',
               '2016-01-01 02:00:00', '2016-01-01 03:00:00',
               '2016-01-01 04:00:00', '2016-01-01 05:00:00',
               '2016-01-01 06:00:00', '2016-01-01 07:00:00',
               '2016-01-01 08:00:00', '2016-01-01 09:00:00',
               ...
               '2016-01-14 15:00:00', '2016-01-14 16:00:00',
               '2016-01-14 17:00:00', '2016-01-14 18:00:00',
               '2016-01-14 19:00:00', '2016-01-14 20:00:00',
               '2016-01-14 21:00:00', '2016-01-14 22:00:00',
               '2016-01-14 23:00:00', '2016-01-15 00:00:00'],
              dtype='datetime64[ns]', length=337, freq='H')

In [97]:
# The step can also be a multiple of hours, here one timestamp every six hours.
# Lower-case "6h" replaces "6H", which pandas 2.2 deprecated; older pandas
# releases accept the lower-case spelling as well.
pd.date_range(start = "2016-01-01", end = "2016-01-15", freq = "6h")

DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 06:00:00',
               '2016-01-01 12:00:00', '2016-01-01 18:00:00',
               '2016-01-02 00:00:00', '2016-01-02 06:00:00',
               '2016-01-02 12:00:00', '2016-01-02 18:00:00',
               '2016-01-03 00:00:00', '2016-01-03 06:00:00',
               '2016-01-03 12:00:00', '2016-01-03 18:00:00',
               '2016-01-04 00:00:00', '2016-01-04 06:00:00',
               '2016-01-04 12:00:00', '2016-01-04 18:00:00',
               '2016-01-05 00:00:00', '2016-01-05 06:00:00',
               '2016-01-05 12:00:00', '2016-01-05 18:00:00',
               '2016-01-06 00:00:00', '2016-01-06 06:00:00',
               '2016-01-06 12:00:00', '2016-01-06 18:00:00',
               '2016-01-07 00:00:00', '2016-01-07 06:00:00',
               '2016-01-07 12:00:00', '2016-01-07 18:00:00',
               '2016-01-08 00:00:00', '2016-01-08 06:00:00',
               '2016-01-08 12:00:00', '2016-01-08 18:00:00',
               '2016-01-

In [99]:
# Month-level range: every date is the last day of its month.
# A MonthEnd offset object replaces the "M" alias, which pandas 2.2 deprecated
# in favour of "ME" (a spelling older releases reject); the offset object
# behaves the same on old and new pandas releases.
pd.date_range(start = "2016-01-01", end = "2016-12-31", freq = pd.offsets.MonthEnd())

DatetimeIndex(['2016-01-31', '2016-02-29', '2016-03-31', '2016-04-30',
               '2016-05-31', '2016-06-30', '2016-07-31', '2016-08-31',
               '2016-09-30', '2016-10-31', '2016-11-30', '2016-12-31'],
              dtype='datetime64[ns]', freq='M')

In [100]:
pd.date_range(start = "2016-01-01", end = "2016-12-31", freq = "MS") # month-level range using the first day of each month | Month Start

DatetimeIndex(['2016-01-01', '2016-02-01', '2016-03-01', '2016-04-01',
               '2016-05-01', '2016-06-01', '2016-07-01', '2016-08-01',
               '2016-09-01', '2016-10-01', '2016-11-01', '2016-12-01'],
              dtype='datetime64[ns]', freq='MS')

In [103]:
# Yearly range: the first day of every year from 2016 through 2050.
# "YS" (Year Start) replaces the "AS" alias, which pandas 2.2 deprecated;
# older pandas releases accept "YS" as well.
pd.date_range(start = "2016-01-01", end = "2050-12-31", freq = "YS")

DatetimeIndex(['2016-01-01', '2017-01-01', '2018-01-01', '2019-01-01',
               '2020-01-01', '2021-01-01', '2022-01-01', '2023-01-01',
               '2024-01-01', '2025-01-01', '2026-01-01', '2027-01-01',
               '2028-01-01', '2029-01-01', '2030-01-01', '2031-01-01',
               '2032-01-01', '2033-01-01', '2034-01-01', '2035-01-01',
               '2036-01-01', '2037-01-01', '2038-01-01', '2039-01-01',
               '2040-01-01', '2041-01-01', '2042-01-01', '2043-01-01',
               '2044-01-01', '2045-01-01', '2046-01-01', '2047-01-01',
               '2048-01-01', '2049-01-01', '2050-01-01'],
              dtype='datetime64[ns]', freq='AS-JAN')

#### Resumo: basicamente, com a função pd.date_range() podemos gerar um objeto DatetimeIndex (container que armazena dados no formato datetime), contendo datas em um range e intervalo específicos.