# Time Methods

Python's standard library provides a `datetime` object that holds date and time information.

Pandas lets us easily extract information from datetime values for use in feature engineering.

In [1]:
import pandas as pd
import numpy as np

from datetime import datetime

import warnings

# NOTE(review): this silences every warning for the whole session, including
# any pandas date-parsing or deprecation warnings that later cells may raise.
warnings.filterwarnings('ignore')

In [2]:
# Components of the example timestamp: 29 March 2023, 02:15:30.
year, month, day = 2023, 3, 29
hour, minute, second = 2, 15, 30

In [3]:
# Only the calendar fields are supplied, so the time part defaults to midnight.
date = datetime(year, month, day)
date

datetime.datetime(2023, 3, 29, 0, 0)

In [4]:
# Full timestamp: calendar fields plus hour, minute and second.
date_time = datetime(year, month, day, hour, minute, second)
date_time

datetime.datetime(2023, 3, 29, 2, 15, 30)

## Different Attributes

In [5]:
# Year component of the datetime built above.
date_time.year

2023

In [6]:
# Month component as an integer (1-12).
date_time.month

3

In [7]:
# Day-of-month component.
date_time.day

29

## Using Pandas

In [8]:
# Two differently formatted date strings plus a missing value.
series = pd.Series(
    data=["Nov 3, 1990", "2000-01-01", None],
)
series

0    Nov 3, 1990
1     2000-01-01
2           None
dtype: object

In [9]:
# The entries use different layouts ("Nov 3, 1990" vs "2000-01-01").
# pandas >= 2.0 infers one format from the first element and raises on the
# others unless each element is parsed on its own with format="mixed"
# (this keyword requires pandas >= 2.0). The missing value becomes NaT.
time_series = pd.to_datetime(series, format="mixed")
time_series

0   1990-11-03
1   2000-01-01
2          NaT
dtype: datetime64[ns]

**Checking whether a date is day-first (European) or month-first (American)**

In [10]:
# Unambiguous day-first date: 31 cannot be a month.
obvious_european_date = "31-12-2000"

In [11]:
# No hint is given; the result below has day=31 and month=12, the only valid reading.
pd.to_datetime(obvious_european_date)

Timestamp('2000-12-31 00:00:00')

In [12]:
# Ambiguous: could be 10 December (day-first) or 12 October (month-first).
european_date = "10-12-2000"

In [13]:
# Without a hint pandas reads the string month-first (12 October), as the output shows.
pd.to_datetime(european_date)

Timestamp('2000-10-12 00:00:00')

In [14]:
# dayfirst=True makes pandas treat the first number as the day (10 December).
pd.to_datetime(european_date, dayfirst=True)

Timestamp('2000-12-10 00:00:00')

## Custom Time String Formatting

In [15]:
# Day, abbreviated month name and year separated by double dashes.
style_date = "12--Dec--2000"

In [16]:
# %d = day of month, %b = abbreviated month name, %Y = four-digit year;
# the literal "--" separators in the format must match the string exactly.
pd.to_datetime(style_date, format="%d--%b--%Y")

Timestamp('2000-12-12 00:00:00')

In [17]:
# Free-text date with an ordinal suffix and a filler word.
custom_date = "12th of Dec 2000"

In [18]:
# No format is given; pandas still recognises this layout (see the output below).
pd.to_datetime(custom_date)

Timestamp('2000-12-12 00:00:00')

## Reading CSV File

In [19]:
# Load the raw sales file with default parsing (dates stay as strings).
sales_dataframe = pd.read_csv(
    filepath_or_buffer='RetailSales_BeerWineLiquor.csv',
)
sales_dataframe

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822
...,...,...
335,2019-12-01,6630
336,2020-01-01,4388
337,2020-02-01,4533
338,2020-03-01,5562


In [20]:
# Straight from read_csv the column holds plain strings (dtype: object).
sales_dataframe['DATE']

0      1992-01-01
1      1992-02-01
2      1992-03-01
3      1992-04-01
4      1992-05-01
          ...    
335    2019-12-01
336    2020-01-01
337    2020-02-01
338    2020-03-01
339    2020-04-01
Name: DATE, Length: 340, dtype: object

In [21]:
# Convert the string column to real datetimes and store it back in place.
parsed_dates = pd.to_datetime(sales_dataframe['DATE'])
sales_dataframe['DATE'] = parsed_dates
sales_dataframe

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822
...,...,...
335,2019-12-01,6630
336,2020-01-01,4388
337,2020-02-01,4533
338,2020-03-01,5562


In [22]:
# After the conversion the column dtype is datetime64[ns].
sales_dataframe['DATE']

0     1992-01-01
1     1992-02-01
2     1992-03-01
3     1992-04-01
4     1992-05-01
         ...    
335   2019-12-01
336   2020-01-01
337   2020-02-01
338   2020-03-01
339   2020-04-01
Name: DATE, Length: 340, dtype: datetime64[ns]

### Parsing Dates

In [23]:
# Parse the DATE column while reading, so no separate to_datetime step is needed.
# The column is selected by name rather than position, so the call keeps
# working if the file's column order changes.
sales_dataframe = pd.read_csv('RetailSales_BeerWineLiquor.csv', parse_dates=["DATE"])
sales_dataframe

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822
...,...,...
335,2019-12-01,6630
336,2020-01-01,4388
337,2020-02-01,4533
338,2020-03-01,5562


In [24]:
# parse_dates converted the column during the read (dtype: datetime64[ns]).
sales_dataframe['DATE']

0     1992-01-01
1     1992-02-01
2     1992-03-01
3     1992-04-01
4     1992-05-01
         ...    
335   2019-12-01
336   2020-01-01
337   2020-02-01
338   2020-03-01
339   2020-04-01
Name: DATE, Length: 340, dtype: datetime64[ns]

## Resampling

In [25]:
# Reload the file with DATE parsed as datetimes for the resampling examples.
# The column is selected by name rather than position, so the call keeps
# working if the file's column order changes.
sales_dataframe = pd.read_csv('RetailSales_BeerWineLiquor.csv', parse_dates=["DATE"])
sales_dataframe

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822
...,...,...
335,2019-12-01,6630
336,2020-01-01,4388
337,2020-02-01,4533
338,2020-03-01,5562


In [26]:
# Use the parsed dates as the index so the frame can be resampled by date.
# Guarded so the cell can be re-run: once DATE is the index it is no longer a
# column, and a second set_index("DATE") would raise KeyError.
if sales_dataframe.index.name != "DATE":
    sales_dataframe = sales_dataframe.set_index("DATE")

In [27]:
# DATE is now the index rather than a regular column.
sales_dataframe

Unnamed: 0_level_0,MRTSSM4453USN
DATE,Unnamed: 1_level_1
1992-01-01,1509
1992-02-01,1541
1992-03-01,1597
1992-04-01,1675
1992-05-01,1822
...,...
2019-12-01,6630
2020-01-01,4388
2020-02-01,4533
2020-03-01,5562


In [28]:
# Yearly mean of the monthly sales, labelled at each year's end (Dec 31).
# An offset object replaces the "A" string alias, which pandas 2.2 deprecated
# in favour of "YE"; YearEnd() has the same meaning on every pandas version.
sales_dataframe.resample(rule=pd.offsets.YearEnd()).mean()

Unnamed: 0_level_0,MRTSSM4453USN
DATE,Unnamed: 1_level_1
1992-12-31,1807.25
1993-12-31,1794.833333
1994-12-31,1841.75
1995-12-31,1833.916667
1996-12-31,1929.75
1997-12-31,2006.75
1998-12-31,2115.166667
1999-12-31,2206.333333
2000-12-31,2375.583333
2001-12-31,2468.416667
