# Time methods

### 

In [1]:
import numpy as np
import pandas as pd
from datetime import datetime

In [2]:
# DATE-TIME object

myyear = 2023
mymonth = 6
myday = 21
myhour = 18
mymin = 24
mysec = 50

In [3]:
mydate = datetime(myyear,mymonth,myday)
mydate

datetime.datetime(2023, 6, 21, 0, 0)

In [4]:
mydatetime = datetime(myyear,mymonth,myday,myhour,mymin,mysec)
mydatetime

datetime.datetime(2023, 6, 21, 18, 24, 50)

In [5]:
# cehcking year
mydatetime.year

2023

In [6]:
# checking month
mydatetime.month

6

In [7]:
# checking minutes
mydatetime.minute

24

In [14]:
myser = pd.Series(['Nov 3, 1990','2000-01-01',None])
myser

0    Nov 3, 1990
1     2000-01-01
2           None
dtype: object

In [13]:
# CONVERTING STRING OBJECT TO DATETIME 

#DEFAULT FORMAT YYYY-MM-DD

timeser = pd.to_datetime(myser)
timeser

#data type is datetime64[ns]
# ns implies accuracy upto nano seconds

0   1990-11-03
1   2000-01-01
2          NaT
dtype: datetime64[ns]

In [15]:
timeser[0]

Timestamp('1990-11-03 00:00:00')

In [18]:
timeser[0].month

11

In [12]:
# EUROPEAN DATE FORMAT: DD-MM-YYYY
# AMERICAN DATE FORMAT: MM-DD-YYYY

In [19]:
#converting EUROPEAN DATE DD-MM-YYYY to ISO date format YYYY-MM-DD

euro_date = '10-12-2000'
pd.to_datetime(euro_date,dayfirst=True)

#dayfirst = True implies '10' in euro_date is day

Timestamp('2000-12-10 00:00:00')

In [20]:
#without parameter dayfirst
pd.to_datetime(euro_date)

#day '10' is treated as month

Timestamp('2000-10-12 00:00:00')

In [21]:
# Situation where european to ISO format is obvious
# i.e., parameter dayfirst is not required

obvio_euro_date = '31-12-2000'
pd.to_datetime(obvio_euro_date)

  pd.to_datetime(obvio_euro_date)


Timestamp('2000-12-31 00:00:00')

### Custom Time String Formatting

A full table of codes can be found here: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes

In [25]:
style_date = '12--Dec--2000'
pd.to_datetime(style_date, format = '%d--%b--%Y')

Timestamp('2000-12-12 00:00:00')

In [29]:
custom_date = '12th of dec 2000'
pd.to_datetime(custom_date)

Timestamp('2000-12-12 00:00:00')

### Reading dates from CSV file

In [27]:
sales = pd.read_csv('RetailSales_BeerWineLiquor.csv')

In [28]:
sales.head()

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822


In [31]:
sales['DATE']

# DATE column is of data type object

0      1992-01-01
1      1992-02-01
2      1992-03-01
3      1992-04-01
4      1992-05-01
          ...    
335    2019-12-01
336    2020-01-01
337    2020-02-01
338    2020-03-01
339    2020-04-01
Name: DATE, Length: 340, dtype: object

In [32]:
# Converting object dtype to datetime

sales['DATE'] = pd.to_datetime(sales['DATE'])

In [36]:
sales['DATE']

0     1992-01-01
1     1992-02-01
2     1992-03-01
3     1992-04-01
4     1992-05-01
         ...    
335   2019-12-01
336   2020-01-01
337   2020-02-01
338   2020-03-01
339   2020-04-01
Name: DATE, Length: 340, dtype: datetime64[ns]

In [39]:
sales['DATE'][1].month

2

- **Automatically setting date column dtype as datetime using parameter parse_dates while reading csv file**

In [50]:
# Passing index of date column in parameter parse_dates to change dtype

sales = pd.read_csv('RetailSales_BeerWineLiquor.csv', parse_dates = [0])

In [51]:
sales.head()

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822


In [52]:
sales['DATE']

0     1992-01-01
1     1992-02-01
2     1992-03-01
3     1992-04-01
4     1992-05-01
         ...    
335   2019-12-01
336   2020-01-01
337   2020-02-01
338   2020-03-01
339   2020-04-01
Name: DATE, Length: 340, dtype: datetime64[ns]

### Resampling / Grouping by

Used when the time series (our data) has time as the index

.resample() has a 'rule' parameter by which we need to group the data by.

Then you need to call some sort of aggregation function.

The **rule** parameter describes the frequency with which to apply the aggregation function (daily, monthly, yearly, etc.)

For criteria for 'rule', refer to the table below. http://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases

The aggregation function is needed because, due to resampling, we need some sort of mathematical rule to join the rows (mean, sum, count, etc.)

In [54]:
# setting index as time in the sales dataframe

sales = sales.set_index('DATE')

In [46]:
sales.head(10)

Unnamed: 0_level_0,MRTSSM4453USN
DATE,Unnamed: 1_level_1
1992-01-01,1509
1992-02-01,1541
1992-03-01,1597
1992-04-01,1675
1992-05-01,1822
1992-06-01,1775
1992-07-01,1912
1992-08-01,1862
1992-09-01,1770
1992-10-01,1882


In [57]:
# grouping data by year end and finding mean

sales.resample(rule = 'A').mean()

Unnamed: 0_level_0,MRTSSM4453USN
DATE,Unnamed: 1_level_1
1992-12-31,1807.25
1993-12-31,1794.833333
1994-12-31,1841.75
1995-12-31,1833.916667
1996-12-31,1929.75
1997-12-31,2006.75
1998-12-31,2115.166667
1999-12-31,2206.333333
2000-12-31,2375.583333
2001-12-31,2468.416667


- **.dt() method in pandas to extract year month etc from date using attributes**

In [58]:
sales = pd.read_csv('RetailSales_BeerWineLiquor.csv', parse_dates = [0])
sales.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 340 entries, 0 to 339
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   DATE           340 non-null    datetime64[ns]
 1   MRTSSM4453USN  340 non-null    int64         
dtypes: datetime64[ns](1), int64(1)
memory usage: 5.4 KB


In [61]:
sales['DATE'].dt.year

0      1992
1      1992
2      1992
3      1992
4      1992
       ... 
335    2019
336    2020
337    2020
338    2020
339    2020
Name: DATE, Length: 340, dtype: int64