# Time Methods

In [1]:
import numpy as np
import pandas as pd

# Python Datetime Review

In [2]:
from datetime import datetime

In [3]:
# Components of the example timestamp: 1 December 2015, 02:30:15.
myyear, mymonth, myday = 2015, 12, 1
myhour, mymin, mysec = 2, 30, 15

In [4]:
# Date only: the time-of-day fields are left at their defaults.
mydate = datetime(year=myyear, month=mymonth, day=myday)

In [5]:
mydate

datetime.datetime(2015, 12, 1, 0, 0)

In [6]:
# Full timestamp, with hour, minute, and second supplied explicitly.
mydatetime = datetime(
    year=myyear, month=mymonth, day=myday,
    hour=myhour, minute=mymin, second=mysec,
)

In [7]:
mydatetime

datetime.datetime(2015, 12, 1, 2, 30, 15)

In [8]:
mydatetime.year

2015

-------

# Pandas

# Converting to datetime
- pd.to_datetime()

In [13]:
# Sample input for pd.to_datetime(): two date strings in different layouts
# plus a missing value.
myser = pd.Series(
    data=[
        'Nov 3, 1990',  # month-name layout
        '2000-12-01',   # ISO layout
        None,           # missing value
    ]
)

In [14]:
myser

0    Nov 3, 1990
1     2000-12-01
2           None
dtype: object

In [15]:
myser

0    Nov 3, 1990
1     2000-12-01
2           None
dtype: object

# pd.to_datetime()

In [17]:
# Parse every string in the Series into a timestamp; the missing entry becomes NaT.
# NOTE(review): the two strings use different layouts. pandas >= 2.0 infers a
# single format from the first element and may raise on the second one; if so,
# pass format='mixed' (only available from pandas 2.0). TODO confirm against
# the installed pandas version.
timeser = pd.to_datetime(myser)

In [18]:
timeser

0   1990-11-03
1   2000-12-01
2          NaT
dtype: datetime64[ns]

In [19]:
timeser[0].year

1990

In [20]:
obvi_euro_date = '31-12-2020'

In [21]:
# No hint is passed here, yet the output shows pandas read the leading 31 as
# the day (presumably because 31 cannot be a month).
pd.to_datetime(obvi_euro_date)

Timestamp('2020-12-31 00:00:00')

In [24]:
euro_date = '10-12-2020' # actually 10th Dec, 2020

In [27]:
# '10-12-2020' is ambiguous. dayfirst=True makes pandas read it as 10 December
# instead of the default month-first reading (12 October).
pd.to_datetime(euro_date, dayfirst = True) # the day comes first in this string

Timestamp('2020-12-10 00:00:00')

-------

# Custom Time String Formatting

In [28]:
style_date = '12--Dec--2020' #maybe these values are coming from old websites

In [30]:
# Spell out the layout explicitly: %d = day of month, %b = abbreviated month
# name, %Y = four-digit year. The literal '--' separators in the format match
# the separators in the input string.
pd.to_datetime(style_date, format = '%d--%b--%Y')

Timestamp('2020-12-12 00:00:00')

In [31]:
strange_date = '12th of Dec 2000'

In [32]:
# No format is given: the output shows pandas still parses this free-form
# string without help.
pd.to_datetime(strange_date)

Timestamp('2000-12-12 00:00:00')

--------

# Data

Retail Sales: Beer, Wine, and Liquor Stores

Units: Millions of Dollars, Not Seasonally Adjusted

Frequency: Monthly

In [34]:
# Load the monthly retail-sales data. Without parse_dates, the DATE column is
# read as plain strings (dtype: object), as the next cells show.
sales = pd.read_csv('Data/RetailSales_BeerWineLiquor.csv')

In [35]:
sales.head()

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822


In [36]:
sales['DATE']

0      1992-01-01
1      1992-02-01
2      1992-03-01
3      1992-04-01
4      1992-05-01
          ...    
335    2019-12-01
336    2020-01-01
337    2020-02-01
338    2020-03-01
339    2020-04-01
Name: DATE, Length: 340, dtype: object

In [39]:
# Convert the DATE column from strings (dtype: object) to datetime64 so that
# datetime attributes such as .year become available on its values.
sales['DATE'] = pd.to_datetime(sales['DATE'])

In [40]:
sales['DATE']

0     1992-01-01
1     1992-02-01
2     1992-03-01
3     1992-04-01
4     1992-05-01
         ...    
335   2019-12-01
336   2020-01-01
337   2020-02-01
338   2020-03-01
339   2020-04-01
Name: DATE, Length: 340, dtype: datetime64[ns]

In [41]:
sales['DATE'][0].year

1992

--------

# Attempt to Parse Dates Automatically

In [44]:
sales.head()

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822


In [45]:
# Re-read the CSV and let pandas parse the DATE column (the first column in
# the file) into datetimes while loading, so no separate conversion is needed.
sales = pd.read_csv(
    'Data/RetailSales_BeerWineLiquor.csv',
    parse_dates=['DATE'],
)

In [46]:
sales.head()

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822


In [48]:
# pandas read in as Date time
sales['DATE']

0     1992-01-01
1     1992-02-01
2     1992-03-01
3     1992-04-01
4     1992-05-01
         ...    
335   2019-12-01
336   2020-01-01
337   2020-02-01
338   2020-03-01
339   2020-04-01
Name: DATE, Length: 340, dtype: datetime64[ns]

-----------

# Resample
- `resample` works like `groupby` for time series: use it in combination with aggregation methods.
 [[reference](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.resample.html)]
 
 <table style="display: inline-block">
    <caption style="text-align: center"><strong>TIME SERIES OFFSET ALIASES</strong></caption>
<tr><th>ALIAS</th><th>DESCRIPTION</th></tr>
<tr><td>B</td><td>business day frequency</td></tr>
<tr><td>C</td><td>custom business day frequency (experimental)</td></tr>
<tr><td>D</td><td>calendar day frequency</td></tr>
<tr><td>W</td><td>weekly frequency</td></tr>
<tr><td>M</td><td>month end frequency</td></tr>
<tr><td>SM</td><td>semi-month end frequency (15th and end of month)</td></tr>
<tr><td>BM</td><td>business month end frequency</td></tr>
<tr><td>CBM</td><td>custom business month end frequency</td></tr>
<tr><td>MS</td><td>month start frequency</td></tr>
<tr><td>SMS</td><td>semi-month start frequency (1st and 15th)</td></tr>
<tr><td>BMS</td><td>business month start frequency</td></tr>
<tr><td>CBMS</td><td>custom business month start frequency</td></tr>
<tr><td>Q</td><td>quarter end frequency</td></tr>
<tr><td></td><td><font color=white>intentionally left blank</font></td></tr></table>

<table style="display: inline-block; margin-left: 40px">
<caption style="text-align: center"></caption>
<tr><th>ALIAS</th><th>DESCRIPTION</th></tr>
<tr><td>BQ</td><td>business quarter end frequency</td></tr>
<tr><td>QS</td><td>quarter start frequency</td></tr>
<tr><td>BQS</td><td>business quarter start frequency</td></tr>
<tr><td>A</td><td>year end frequency</td></tr>
<tr><td>BA</td><td>business year end frequency</td></tr>
<tr><td>AS</td><td>year start frequency</td></tr>
<tr><td>BAS</td><td>business year start frequency</td></tr>
<tr><td>BH</td><td>business hour frequency</td></tr>
<tr><td>H</td><td>hourly frequency</td></tr>
<tr><td>T, min</td><td>minutely frequency</td></tr>
<tr><td>S</td><td>secondly frequency</td></tr>
<tr><td>L, ms</td><td>milliseconds</td></tr>
<tr><td>U, us</td><td>microseconds</td></tr>
<tr><td>N</td><td>nanoseconds</td></tr></table>

In [49]:
# Move DATE into the index: resample() is called below without on=, so it
# needs a datetime-like index to bucket rows by time.
# Note: this cell cannot be re-run on its own. Once DATE is the index,
# set_index('DATE') raises KeyError until `sales` is read again.
sales = sales.set_index('DATE')

In [50]:
sales.head()

Unnamed: 0_level_0,MRTSSM4453USN
DATE,Unnamed: 1_level_1
1992-01-01,1509
1992-02-01,1541
1992-03-01,1597
1992-04-01,1675
1992-05-01,1822


In [52]:
# Bucket the monthly rows by calendar year ('A' = year-end frequency) and
# average each bucket; each result row is labelled with 31 December.
sales.resample('A').mean()

Unnamed: 0_level_0,MRTSSM4453USN
DATE,Unnamed: 1_level_1
1992-12-31,1807.25
1993-12-31,1794.833333
1994-12-31,1841.75
1995-12-31,1833.916667
1996-12-31,1929.75
1997-12-31,2006.75
1998-12-31,2115.166667
1999-12-31,2206.333333
2000-12-31,2375.583333
2001-12-31,2468.416667


--------

# .dt Method Calls
- Just as string columns expose `.str`, datetime columns expose the `.dt` accessor.
- Through `.dt` we can reach datetime attributes such as `.dt.year`, `.dt.month`, etc.

In [53]:
# Start again from a fresh frame in which DATE is an ordinary datetime column
# (not the index), which is what the .dt accessor works on.
sales = pd.read_csv(
    'Data/RetailSales_BeerWineLiquor.csv',
    parse_dates=['DATE'],
)

In [54]:
sales.head()

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822


In [55]:
sales.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 340 entries, 0 to 339
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   DATE           340 non-null    datetime64[ns]
 1   MRTSSM4453USN  340 non-null    int64         
dtypes: datetime64[ns](1), int64(1)
memory usage: 5.4 KB


In [57]:
sales['DATE'].dt.year

0      1992
1      1992
2      1992
3      1992
4      1992
       ... 
335    2019
336    2020
337    2020
338    2020
339    2020
Name: DATE, Length: 340, dtype: int64