# Time Methods for Date and Time Data
* Pandas allows us to easily extract information from a datetime object to use for feature engineering.


* For example, we have recent timestamped sales data.
* Pandas will allow us to extract information from the timestamp, such as:
    * Day of the Week
    * Weekend vs Weekday
    * AM vs PM




In [36]:
import numpy as np
import pandas as pd
from  datetime import datetime

In [37]:
# Components of the example timestamp 2015-01-01 02:30:15
myyear, mymonth, myday = 2015, 1, 1
myhour, mymin, mysecond = 2, 30, 15

In [38]:
# Date-only datetime: no time components are passed
mydate = datetime(year=myyear, month=mymonth, day=myday)

In [39]:
# Show the date-only datetime; the omitted time fields appear as 0, 0 in the repr
mydate

datetime.datetime(2015, 1, 1, 0, 0)

In [40]:
# Full datetime: date plus hour, minute and second
mydatetime = datetime(
    year=myyear,
    month=mymonth,
    day=myday,
    hour=myhour,
    minute=mymin,
    second=mysecond,
)

In [41]:
# Show the full datetime, including the hour, minute and second it was built with
mydatetime

datetime.datetime(2015, 1, 1, 2, 30, 15)

In [42]:
# Individual components of a datetime are available as attributes (.year here)
mydatetime.year

2015

In [43]:
# Build a Series holding two differently formatted date strings and one missing value
myser = pd.Series(data=['Nov 3, 1990', '2000-01-01', None])

In [44]:
# Inspect the Series: dtype is object because the values are still plain strings
myser

0    Nov 3, 1990
1     2000-01-01
2           None
dtype: object

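* The values are still plain strings, so datetime attributes are not available yet.
* `myser[0].year` raises an `AttributeError`, because `myser[0]` is a `str`, not a datetime.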

In [45]:
# Convert the Series of date strings into pandas datetimes.
# pd.to_datetime parameters:
#   arg : int, float, str, datetime, list, tuple, 1-d array, Series, DataFrame/dict-like
#       The object to convert to a datetime.
# The resulting dtype is datetime64 with nanosecond resolution.
# NOTE(review): the two strings use different formats; pandas 2.0+ reportedly infers
# one format from the first value and may require format='mixed' here — confirm
# against the installed pandas version.
timeser = pd.to_datetime(myser)

In [46]:
# Each element of the converted Series (e.g. timeser[0]) is a Timestamp object,
# so date components such as the year can be read as attributes
timeser[0].year

1990

In [47]:
# European-style date (day-month-year); 31 cannot be a month, so the order is unambiguous
obvi_euro_date = '31-12-2000'

In [48]:
# Parses the European-style date and shows it in year-month-day order
pd.to_datetime(obvi_euro_date)

Timestamp('2000-12-31 00:00:00')

In [49]:
# Ambiguous date: an American reader would take this as October 12th, 2000,
# but the intended (European) meaning is 10th December 2000
euro_date = '10-12-2000'

In [50]:
# By default the month is read first (American style), which would give
# October 12th, 2000 here instead of the intended 10th December 2000.
# Passing dayfirst=True solves this.
# dayfirst : bool, default False
#     Specify a date parse order if arg is str or its list-likes. If True,
#     parses dates with the day first, e.g. 10/11/12 is parsed as 2012-11-10.
pd.to_datetime(euro_date, dayfirst=True)

Timestamp('2000-12-10 00:00:00')

* If a column mixes American-style and European-style dates, the problem is not with `pd.to_datetime` — the data itself is bad.
* <b>Clean up the data first.</b>


In [51]:
# A date written in an unusual style, with '--' as the separator
style_date = '12--Dec--2000'

In [52]:
# Tell pandas the exact layout of the date string via `format`.
# format : str, default None
#     The strftime pattern used to parse the time, e.g. "%d/%m/%Y". Note that "%f"
#     (microsecond as a decimal number, zero-padded to 6 digits) will parse
#     all the way up to nanoseconds.
# Here: %d = day of the month, %b = abbreviated month name, %Y = four-digit year.
# See the strftime documentation for more information on choices:
#     https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior.
pd.to_datetime(style_date, format='%d--%b--%Y')

Timestamp('2000-12-12 00:00:00')

In [53]:
# A free-form, human-written date
custom_date = "12th of Dec 2000"

In [54]:
# pandas is smart enough to parse this free-form date without an explicit format
pd.to_datetime(custom_date)

Timestamp('2000-12-12 00:00:00')

In [55]:
# Load the RetailSales_BeerWineLiquor data set.
# In a raw string each path separator is a single backslash; doubling them
# (or tripling, as before '03-Pandas') puts repeated separators into the path.
# NOTE(review): this is a hardcoded absolute Windows path — adjust it to where
# the CSV lives on your machine.
sales = pd.read_csv(r'C:\Users\admin\Desktop\Data Science\Course-2021\03-Pandas\RetailSales_BeerWineLiquor.csv')

In [56]:
# Read without date parsing, the DATE column has dtype object (plain strings)
sales['DATE']

0      1992-01-01
1      1992-02-01
2      1992-03-01
3      1992-04-01
4      1992-05-01
          ...    
335    2019-12-01
336    2020-01-01
337    2020-02-01
338    2020-03-01
339    2020-04-01
Name: DATE, Length: 340, dtype: object

In [57]:
# Convert the DATE column from strings to datetimes and store it back in the same column
sales['DATE'] = pd.to_datetime(sales["DATE"])

In [58]:
# The DATE column now has dtype datetime64
sales

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822
...,...,...
335,2019-12-01,6630
336,2020-01-01,4388
337,2020-02-01,4533
338,2020-03-01,5562


In [59]:
# The first DATE entry is now a datetime value, so its year can be read directly
sales['DATE'][0].year


1992

In [60]:
# Alternative: have read_csv parse the DATE column as datetimes while loading.
# In a raw string each path separator is a single backslash; doubling them
# (or tripling, as before '03-Pandas') puts repeated separators into the path.
# NOTE(review): this is a hardcoded absolute Windows path — adjust it to where
# the CSV lives on your machine.
sales = pd.read_csv(r'C:\Users\admin\Desktop\Data Science\Course-2021\03-Pandas\RetailSales_BeerWineLiquor.csv', parse_dates=['DATE'])


In [61]:
# Confirm that DATE was parsed on load: the dtype is datetime64[ns]
sales['DATE']

0     1992-01-01
1     1992-02-01
2     1992-03-01
3     1992-04-01
4     1992-05-01
         ...    
335   2019-12-01
336   2020-01-01
337   2020-02-01
338   2020-03-01
339   2020-04-01
Name: DATE, Length: 340, dtype: datetime64[ns]

In [62]:
# Make DATE the index so the frame can be resampled by time.
# Note: this reassigns `sales`; re-running the cell fails because DATE is
# then no longer a column.
sales = sales.set_index('DATE')

In [63]:
# DATE is now the index; MRTSSM4453USN is the only remaining column
sales

Unnamed: 0_level_0,MRTSSM4453USN
DATE,Unnamed: 1_level_1
1992-01-01,1509
1992-02-01,1541
1992-03-01,1597
1992-04-01,1675
1992-05-01,1822
...,...
2019-12-01,6630
2020-01-01,4388
2020-02-01,4533
2020-03-01,5562


In [64]:
# Resample by year ('A' = year end frequency) and total the sales within each year
sales.resample(rule='A').sum()

Unnamed: 0_level_0,MRTSSM4453USN
DATE,Unnamed: 1_level_1
1992-12-31,21687
1993-12-31,21538
1994-12-31,22101
1995-12-31,22007
1996-12-31,23157
1997-12-31,24081
1998-12-31,25382
1999-12-31,26476
2000-12-31,28507
2001-12-31,29621


In [65]:
# resample acts like a group-by on the datetime index: here everything is grouped by year.
# rule : the offset string or object representing the target conversion
#        ('A' = year end frequency).
# .mean() then averages the sales within each year.
sales.resample(rule='A').mean()

Unnamed: 0_level_0,MRTSSM4453USN
DATE,Unnamed: 1_level_1
1992-12-31,1807.25
1993-12-31,1794.833333
1994-12-31,1841.75
1995-12-31,1833.916667
1996-12-31,1929.75
1997-12-31,2006.75
1998-12-31,2115.166667
1999-12-31,2206.333333
2000-12-31,2375.583333
2001-12-31,2468.416667


<table style="display: inline-block">
    <caption style="text-align: center"><strong>TIME SERIES OFFSET ALIASES</strong></caption>
<tr><th>ALIAS</th><th>DESCRIPTION</th></tr>
<tr><td>B</td><td>business day frequency</td></tr>
<tr><td>C</td><td>custom business day frequency (experimental)</td></tr>
<tr><td>D</td><td>calendar day frequency</td></tr>
<tr><td>W</td><td>weekly frequency</td></tr>
<tr><td>M</td><td>month end frequency</td></tr>
<tr><td>SM</td><td>semi-month end frequency (15th and end of month)</td></tr>
<tr><td>BM</td><td>business month end frequency</td></tr>
<tr><td>CBM</td><td>custom business month end frequency</td></tr>
<tr><td>MS</td><td>month start frequency</td></tr>
<tr><td>SMS</td><td>semi-month start frequency (1st and 15th)</td></tr>
<tr><td>BMS</td><td>business month start frequency</td></tr>
<tr><td>CBMS</td><td>custom business month start frequency</td></tr>
<tr><td>Q</td><td>quarter end frequency</td></tr>
<tr><td></td><td><font color=white>intentionally left blank</font></td></tr></table>

<table style="display: inline-block; margin-left: 40px">
<caption style="text-align: center"></caption>
<tr><th>ALIAS</th><th>DESCRIPTION</th></tr>
<tr><td>BQ</td><td>business quarter end frequency</td></tr>
<tr><td>QS</td><td>quarter start frequency</td></tr>
<tr><td>BQS</td><td>business quarter start frequency</td></tr>
<tr><td>A</td><td>year end frequency</td></tr>
<tr><td>BA</td><td>business year end frequency</td></tr>
<tr><td>AS</td><td>year start frequency</td></tr>
<tr><td>BAS</td><td>business year start frequency</td></tr>
<tr><td>BH</td><td>business hour frequency</td></tr>
<tr><td>H</td><td>hourly frequency</td></tr>
<tr><td>T, min</td><td>minutely frequency</td></tr>
<tr><td>S</td><td>secondly frequency</td></tr>
<tr><td>L, ms</td><td>milliseconds</td></tr>
<tr><td>U, us</td><td>microseconds</td></tr>
<tr><td>N</td><td>nanoseconds</td></tr></table>



In [66]:
# Reload the data with DATE parsed as a datetime column, so DATE is an ordinary
# column again rather than the index.
# In a raw string each path separator is a single backslash; doubling them
# (or tripling, as before '03-Pandas') puts repeated separators into the path.
# NOTE(review): this is a hardcoded absolute Windows path — adjust it to where
# the CSV lives on your machine.
sales = pd.read_csv(r'C:\Users\admin\Desktop\Data Science\Course-2021\03-Pandas\RetailSales_BeerWineLiquor.csv', parse_dates=['DATE'])

In [67]:
# Preview the first 8 rows
sales.head(8)

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822
5,1992-06-01,1775
6,1992-07-01,1912
7,1992-08-01,1862


In [68]:
# Summarise the columns, their non-null counts and their dtypes
sales.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 340 entries, 0 to 339
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   DATE           340 non-null    datetime64[ns]
 1   MRTSSM4453USN  340 non-null    int64         
dtypes: datetime64[ns](1), int64(1)
memory usage: 5.4 KB


In [69]:
# The .dt accessor exposes datetime components for a whole Series at once;
# here it returns the year of every row
sales['DATE'].dt.year

0      1992
1      1992
2      1992
3      1992
4      1992
       ... 
335    2019
336    2020
337    2020
338    2020
339    2020
Name: DATE, Length: 340, dtype: int64

In [70]:
# Month number of every row, via the .dt accessor
sales['DATE'].dt.month


0       1
1       2
2       3
3       4
4       5
       ..
335    12
336     1
337     2
338     3
339     4
Name: DATE, Length: 340, dtype: int64