# 2) Review of Python's `datetime` Module

In [1]:
import pandas as pd
import datetime as dt

In [5]:
# A plain calendar date (no time-of-day component): 12 June 2010.
someday = dt.date(year=2010, month=6, day=12)

In [6]:
# Note the order requested here: year, day, month (not year, month, day),
# which is why the tuple below reads (2010, 12, 6).
someday.year, someday.day, someday.month

(2010, 12, 6)

In [13]:
# Three ways to build a datetime. A notebook cell only displays the value of
# its last expression, so the output below belongs to the third call.
dt.datetime(2010, 6, 12) # default to midnight
dt.datetime(2010, 6, 12, 8, 30, 57) # hour, minute and second given explicitly
dt.datetime(2010, 6, 12, 17, 30, 57)

datetime.datetime(2010, 6, 12, 17, 30, 57)

In [17]:
# str() renders a date / datetime as text; only the second call's result is
# displayed because it is the last expression in the cell.
str(someday)
str(dt.datetime(2010, 6, 12, 17, 30, 57))

'2010-06-12 17:30:57'

In [18]:
# Reference datetime for the attribute demo: 12 June 2010 at 17:30:57.
sometime = dt.datetime(
    year=2010, month=6, day=12,
    hour=17, minute=30, second=57,
)

In [20]:
# Every component of the datetime is available as a plain attribute.
sometime.day, sometime.month, sometime.year, sometime.hour, sometime.minute, sometime.second

(12, 6, 2010, 17, 30, 57)

-----

# 3) The `Pandas Timestamp` Object

### using String

In [30]:
# Pandas is very flexible
# Each call below parses a differently formatted string. Only the last
# expression's value is displayed as the cell output.

pd.Timestamp('2020-04-06')
pd.Timestamp('2018-10-12')
pd.Timestamp('2013, 11, 04')
pd.Timestamp('1/1/2020')
# NOTE(review): slash-separated dates are ambiguous (day-first vs month-first).
# Presumably '19/12/2015' can only be read day-first while '4/3/2020' is read
# month-first -- confirm before relying on either interpretation.
pd.Timestamp('19/12/2015')
pd.Timestamp('12/19/2015')
pd.Timestamp('4/3/2020')

pd.Timestamp('2021/03/08 08:35:15')
# Read month-first: the output below is 3 August 2021 at 18:20:55.
pd.Timestamp('08-03-2021 6:20:55 PM')

Timestamp('2021-08-03 18:20:55')

### using Python Datetime object

In [32]:
# A Timestamp can also be built from a stdlib date or datetime object.
# Only the second call's result is displayed.
pd.Timestamp(dt.date(2015, 1, 1))
pd.Timestamp(dt.datetime(2020, 4, 6, 18, 23, 45))

Timestamp('2020-04-06 18:23:45')

--------

# 4) The `Pandas DatetimeIndex` Object
+ basically **a collection or container of multiple Pandas Timestamps**
+ it does two things: converts each value into a pandas `Timestamp`, then stores those Timestamps in a `DatetimeIndex` object.

In [36]:
# Date strings with different separators; each one is parsed into a Timestamp.
dates = [
    '2016/01/02',
    '2016-04-12',
    '2009-09-07',
]
pd.DatetimeIndex(data=dates)

DatetimeIndex(['2016-01-02', '2016-04-12', '2009-09-07'], dtype='datetime64[ns]', freq=None)

In [42]:
# The same container can be built from stdlib date objects.
dates = [
    dt.date(year=2015, month=1, day=31),
    dt.date(year=2021, month=4, day=6),
    dt.date(year=2003, month=4, day=30),
]
dtIndex = pd.DatetimeIndex(data=dates)
dtIndex

DatetimeIndex(['2015-01-31', '2021-04-06', '2003-04-30'], dtype='datetime64[ns]', freq=None)

In [45]:
# Use the DatetimeIndex as the row labels of a Series.
values = [100, 200, 300]
pd.Series(values, index=dtIndex)

2015-01-31    100
2021-04-06    200
2003-04-30    300
dtype: int64

--------

# 5) The `pd.to_datetime()` Method

In [56]:
# to_datetime accepts a single string, a date, a datetime, or a list of values.
# Only the last call's result (a DatetimeIndex) is displayed below.
pd.to_datetime('2010-12-31')
pd.to_datetime(dt.date(2020, 4, 6))
pd.to_datetime(dt.datetime(2020, 4, 6, 19, 47, 50))
# NOTE(review): this list mixes several string formats; newer pandas releases
# may need format='mixed' to parse it -- confirm against the installed version.
pd.to_datetime(['2015-03-30', '2011/12/31', '2021', 'Apr 6th 2021']) # list of objects

DatetimeIndex(['2015-03-30', '2011-12-31', '2021-01-01', '2021-04-06'], dtype='datetime64[ns]', freq=None)

### The most common real-life use case is converting a pandas `Series` to datetime values.

In [58]:
# Date-like strings in four different formats, held as plain objects for now.
times = pd.Series(
    data=['2015-03-30', '2011/12/31', '2021', 'Apr 6th 2021'],
)
times

0      2015-03-30
1      2011/12/31
2            2021
3    Apr 6th 2021
dtype: object

In [59]:
# Converts the object-dtype Series of strings into datetime64[ns] values.
# NOTE(review): the strings use mixed formats; newer pandas releases may need
# format='mixed' here -- confirm against the installed version.
pd.to_datetime(times)

0   2015-03-30
1   2011-12-31
2   2021-01-01
3   2021-04-06
dtype: datetime64[ns]

### Common Parsing Issues

In [62]:
# Two parseable dates plus two problem values: a non-date word and an
# impossible calendar day (31 February).
dates = pd.Series(
    data=['July 4th, 1996', '10/04/2021', 'Hello', '2015-02-31'],
)
dates

0    July 4th, 1996
1        10/04/2021
2             Hello
3        2015-02-31
dtype: object

In [64]:
# Demonstrate the failure instead of hiding it in commented-out code, while
# still letting "Run All" continue: strict parsing cannot handle 'Hello'.
# The original run reported: ParserError: Unknown string format: Hello
try:
    pd.to_datetime(dates)
except ValueError as exc:
    # Parsing failures are ValueError subclasses (the exact class name varies
    # by version), so print whichever one was actually raised.
    print(f"{type(exc).__name__}: {exc}")

### `errors='coerce'` converts invalid strings to NaT (Not a Time) instead of raising an error

In [65]:
# With errors='coerce' the two problem values ('Hello' and '2015-02-31')
# become NaT in the output instead of raising.
pd.to_datetime(dates, errors='coerce')

0   1996-07-04
1   2021-10-04
2          NaT
3          NaT
dtype: datetime64[ns]

-----

# Unix Time
+ stores time as the number of seconds elapsed since the Unix epoch (1970-01-01 00:00:00 UTC)

In [16]:
# Consecutive values differ by 86400 seconds, so the resulting timestamps
# are exactly one day apart (see output).
pd.to_datetime([1349720105, 1349806505, 1349892905, 1349979305, 1350065705], unit='s') # unit in seconds

DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
               '2012-10-10 18:15:05', '2012-10-11 18:15:05',
               '2012-10-12 18:15:05'],
              dtype='datetime64[ns]', freq=None)

------

# 6) Create Range of Dates with the `pd.date_range()` Method, Part 1

# using `start`, `end` and `freq` parameters

### `freq` parameter

In [20]:
# Daily ('D') steps from the start date through the end date, both inclusive.
times = pd.date_range('2016-01-01', '2016-01-10', freq='D')
times

DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',
               '2016-01-05', '2016-01-06', '2016-01-07', '2016-01-08',
               '2016-01-09', '2016-01-10'],
              dtype='datetime64[ns]', freq='D')

In [71]:
# The object returned by pd.date_range is a DatetimeIndex.
type(times)

pandas.core.indexes.datetimes.DatetimeIndex

In [73]:
# Each individual element of that index is a pandas Timestamp.
type(times[0])

pandas._libs.tslibs.timestamps.Timestamp

### `2D`: 2 days increment

In [75]:
# '2D' steps two days at a time, so every other day in the range is returned.
pd.date_range(start='2016-01-01', end='2016-01-10', freq='2D')

DatetimeIndex(['2016-01-01', '2016-01-03', '2016-01-05', '2016-01-07',
               '2016-01-09'],
              dtype='datetime64[ns]', freq='2D')

### Business Days `B`
+ skipping the weekend

In [76]:
# 'B' keeps business days only: the 2nd/3rd and 9th/10th (the weekends)
# are absent from the output.
pd.date_range(start='2016-01-01', end='2016-01-10', freq='B')

DatetimeIndex(['2016-01-01', '2016-01-04', '2016-01-05', '2016-01-06',
               '2016-01-07', '2016-01-08'],
              dtype='datetime64[ns]', freq='B')

### Week `W`

Here the result starts at 2016-01-03, which is not the start date that we specified.

This is because `W` is shorthand for `W-SUN`: a weekly frequency anchored on Sundays. 1st Jan 2016 is a Friday, so the range skips ahead to the first Sunday (3rd Jan); the next Sunday is 10th Jan.

The subsequent Sunday, 17th Jan, is not included because it falls outside our specified range.

In [77]:
# Plain 'W' is reported back as 'W-SUN' in the output: one date per Sunday.
pd.date_range(start='2016-01-01', end='2016-01-10', freq='W')

DatetimeIndex(['2016-01-03', '2016-01-10'], dtype='datetime64[ns]', freq='W-SUN')

We can **anchor the weeks on Friday instead** by using `W-FRI`

In [22]:
# 'W-FRI' anchors the weekly dates on Fridays, so the start date itself
# (a Friday) is included.
pd.date_range(start='2016-01-01', end='2016-01-10', freq='W-FRI')

DatetimeIndex(['2016-01-01', '2016-01-08'], dtype='datetime64[ns]', freq='W-FRI')

### Hours `H`

In [81]:
# Hourly, 6-hourly and 12-hourly steps; only the 12-hourly result is displayed.
# NOTE(review): newer pandas releases reportedly prefer lowercase 'h' for
# hours -- confirm against the installed version.
pd.date_range(start='2016-01-01', end='2016-01-10', freq='H')
pd.date_range(start='2016-01-01', end='2016-01-10', freq='6H')
pd.date_range(start='2016-01-01', end='2016-01-10', freq='12H')

DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 12:00:00',
               '2016-01-02 00:00:00', '2016-01-02 12:00:00',
               '2016-01-03 00:00:00', '2016-01-03 12:00:00',
               '2016-01-04 00:00:00', '2016-01-04 12:00:00',
               '2016-01-05 00:00:00', '2016-01-05 12:00:00',
               '2016-01-06 00:00:00', '2016-01-06 12:00:00',
               '2016-01-07 00:00:00', '2016-01-07 12:00:00',
               '2016-01-08 00:00:00', '2016-01-08 12:00:00',
               '2016-01-09 00:00:00', '2016-01-09 12:00:00',
               '2016-01-10 00:00:00'],
              dtype='datetime64[ns]', freq='12H')

## Month End `M`
- give me the end of the month

In [24]:
# 'M' yields the last calendar day of each month that falls inside the range.
# NOTE(review): newer pandas releases reportedly rename this alias to 'ME' --
# confirm against the installed version.
pd.date_range(start='2016-01-01', end='2016-01-15', freq='M') # this will return nothing as there is no month end for this date range
pd.date_range(start='2016-01-01', end='2016-12-31', freq='M')

DatetimeIndex(['2016-01-31', '2016-02-29', '2016-03-31', '2016-04-30',
               '2016-05-31', '2016-06-30', '2016-07-31', '2016-08-31',
               '2016-09-30', '2016-10-31', '2016-11-30', '2016-12-31'],
              dtype='datetime64[ns]', freq='M')

## Month Start `MS`
+ give me the start of the month

In [87]:
# 'MS' yields the first day of each month. The end date is inclusive, so the
# second call (the one displayed) also returns 2017-01-01.
pd.date_range(start='2016-01-01', end='2016-12-31', freq='MS')
pd.date_range(start='2016-01-01', end='2017-01-01', freq='MS')

DatetimeIndex(['2016-01-01', '2016-02-01', '2016-03-01', '2016-04-01',
               '2016-05-01', '2016-06-01', '2016-07-01', '2016-08-01',
               '2016-09-01', '2016-10-01', '2016-11-01', '2016-12-01',
               '2017-01-01'],
              dtype='datetime64[ns]', freq='MS')

## Year End `A`

In [88]:
# 'A' is reported back as 'A-DEC' in the output: one date per year, on 31 December.
# NOTE(review): newer pandas releases reportedly rename this alias to 'YE' --
# confirm against the installed version.
pd.date_range(start='2016-01-01', end='2050-01-01', freq='A')

DatetimeIndex(['2016-12-31', '2017-12-31', '2018-12-31', '2019-12-31',
               '2020-12-31', '2021-12-31', '2022-12-31', '2023-12-31',
               '2024-12-31', '2025-12-31', '2026-12-31', '2027-12-31',
               '2028-12-31', '2029-12-31', '2030-12-31', '2031-12-31',
               '2032-12-31', '2033-12-31', '2034-12-31', '2035-12-31',
               '2036-12-31', '2037-12-31', '2038-12-31', '2039-12-31',
               '2040-12-31', '2041-12-31', '2042-12-31', '2043-12-31',
               '2044-12-31', '2045-12-31', '2046-12-31', '2047-12-31',
               '2048-12-31', '2049-12-31'],
              dtype='datetime64[ns]', freq='A-DEC')

--------

# 7) Create Range of Dates with the `pd.date_range()` Method, Part 2

## `periods`: how many dates to generate, counting from the start date

In [40]:
# using start, periods and frequency
# Every call asks for 50 dates from the same start with a different freq.
# Only the last call (freq='6H') is displayed as the cell output.

pd.date_range('2012-09-09', periods=50, freq='D')
pd.date_range('2012-09-09', periods=50, freq='B')
pd.date_range('2012-09-09', periods=50, freq='2W')
pd.date_range('2012-09-09', periods=50, freq='W-SUN')
pd.date_range('2012-09-09', periods=50, freq='W-TUE')
pd.date_range('2012-09-09', periods=50, freq='M')
pd.date_range('2012-09-09', periods=50, freq='MS')
pd.date_range('2012-09-09', periods=50, freq='Y')
pd.date_range('2012-09-09', periods=50, freq='YS')
pd.date_range('2012-09-09', periods=50, freq='H')
pd.date_range('2012-09-09', periods=50, freq='6H')

DatetimeIndex(['2012-09-09 00:00:00', '2012-09-09 06:00:00',
               '2012-09-09 12:00:00', '2012-09-09 18:00:00',
               '2012-09-10 00:00:00', '2012-09-10 06:00:00',
               '2012-09-10 12:00:00', '2012-09-10 18:00:00',
               '2012-09-11 00:00:00', '2012-09-11 06:00:00',
               '2012-09-11 12:00:00', '2012-09-11 18:00:00',
               '2012-09-12 00:00:00', '2012-09-12 06:00:00',
               '2012-09-12 12:00:00', '2012-09-12 18:00:00',
               '2012-09-13 00:00:00', '2012-09-13 06:00:00',
               '2012-09-13 12:00:00', '2012-09-13 18:00:00',
               '2012-09-14 00:00:00', '2012-09-14 06:00:00',
               '2012-09-14 12:00:00', '2012-09-14 18:00:00',
               '2012-09-15 00:00:00', '2012-09-15 06:00:00',
               '2012-09-15 12:00:00', '2012-09-15 18:00:00',
               '2012-09-16 00:00:00', '2012-09-16 06:00:00',
               '2012-09-16 12:00:00', '2012-09-16 18:00:00',
               '2012-09-

-----

# Create Range of Dates with the `pd.date_range()` Method, Part 3

## `end` parameter: the range ends at this date and is built by counting backwards from it

In [57]:
# With `end` + `periods`, the dates are counted backwards from the end date.
# Only the last call (freq='7H') is displayed as the cell output.
pd.date_range(end='1999-12-31', periods=20, freq='D')
pd.date_range(end='1999-12-31', periods=40, freq='2D')
pd.date_range(end='1999-12-31', periods=50, freq='B')
pd.date_range(end='1999-12-31', periods=50, freq='5B')
# (repeats the periods=50, freq='B' call from two lines up)
pd.date_range(end='1999-12-31', periods=50, freq='B')
pd.date_range(end='1999-12-31', periods=40, freq='W-SUN')
pd.date_range(end='1999-12-31', periods=50, freq='W-FRI')
pd.date_range(end='1999-12-31', periods=40, freq='M')
pd.date_range(end='1999-12-31', periods=53, freq='MS')
pd.date_range(end='1999-12-31', periods=50, freq='7H')

DatetimeIndex(['1999-12-16 17:00:00', '1999-12-17 00:00:00',
               '1999-12-17 07:00:00', '1999-12-17 14:00:00',
               '1999-12-17 21:00:00', '1999-12-18 04:00:00',
               '1999-12-18 11:00:00', '1999-12-18 18:00:00',
               '1999-12-19 01:00:00', '1999-12-19 08:00:00',
               '1999-12-19 15:00:00', '1999-12-19 22:00:00',
               '1999-12-20 05:00:00', '1999-12-20 12:00:00',
               '1999-12-20 19:00:00', '1999-12-21 02:00:00',
               '1999-12-21 09:00:00', '1999-12-21 16:00:00',
               '1999-12-21 23:00:00', '1999-12-22 06:00:00',
               '1999-12-22 13:00:00', '1999-12-22 20:00:00',
               '1999-12-23 03:00:00', '1999-12-23 10:00:00',
               '1999-12-23 17:00:00', '1999-12-24 00:00:00',
               '1999-12-24 07:00:00', '1999-12-24 14:00:00',
               '1999-12-24 21:00:00', '1999-12-25 04:00:00',
               '1999-12-25 11:00:00', '1999-12-25 18:00:00',
               '1999-12-

------

# 9) The `.dt` Accessor
+ works like the `.str` accessor, but for datetime values


In [59]:
# One date every 24 days across 2000-2010, used as sample data for `.dt`.
bunch_of_dates = pd.date_range('2000-01-01', '2010-12-31', freq='24D')
bunch_of_dates

DatetimeIndex(['2000-01-01', '2000-01-25', '2000-02-18', '2000-03-13',
               '2000-04-06', '2000-04-30', '2000-05-24', '2000-06-17',
               '2000-07-11', '2000-08-04',
               ...
               '2010-05-20', '2010-06-13', '2010-07-07', '2010-07-31',
               '2010-08-24', '2010-09-17', '2010-10-11', '2010-11-04',
               '2010-11-28', '2010-12-22'],
              dtype='datetime64[ns]', length=168, freq='24D')

In [61]:
# Wrap the DatetimeIndex in a Series so the `.dt` accessor can be used on it.
s = pd.Series(data=bunch_of_dates)
s.head(n=5)

0   2000-01-01
1   2000-01-25
2   2000-02-18
3   2000-03-13
4   2000-04-06
dtype: datetime64[ns]

In [77]:
# The .dt accessor exposes a datetime component for every element of the Series.
# The first four results are computed but not displayed (not the last expression).
s.dt.day
s.dt.month
s.dt.year
s.dt.weekday
# Boolean Series: True where the date is the first day of a quarter.
mask = s.dt.is_quarter_start
mask

0       True
1      False
2      False
3      False
4      False
       ...  
163    False
164    False
165    False
166    False
167    False
Length: 168, dtype: bool

In [78]:
# Boolean indexing: keep only the rows where mask is True.
s[mask]

0     2000-01-01
19    2001-04-01
38    2002-07-01
137   2009-01-01
dtype: datetime64[ns]

------

# 11) Import Financial Dataset with `pandas_datareader` Library

In [80]:
from pandas_datareader import data

In [None]:
# Request 'MSFT' data from the 'yahoo' source between the start and end dates.
# NOTE(review): this cell has no execution count or output in the saved
# notebook -- confirm the 'yahoo' data source still works before relying on it.
stocks = data.DataReader(name='MSFT', data_source='yahoo', start='2010-01-01', end='2021-12-31')
stocks.head(3)

In [None]:
# Inspect the underlying values, column labels, index and axes of `stocks`.
# Only the last expression (stocks.axes) would be displayed.
stocks.values
stocks.columns
stocks.index
stocks.axes

--------

# 12) Selecting Rows from a `DataFrame` with a `DatetimeIndex`

In [None]:
# to continue

--------

# 16) The `Timedelta` Object

We already know that a pandas `Timestamp` object represents a specific moment in time. For example: March 30th 1999 17:00

In comparison, `Timedelta` measures a **time span**, **a duration**, or **a passage of time**. It is not connected to a specific date; it is simply a measurement of time.

Delta in maths means the difference between two things, so Timedelta literally translates to a difference in times, or the distance between two times.

For example: if I say "My birthday is in 5 days 2 hours and 30 minutes", then that **5 days 2 hours and 30 minutes** describes a measurement of time, a duration. It doesn't describe a specific date. That is what a Timedelta object represents in pandas.

The easiest way to obtain a time duration is to **subtract one Timestamp from another**.


In [81]:
# Two date-only Timestamps; subtracting them yields a Timedelta.
time_a, time_b = pd.Timestamp('2020-03-31'), pd.Timestamp('2020-03-20')

time_a - time_b

Timedelta('11 days 00:00:00')

In [82]:
# Same dates, now with a time of day, so the difference has an
# hours/minutes/seconds part as well.
time_a, time_b = (
    pd.Timestamp('2020-03-31 04:35:16PM'),
    pd.Timestamp('2020-03-20 02:15:49AM'),
)

time_a - time_b

Timedelta('11 days 14:19:27')

In [83]:
# Swapping the operands yields a negative Timedelta (see output).
time_b - time_a

Timedelta('-12 days +09:40:33')

### Creating Timedelta from scratch

NOTE: There is no `years` unit available to Timedelta, so we need a workaround such as adding 365 days.

In [93]:
# A Timedelta can be assembled from keyword components.
# Only the last one is displayed: 8 weeks + 3 days = 59 days, plus 12:45:20.
pd.Timedelta(days=3)
pd.Timedelta(days=3, minutes=45)
pd.Timedelta(days=3, hours=12, minutes=45, seconds=12)
pd.Timedelta(weeks=8, days=3, hours=12, minutes=45, seconds=20)

Timedelta('59 days 12:45:20')

In [85]:
# Adding a Timedelta to a Timestamp shifts it forward by that duration.
time_a  + pd.Timedelta(days=3)

Timestamp('2020-04-03 16:35:16')

### Creating Timedelta using Strings

In [96]:
# Durations can also be written as human-readable strings.
# Only the last call's result is displayed.
pd.Timedelta('5 minutes')
pd.Timedelta('6 hours 5 minutes')
pd.Timedelta('14 days 6 hours 12 minutes 49 seconds')

Timedelta('14 days 06:12:49')

-------

# 17) `Timedelta` in a Dataset

In [103]:
# Load the e-commerce file with ID as the row index, parsing both date
# columns into datetimes at read time.
shipping = pd.read_csv(
    'Data/ecommerce.csv',
    index_col=['ID'],
    parse_dates=['order_date', 'delivery_date'],
)
shipping.head()

Unnamed: 0_level_0,order_date,delivery_date
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1998-05-24,1999-02-05
2,1992-04-22,1998-03-06
4,1991-02-10,1992-08-26
5,1992-07-21,1997-11-20
7,1993-09-02,1998-06-10


In [104]:
# Confirm that both date columns were parsed as datetime64[ns] with no nulls.
shipping.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 501 entries, 1 to 997
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   order_date     501 non-null    datetime64[ns]
 1   delivery_date  501 non-null    datetime64[ns]
dtypes: datetime64[ns](2)
memory usage: 11.7 KB


### How long does it take to ship?

In [111]:
# Subtracting one datetime column from the other gives a timedelta column:
# the time elapsed between order and delivery.
shipping['Delivery Time'] = shipping['delivery_date'].sub(shipping['order_date'])
shipping.head()

Unnamed: 0_level_0,order_date,delivery_date,Delivery Time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1998-05-24,1999-02-05,257 days
2,1992-04-22,1998-03-06,2144 days
4,1991-02-10,1992-08-26,563 days
5,1992-07-21,1997-11-20,1948 days
7,1993-09-02,1998-06-10,1742 days


### Longest and Shortest Shipping Duration

In [113]:
# 5 longest shipping durations (sorted from largest to smallest)
shipping['Delivery Time'].nlargest(5)

ID
884   3583 days
314   3580 days
904   3562 days
130   3423 days
331   3379 days
Name: Delivery Time, dtype: timedelta64[ns]

In [123]:
# Single longest duration, returned as a scalar Timedelta.
shipping['Delivery Time'].max()

Timedelta('3583 days 00:00:00')

In [114]:
# 5 shortest shipping durations (sorted from smallest to largest)
shipping['Delivery Time'].nsmallest(5)

ID
898    8 days
19     9 days
612    9 days
994   10 days
310   16 days
Name: Delivery Time, dtype: timedelta64[ns]

In [124]:
# Single shortest duration, returned as a scalar Timedelta.
shipping['Delivery Time'].min()

Timedelta('8 days 00:00:00')

### Which deliveries took more than a year?

In [122]:
# Keep the rows whose delivery time exceeds 365 days, longest first.
# The threshold is made an explicit Timedelta rather than a bare string.
shipping.loc[
    shipping['Delivery Time'].gt(pd.Timedelta('365 days'))
].sort_values(by='Delivery Time', ascending=False)

Unnamed: 0_level_0,order_date,delivery_date,Delivery Time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
884,1990-01-20,1999-11-12,3583 days
314,1990-03-07,1999-12-25,3580 days
904,1990-02-13,1999-11-15,3562 days
130,1990-04-02,1999-08-16,3423 days
331,1990-09-18,1999-12-19,3379 days
...,...,...,...
326,1998-05-12,1999-05-29,382 days
445,1993-02-11,1994-02-24,378 days
76,1997-05-26,1998-06-05,375 days
457,1991-06-17,1992-06-18,367 days


----------