# WORKING WITH DATES AND TIMES

In [1]:
import numpy as np
import pandas as pd
import datetime as dt

In [2]:
# Build a Timestamp from a date-time string (the month does not need to be zero-padded)
pd.Timestamp("2023-7-14 15:30:17")

Timestamp('2023-07-14 15:30:17')

In [3]:
# A Timestamp can also be built from a standard-library datetime object
pd.Timestamp(dt.datetime(2023, 8, 15, 7, 45, 30))

Timestamp('2023-08-15 07:45:30')

In [4]:
# A list of date strings becomes a DatetimeIndex; single-digit months are accepted
string_dates = ["2026-12-31", "2026-2-13", "2026-3-16"]
pd.DatetimeIndex(string_dates)

DatetimeIndex(['2026-12-31', '2026-02-13', '2026-03-16'], dtype='datetime64[ns]', freq=None)

In [5]:
# DatetimeIndex also accepts a mix of date objects, date strings and Timestamps
mixed_dates = [
    dt.date(2026, 12, 31),
    "2026/2/13",
    pd.Timestamp(year=2026, month=3, day=16),
]
pd.DatetimeIndex(mixed_dates)

DatetimeIndex(['2026-12-31', '2026-02-13', '2026-03-16'], dtype='datetime64[ns]', freq=None)

In [6]:
# Load the Disney stock-price dataset; parse_dates asks read_csv to parse the "Date" column as datetimes
disney = pd.read_csv("./data/disney.csv", parse_dates=["Date"])
disney

Unnamed: 0,Date,High,Low,Open,Close
0,1962-01-02,0.096026,0.092908,0.092908,0.092908
1,1962-01-03,0.094467,0.092908,0.092908,0.094155
2,1962-01-04,0.094467,0.093532,0.094155,0.094155
3,1962-01-05,0.094779,0.093844,0.094155,0.094467
4,1962-01-08,0.095714,0.092285,0.094467,0.094155
...,...,...,...,...,...
14722,2020-06-26,111.199997,108.019997,110.949997,109.099998
14723,2020-06-29,111.570000,108.099998,109.000000,111.519997
14724,2020-06-30,112.050003,109.930000,111.500000,111.510002
14725,2020-07-01,115.599998,112.290001,112.820000,113.010002


In [7]:
# to_datetime() converts a date-like column to an actual datetime column.
# NOTE: "Date" was already listed in parse_dates when the CSV was read, so this
# call is presumably a no-op here and is kept only to demonstrate the function.
disney["Date"] = pd.to_datetime(disney["Date"])

In [8]:
# using the 'dt' accessor to access the components of a datetime Series in the DataFrame

In [9]:
# Day of the month for every date
disney["Date"].dt.day

0         2
1         3
2         4
3         5
4         8
         ..
14722    26
14723    29
14724    30
14725     1
14726     2
Name: Date, Length: 14727, dtype: int32

In [10]:
# Month number for every date (January = 1)
disney["Date"].dt.month

0        1
1        1
2        1
3        1
4        1
        ..
14722    6
14723    6
14724    6
14725    7
14726    7
Name: Date, Length: 14727, dtype: int32

In [11]:
# Calendar year of every date
disney["Date"].dt.year

0        1962
1        1962
2        1962
3        1962
4        1962
         ... 
14722    2020
14723    2020
14724    2020
14725    2020
14726    2020
Name: Date, Length: 14727, dtype: int32

In [12]:
# Day of the week as an integer: Monday = 0 ... Sunday = 6
disney["Date"].dt.dayofweek

0        1
1        2
2        3
3        4
4        0
        ..
14722    4
14723    0
14724    1
14725    2
14726    3
Name: Date, Length: 14727, dtype: int32

In [13]:
# day_name() returns the weekday name of every date
disney["Date"].dt.day_name()

0          Tuesday
1        Wednesday
2         Thursday
3           Friday
4           Monday
           ...    
14722       Friday
14723       Monday
14724      Tuesday
14725    Wednesday
14726     Thursday
Name: Date, Length: 14727, dtype: object

In [14]:
# month_name() returns the month name of every date
disney["Date"].dt.month_name()

0        January
1        January
2        January
3        January
4        January
          ...   
14722       June
14723       June
14724       June
14725       July
14726       July
Name: Date, Length: 14727, dtype: object

In [15]:
# Store the weekday names in a new "Day of Week" column, then display the frame
disney["Day of Week"] = disney["Date"].dt.day_name()
disney

Unnamed: 0,Date,High,Low,Open,Close,Day of Week
0,1962-01-02,0.096026,0.092908,0.092908,0.092908,Tuesday
1,1962-01-03,0.094467,0.092908,0.092908,0.094155,Wednesday
2,1962-01-04,0.094467,0.093532,0.094155,0.094155,Thursday
3,1962-01-05,0.094779,0.093844,0.094155,0.094467,Friday
4,1962-01-08,0.095714,0.092285,0.094467,0.094155,Monday
...,...,...,...,...,...,...
14722,2020-06-26,111.199997,108.019997,110.949997,109.099998,Friday
14723,2020-06-29,111.570000,108.099998,109.000000,111.519997,Monday
14724,2020-06-30,112.050003,109.930000,111.500000,111.510002,Tuesday
14725,2020-07-01,115.599998,112.290001,112.820000,113.010002,Wednesday


In [16]:
# Boolean date markers available on the 'dt' accessor:
#   is_quarter_start, is_quarter_end, is_month_start, is_month_end, is_year_start, is_year_end
# Each one is True where the date falls on that boundary (e.g. 2020-07-01 starts a quarter).
disney["Date"].dt.is_quarter_start

0        False
1        False
2        False
3        False
4        False
         ...  
14722    False
14723    False
14724    False
14725     True
14726    False
Name: Date, Length: 14727, dtype: bool

### ADDING AND SUBTRACTING DURATIONS OF TIME

In [17]:
# A DateOffset bundles several calendar units (years, months, weeks, days, ...) into
# one offset that can be added to or subtracted from dates
pd.DateOffset(years=2, months=4, weeks=2, days=5, hours=7, minutes=25, seconds=30)

<DateOffset: days=5, hours=7, minutes=25, months=4, seconds=30, weeks=2, years=2>

In [18]:
# Adding a DateOffset shifts every date forward; the hours component gives the
# results a time of day (05:00:00)
disney["Date"] + pd.DateOffset(years=2, months=4, weeks=1, days=1, hours=5)

0       1964-05-10 05:00:00
1       1964-05-11 05:00:00
2       1964-05-12 05:00:00
3       1964-05-13 05:00:00
4       1964-05-16 05:00:00
                ...        
14722   2022-11-03 05:00:00
14723   2022-11-06 05:00:00
14724   2022-11-07 05:00:00
14725   2022-11-09 05:00:00
14726   2022-11-10 05:00:00
Name: Date, Length: 14727, dtype: datetime64[ns]

In [19]:
# Subtracting a DateOffset shifts every date backwards, here by 2 months and 1 day
disney["Date"] - pd.DateOffset(months=2, days=1)

0       1961-11-01
1       1961-11-02
2       1961-11-03
3       1961-11-04
4       1961-11-07
           ...    
14722   2020-04-25
14723   2020-04-28
14724   2020-04-29
14725   2020-04-30
14726   2020-05-01
Name: Date, Length: 14727, dtype: datetime64[ns]

### DATE OFFSETS

In [20]:
# Adding MonthBegin rolls each date forward to the first day of the next month
# (a date that is already the 1st moves to the 1st of the following month).
# Subtracting MonthBegin rolls each date back to the most recent month start
# strictly before it, so a date on the 1st moves to the previous month's 1st.
disney["Date"] + pd.offsets.MonthBegin()

0       1962-02-01
1       1962-02-01
2       1962-02-01
3       1962-02-01
4       1962-02-01
           ...    
14722   2020-07-01
14723   2020-07-01
14724   2020-07-01
14725   2020-08-01
14726   2020-08-01
Name: Date, Length: 14727, dtype: datetime64[ns]

In [21]:
# Subtraction rolls back to the 1st of the current month; a date already on the
# 1st (row 14725, 2020-07-01) rolls back to the 1st of the previous month
disney["Date"] - pd.offsets.MonthBegin()

0       1962-01-01
1       1962-01-01
2       1962-01-01
3       1962-01-01
4       1962-01-01
           ...    
14722   2020-06-01
14723   2020-06-01
14724   2020-06-01
14725   2020-06-01
14726   2020-07-01
Name: Date, Length: 14727, dtype: datetime64[ns]

In [22]:
# Try the remaining offsets
# pd.offsets.MonthBegin()
# pd.offsets.MonthEnd()
# pd.offsets.BMonthBegin()  # Business Month Beginning
# pd.offsets.BMonthEnd()    # Business Month Ending

### THE TIMEDELTA OBJECT

In [23]:
# A Timedelta holds a span of time; here it is assembled from named components
duration = pd.Timedelta(days=8, hours=8, minutes=16, seconds=59)
duration

Timedelta('8 days 08:16:59')

In [24]:
# Convert a human-readable duration string to a Timedelta
pd.to_timedelta("3 hours, 20 minutes, 45 seconds")

Timedelta('0 days 03:20:45')

In [25]:
# A number plus a unit also yields a Timedelta (18 hours here)
pd.to_timedelta(18, unit="hour")

Timedelta('0 days 18:00:00')

In [26]:
# A list of numbers with a unit yields a TimedeltaIndex
pd.to_timedelta([5, 10, 15], unit="day")

TimedeltaIndex(['5 days', '10 days', '15 days'], dtype='timedelta64[ns]', freq=None)

In [28]:
# Subtracting one Timestamp from another gives the Timedelta between them
pd.Timestamp("2026-01-19") - pd.Timestamp("2024-01-01")

Timedelta('749 days 00:00:00')

In [32]:
# Load the deliveries dataset and preview the first rows; no parse_dates is given,
# so the date columns still hold M/D/YY text such as 5/24/98 at this point
deliveries = pd.read_csv("./data/deliveries.csv")
deliveries.head(3)

Unnamed: 0,order_date,delivery_date
0,5/24/98,2/5/99
1,4/22/92,3/6/98
2,2/10/91,8/26/92


In [33]:
# Convert the two date columns from M/D/YY strings to datetime64.
# An explicit format avoids pandas' "Could not infer format" UserWarning and the
# element-by-element dateutil fallback, and makes the parsing deterministic
# (with %y, two-digit years follow the strptime convention: 69-99 -> 19xx, 00-68 -> 20xx).
deliveries["order_date"] = pd.to_datetime(deliveries["order_date"], format="%m/%d/%y")
deliveries["delivery_date"] = pd.to_datetime(deliveries["delivery_date"], format="%m/%d/%y")
deliveries.dtypes

  deliveries["order_date"] = pd.to_datetime(deliveries["order_date"])
  deliveries["delivery_date"] = pd.to_datetime(deliveries["delivery_date"])


order_date       datetime64[ns]
delivery_date    datetime64[ns]
dtype: object

In [36]:
# Elapsed time from order to delivery, stored as a timedelta column
deliveries["duration"] = deliveries["delivery_date"] - deliveries["order_date"]
deliveries.head(3)

Unnamed: 0,order_date,delivery_date,duration
0,1998-05-24,1999-02-05,257 days
1,1992-04-22,1998-03-06,2144 days
2,1991-02-10,1992-08-26,563 days


In [38]:
# Average delivery duration, returned as a Timedelta
deliveries["duration"].mean()   # .min() / .max() can be called the same way

Timedelta('1217 days 22:53:53.532934128')

In [41]:
# The following two comparisons are equivalent: a timedelta column can be compared
# against a Timedelta object or against a duration string such as "365 days".
# Only the result of the last expression is displayed by the notebook.
deliveries["duration"] > pd.Timedelta(days=365)
deliveries["duration"] > "365 days"

0      False
1       True
2       True
3       True
4       True
       ...  
496     True
497     True
498     True
499    False
500     True
Name: duration, Length: 501, dtype: bool