In [1]:
import pandas as pd
import numpy as np

<h3>Date and Time Data Types and Tools</h3>

In [2]:
from datetime import datetime
from datetime import timedelta

In [3]:
now = datetime.now()

In [4]:
now

datetime.datetime(2024, 5, 26, 17, 35, 41, 723884)

In [5]:
now.year, now.month, now.day, now.hour, now.minute

(2024, 5, 26, 17, 35)

In [6]:
now.year

2024

In [7]:
# delta
delta = datetime(2024, 5, 21) - datetime(2016, 8, 26, 8, 54)
delta

datetime.timedelta(days=2824, seconds=54360)

In [8]:
# numerical evaluations
start = datetime(2024, 1, 1)
start + timedelta(3000)

datetime.datetime(2032, 3, 19, 0, 0)

<h4>Converting between String and Datetime</h4>

In [9]:
stamp = datetime(2024, 5, 22)

In [10]:
str(stamp)

'2024-05-22 00:00:00'

In [11]:
# Conversion to a specified string format
stamp.strftime("%Y-%B-%d-%u-%A-%p")

'2024-May-22-3-Wednesday-AM'

In [12]:
value = "2024-12-03"

In [13]:
datetime.strptime(value, "%Y-%m-%d")

datetime.datetime(2024, 12, 3, 0, 0)

In [14]:
# converting a list of string dates to datetime format
datestrs = ["7/6/2025", "8/6/2025"]
[datetime.strptime(x, "%m/%d/%Y") for x in datestrs]

[datetime.datetime(2025, 7, 6, 0, 0), datetime.datetime(2025, 8, 6, 0, 0)]

In [15]:
# pandas to_datetime
datestrs = ["2011-07-06 12:00:00", "2025-08-06 00:00:00"]
pd.to_datetime(datestrs)

DatetimeIndex(['2011-07-06 12:00:00', '2025-08-06 00:00:00'], dtype='datetime64[ns]', freq=None)

In [16]:
# dataframe to datetime
df = pd.DataFrame(
    {"year": [2025, 2024, 2023],
     "month": [2, 4, 8],
     "day": [5, 4, 5]
    })

In [17]:
df

Unnamed: 0,year,month,day
0,2025,2,5
1,2024,4,4
2,2023,8,5


In [18]:
pd.to_datetime(df)

0   2025-02-05
1   2024-04-04
2   2023-08-05
dtype: datetime64[ns]

In [19]:
datestr = ["2025-07-06 12:00:00",
           "2034-08-04 13:45:34"]

In [20]:
# Missing values, Not a Time values are also accepted
idx = pd.to_datetime(datestrs + [None])

In [21]:
idx

DatetimeIndex(['2011-07-06 12:00:00', '2025-08-06 00:00:00', 'NaT'], dtype='datetime64[ns]', freq=None)

In [22]:
# indexing idx
idx[2]

NaT

In [23]:
pd.isna(idx)

array([False, False,  True])

<h4>Time Series Basics</h4>

In [24]:
# time series
dates = [datetime(2024, 6, 2),
        datetime(2025, 10, 21),
        datetime(2024, 4, 12),
        datetime(2023, 10, 5),
        datetime(2016, 11, 3),
        datetime(2017, 3, 15),
        datetime(2019, 6, 3),
        datetime(2020, 5, 8),
       ]

In [25]:
ts =pd.Series(np.random.standard_normal(8),
              index=dates,
             )

In [26]:
ts

2024-06-02    0.293264
2025-10-21   -0.183988
2024-04-12   -1.585951
2023-10-05   -2.143029
2016-11-03   -0.470087
2017-03-15   -0.467813
2019-06-03    0.053859
2020-05-08   -0.999767
dtype: float64

In [27]:
ts + ts[::3]

2016-11-03         NaN
2017-03-15         NaN
2019-06-03    0.107718
2020-05-08         NaN
2023-10-05   -4.286058
2024-04-12         NaN
2024-06-02    0.586529
2025-10-21         NaN
dtype: float64

In [28]:
ts[::3]

2024-06-02    0.293264
2023-10-05   -2.143029
2019-06-03    0.053859
dtype: float64

<h4>indexing, Selecting and Subsetting</h4>

In [29]:
# same as indexing a series
stamp = ts.index[2]

In [30]:
stamp

Timestamp('2024-04-12 00:00:00')

In [31]:
ts.iat[0]

0.2932644363256184

In [32]:
# indexing by year
longer_ts = pd.Series(np.random.standard_normal(2000),
                      index=pd.date_range("2024-05-22",
                                          periods=2000))

In [33]:
longer_ts["2024"]

2024-05-22    0.567352
2024-05-23    0.495687
2024-05-24   -0.029125
2024-05-25   -1.474185
2024-05-26   -0.214265
                ...   
2024-12-27    1.333597
2024-12-28    0.615799
2024-12-29    0.221114
2024-12-30   -0.804792
2024-12-31   -2.856967
Freq: D, Length: 224, dtype: float64

In [34]:
# selecting by year-month
longer_ts["2024-06"]

2024-06-01    0.646101
2024-06-02   -0.384709
2024-06-03   -0.690672
2024-06-04    0.640527
2024-06-05    0.923032
2024-06-06   -1.057102
2024-06-07    0.213293
2024-06-08   -0.061178
2024-06-09    0.574991
2024-06-10   -0.743524
2024-06-11    0.783731
2024-06-12   -0.515342
2024-06-13    1.381710
2024-06-14   -0.803077
2024-06-15    0.723488
2024-06-16   -0.339897
2024-06-17    0.130934
2024-06-18   -0.643298
2024-06-19    0.369666
2024-06-20    0.168838
2024-06-21    0.031485
2024-06-22   -0.808769
2024-06-23    0.582660
2024-06-24    0.985190
2024-06-25   -0.051829
2024-06-26   -0.241596
2024-06-27   -1.117831
2024-06-28    0.932942
2024-06-29    1.823039
2024-06-30    0.414063
Freq: D, dtype: float64

In [35]:
# Slicing by datetime
longer_ts[datetime(2024,5,22):
datetime(2024,8,1)
]

2024-05-22    0.567352
2024-05-23    0.495687
2024-05-24   -0.029125
2024-05-25   -1.474185
2024-05-26   -0.214265
                ...   
2024-07-28    1.261894
2024-07-29   -0.111147
2024-07-30    0.851757
2024-07-31    0.948473
2024-08-01   -0.957185
Freq: D, Length: 72, dtype: float64

In [36]:
# Slicing with a timestamp not contained in the ts
longer_ts[datetime(2016, 8, 26):
datetime(2024, 8, 26)
]

2024-05-22    0.567352
2024-05-23    0.495687
2024-05-24   -0.029125
2024-05-25   -1.474185
2024-05-26   -0.214265
                ...   
2024-08-22   -0.842586
2024-08-23    0.596504
2024-08-24    0.984001
2024-08-25   -0.161637
2024-08-26   -1.003443
Freq: D, Length: 97, dtype: float64

In [37]:
longer_ts

2024-05-22    0.567352
2024-05-23    0.495687
2024-05-24   -0.029125
2024-05-25   -1.474185
2024-05-26   -0.214265
                ...   
2029-11-07    0.497688
2029-11-08   -0.264864
2029-11-09   -0.634421
2029-11-10    0.879371
2029-11-11   -0.749791
Freq: D, Length: 2000, dtype: float64

In [38]:
# truncating after a specific date
longer_ts.truncate(after="2025-05-22")

2024-05-22    0.567352
2024-05-23    0.495687
2024-05-24   -0.029125
2024-05-25   -1.474185
2024-05-26   -0.214265
                ...   
2025-05-18   -0.085187
2025-05-19    1.067702
2025-05-20   -1.653159
2025-05-21   -0.116110
2025-05-22   -0.110104
Freq: D, Length: 366, dtype: float64

In [39]:
dates = pd.date_range("2024-01-01",
                      periods=100,
                      freq="W-WED"
                     )

In [40]:
long_df = pd.DataFrame(np.random.standard_normal((100,4)),
                       index=dates,
                       columns=["Iganga", "Jinja", "Kampala", "Busia"])

In [41]:
long_df.loc["2024"]

Unnamed: 0,Iganga,Jinja,Kampala,Busia
2024-01-03,-0.470773,-0.654808,-0.843595,0.003596
2024-01-10,0.122545,0.327547,1.804019,0.027619
2024-01-17,0.600177,-0.155775,0.772945,-2.906781
2024-01-24,-0.356224,0.184909,0.48675,-1.036119
2024-01-31,-1.552802,0.588519,1.607607,1.276485
2024-02-07,0.978148,0.210676,-0.200518,-0.127048
2024-02-14,0.625067,0.504231,-0.014309,-0.372024
2024-02-21,-0.80382,-1.089571,1.651026,0.782391
2024-02-28,2.992255,0.051279,-0.018805,0.964394
2024-03-06,0.115504,-0.623519,0.369925,-1.077769


<h4>Time Series with Duplicates</h4>

In [42]:
dates = pd.DatetimeIndex(["2000-01-01",
                          "2000-01-02",
                          "2000-01-02",
                          "2000-01-02",
                          "2000-01-03"])

In [43]:
dup_ts = pd.Series(np.arange(5),
                   index=dates)

In [44]:
dup_ts

2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int32

In [45]:
dup_ts["2000-01-03"]

4

In [46]:
dup_ts["2000-01-02"]

2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int32

In [47]:
# Aggregating
grouped = dup_ts.groupby(level=0)

In [48]:
grouped.mean()

2000-01-01    0.0
2000-01-02    2.0
2000-01-03    4.0
dtype: float64

In [49]:
grouped.count()

2000-01-01    1
2000-01-02    3
2000-01-03    1
dtype: int64

<h4>Date Ranges, Frequencies and Shifting</h4>

In [50]:
ts

2024-06-02    0.293264
2025-10-21   -0.183988
2024-04-12   -1.585951
2023-10-05   -2.143029
2016-11-03   -0.470087
2017-03-15   -0.467813
2019-06-03    0.053859
2020-05-08   -0.999767
dtype: float64

In [51]:
resampler = ts.resample("D")

In [52]:
resampler

<pandas.core.resample.DatetimeIndexResampler object at 0x000001E9150E5FA0>

<h4>Generating Date Ranges</h4>

In [53]:
index = pd.date_range("2024-05-01",
                      "2024-12-31")

In [54]:
index

DatetimeIndex(['2024-05-01', '2024-05-02', '2024-05-03', '2024-05-04',
               '2024-05-05', '2024-05-06', '2024-05-07', '2024-05-08',
               '2024-05-09', '2024-05-10',
               ...
               '2024-12-22', '2024-12-23', '2024-12-24', '2024-12-25',
               '2024-12-26', '2024-12-27', '2024-12-28', '2024-12-29',
               '2024-12-30', '2024-12-31'],
              dtype='datetime64[ns]', length=245, freq='D')

In [55]:
# using start date, end date
pd.date_range(start="2024-05-25", periods=10)

DatetimeIndex(['2024-05-25', '2024-05-26', '2024-05-27', '2024-05-28',
               '2024-05-29', '2024-05-30', '2024-05-31', '2024-06-01',
               '2024-06-02', '2024-06-03'],
              dtype='datetime64[ns]', freq='D')

In [56]:
# using end
pd.date_range(end="2024-12-31",
              periods=200,
             tz='Africa/Kampala',
             freq="MS",
              inclusive="both",
             )

DatetimeIndex(['2008-05-01 00:00:00+03:00', '2008-06-01 00:00:00+03:00',
               '2008-07-01 00:00:00+03:00', '2008-08-01 00:00:00+03:00',
               '2008-09-01 00:00:00+03:00', '2008-10-01 00:00:00+03:00',
               '2008-11-01 00:00:00+03:00', '2008-12-01 00:00:00+03:00',
               '2009-01-01 00:00:00+03:00', '2009-02-01 00:00:00+03:00',
               ...
               '2024-03-01 00:00:00+03:00', '2024-04-01 00:00:00+03:00',
               '2024-05-01 00:00:00+03:00', '2024-06-01 00:00:00+03:00',
               '2024-07-01 00:00:00+03:00', '2024-08-01 00:00:00+03:00',
               '2024-09-01 00:00:00+03:00', '2024-10-01 00:00:00+03:00',
               '2024-11-01 00:00:00+03:00', '2024-12-01 00:00:00+03:00'],
              dtype='datetime64[ns, Africa/Kampala]', length=200, freq='MS')

In [57]:
import pytz

In [58]:
# time zone samples
pytz.all_timezones[:50]

['Africa/Abidjan',
 'Africa/Accra',
 'Africa/Addis_Ababa',
 'Africa/Algiers',
 'Africa/Asmara',
 'Africa/Asmera',
 'Africa/Bamako',
 'Africa/Bangui',
 'Africa/Banjul',
 'Africa/Bissau',
 'Africa/Blantyre',
 'Africa/Brazzaville',
 'Africa/Bujumbura',
 'Africa/Cairo',
 'Africa/Casablanca',
 'Africa/Ceuta',
 'Africa/Conakry',
 'Africa/Dakar',
 'Africa/Dar_es_Salaam',
 'Africa/Djibouti',
 'Africa/Douala',
 'Africa/El_Aaiun',
 'Africa/Freetown',
 'Africa/Gaborone',
 'Africa/Harare',
 'Africa/Johannesburg',
 'Africa/Juba',
 'Africa/Kampala',
 'Africa/Khartoum',
 'Africa/Kigali',
 'Africa/Kinshasa',
 'Africa/Lagos',
 'Africa/Libreville',
 'Africa/Lome',
 'Africa/Luanda',
 'Africa/Lubumbashi',
 'Africa/Lusaka',
 'Africa/Malabo',
 'Africa/Maputo',
 'Africa/Maseru',
 'Africa/Mbabane',
 'Africa/Mogadishu',
 'Africa/Monrovia',
 'Africa/Nairobi',
 'Africa/Ndjamena',
 'Africa/Niamey',
 'Africa/Nouakchott',
 'Africa/Ouagadougou',
 'Africa/Porto-Novo',
 'Africa/Sao_Tome']

In [59]:
# using localize
pd.date_range(
    start=pd.to_datetime("1/1/2024", dayfirst=True).tz_localize("Africa/Kampala"),
    end=pd.to_datetime("31/12/2025", dayfirst=True).tz_localize("Africa/Kampala"),
    )

DatetimeIndex(['2024-01-01 00:00:00+03:00', '2024-01-02 00:00:00+03:00',
               '2024-01-03 00:00:00+03:00', '2024-01-04 00:00:00+03:00',
               '2024-01-05 00:00:00+03:00', '2024-01-06 00:00:00+03:00',
               '2024-01-07 00:00:00+03:00', '2024-01-08 00:00:00+03:00',
               '2024-01-09 00:00:00+03:00', '2024-01-10 00:00:00+03:00',
               ...
               '2025-12-22 00:00:00+03:00', '2025-12-23 00:00:00+03:00',
               '2025-12-24 00:00:00+03:00', '2025-12-25 00:00:00+03:00',
               '2025-12-26 00:00:00+03:00', '2025-12-27 00:00:00+03:00',
               '2025-12-28 00:00:00+03:00', '2025-12-29 00:00:00+03:00',
               '2025-12-30 00:00:00+03:00', '2025-12-31 00:00:00+03:00'],
              dtype='datetime64[ns, Africa/Kampala]', length=731, freq='D')

In [60]:
pd.date_range(start="1/1/2024",
              periods=5,
              freq="3ME"
             )

DatetimeIndex(['2024-01-31', '2024-04-30', '2024-07-31', '2024-10-31',
               '2025-01-31'],
              dtype='datetime64[ns]', freq='3ME')

In [61]:
# specifing unit
pd.date_range(start="2017-01-01",
              periods=10,
              freq="100YS",
              unit='s'
             )

DatetimeIndex(['2017-01-01', '2117-01-01', '2217-01-01', '2317-01-01',
               '2417-01-01', '2517-01-01', '2617-01-01', '2717-01-01',
               '2817-01-01', '2917-01-01'],
              dtype='datetime64[s]', freq='100YS-JAN')

In [62]:
# Normalizing datetime
pd.date_range("2024-05-02 12:54:21",
              periods=5,
              normalize=True
             )

DatetimeIndex(['2024-05-02', '2024-05-03', '2024-05-04', '2024-05-05',
               '2024-05-06'],
              dtype='datetime64[ns]', freq='D')

<h4>Frequencies and Date Offsets</h4>

In [63]:
pd.date_range("2025-02-01",
              "2025-05-02 23:03:02",
              freq="4h"
             )

DatetimeIndex(['2025-02-01 00:00:00', '2025-02-01 04:00:00',
               '2025-02-01 08:00:00', '2025-02-01 12:00:00',
               '2025-02-01 16:00:00', '2025-02-01 20:00:00',
               '2025-02-02 00:00:00', '2025-02-02 04:00:00',
               '2025-02-02 08:00:00', '2025-02-02 12:00:00',
               ...
               '2025-05-01 08:00:00', '2025-05-01 12:00:00',
               '2025-05-01 16:00:00', '2025-05-01 20:00:00',
               '2025-05-02 00:00:00', '2025-05-02 04:00:00',
               '2025-05-02 08:00:00', '2025-05-02 12:00:00',
               '2025-05-02 16:00:00', '2025-05-02 20:00:00'],
              dtype='datetime64[ns]', length=546, freq='4h')

In [64]:
pd.date_range(
    "2000-01-01",
    periods=10,
    freq="1h30min3s",
)

DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 01:30:03',
               '2000-01-01 03:00:06', '2000-01-01 04:30:09',
               '2000-01-01 06:00:12', '2000-01-01 07:30:15',
               '2000-01-01 09:00:18', '2000-01-01 10:30:21',
               '2000-01-01 12:00:24', '2000-01-01 13:30:27'],
              dtype='datetime64[ns]', freq='5403s')

<h5>Week of month</h5>

In [65]:
monthly_meetups = pd.date_range(
    "2024-06-01",
    "2024-12-31",
    freq="WOM-3SAT"
)

In [66]:
monthly_meetups

DatetimeIndex(['2024-06-15', '2024-07-20', '2024-08-17', '2024-09-21',
               '2024-10-19', '2024-11-16', '2024-12-21'],
              dtype='datetime64[ns]', freq='WOM-3SAT')

<h4>Shifting (Leading and Lagging) Data</h4>

In [79]:
ts = pd.Series(
    np.random.standard_normal(10),
    index=pd.date_range(start="2030-01-01",
                        periods=10,
                        freq="ME",
                       )
)

In [80]:
(-1.491246--1.609596)/(-1.609596)

-0.07352776721612128

In [71]:
ts

2030-01-31   -1.609596
2030-02-28   -1.491246
2030-03-31    0.576856
2030-04-30    0.560571
2030-05-31    1.078305
2030-06-30   -1.361033
2030-07-31    1.555660
2030-08-31   -0.730017
2030-09-30    0.553378
2030-10-31   -0.912981
Freq: ME, dtype: float64

In [72]:
ts.shift(2)

2030-01-31         NaN
2030-02-28         NaN
2030-03-31   -1.609596
2030-04-30   -1.491246
2030-05-31    0.576856
2030-06-30    0.560571
2030-07-31    1.078305
2030-08-31   -1.361033
2030-09-30    1.555660
2030-10-31   -0.730017
Freq: ME, dtype: float64

In [73]:
ts.shift(-2)

2030-01-31    0.576856
2030-02-28    0.560571
2030-03-31    1.078305
2030-04-30   -1.361033
2030-05-31    1.555660
2030-06-30   -0.730017
2030-07-31    0.553378
2030-08-31   -0.912981
2030-09-30         NaN
2030-10-31         NaN
Freq: ME, dtype: float64

In [75]:
# Percentage change
(ts-ts.shift(1))/ts

2030-01-31         NaN
2030-02-28   -0.079363
2030-03-31    3.585127
2030-04-30   -0.029050
2030-05-31    0.480137
2030-06-30    1.792270
2030-07-31    1.874891
2030-08-31    3.130991
2030-09-30    2.319200
2030-10-31    1.606123
Freq: ME, dtype: float64

In [84]:
# Shifting index two
ts.shift(2,
         freq="ME",
        )

2030-03-31   -0.875391
2030-04-30    0.942002
2030-05-31    0.386805
2030-06-30   -1.048321
2030-07-31   -0.101505
2030-08-31    0.715255
2030-09-30   -0.173435
2030-10-31    0.062862
2030-11-30   -0.971976
2030-12-31    0.594325
Freq: ME, dtype: float64

In [87]:
ts.shift(
    3,
    freq="ME",
)

2030-04-30   -0.875391
2030-05-31    0.942002
2030-06-30    0.386805
2030-07-31   -1.048321
2030-08-31   -0.101505
2030-09-30    0.715255
2030-10-31   -0.173435
2030-11-30    0.062862
2030-12-31   -0.971976
2031-01-31    0.594325
Freq: ME, dtype: float64

In [86]:
ts.shift(
    3,
    freq="D",
)

2030-02-03   -0.875391
2030-03-03    0.942002
2030-04-03    0.386805
2030-05-03   -1.048321
2030-06-03   -0.101505
2030-07-03    0.715255
2030-08-03   -0.173435
2030-09-03    0.062862
2030-10-03   -0.971976
2030-11-03    0.594325
dtype: float64

In [101]:
ts.shift(
    1,
    freq="90min",
)

2030-01-31 01:30:00   -0.875391
2030-02-28 01:30:00    0.942002
2030-03-31 01:30:00    0.386805
2030-04-30 01:30:00   -1.048321
2030-05-31 01:30:00   -0.101505
2030-06-30 01:30:00    0.715255
2030-07-31 01:30:00   -0.173435
2030-08-31 01:30:00    0.062862
2030-09-30 01:30:00   -0.971976
2030-10-31 01:30:00    0.594325
dtype: float64

In [103]:
from pandas.tseries.offsets import Day, MonthEnd

In [99]:
# Using rollback and rollforward
now = datetime(2024,5,26)

In [100]:
now

datetime.datetime(2024, 5, 26, 0, 0)

In [105]:
# Shifting to the end of month
now + MonthEnd()

Timestamp('2024-05-31 00:00:00')

In [106]:
now + MonthEnd(3)

Timestamp('2024-07-31 00:00:00')