In [1]:
import pandas as pd
import numpy as np

<h3>Date and Time Data Types and Tools</h3>

In [2]:
from datetime import datetime
from datetime import timedelta

In [3]:
now = datetime.now()

In [4]:
now

datetime.datetime(2024, 6, 4, 18, 43, 44, 761542)

In [5]:
now.year, now.month, now.day, now.hour, now.minute

(2024, 6, 4, 18, 43)

In [6]:
now.year

2024

In [7]:
# delta
delta = datetime(2024, 5, 21) - datetime(2016, 8, 26, 8, 54)
delta

datetime.timedelta(days=2824, seconds=54360)

In [8]:
# numerical evaluations
start = datetime(2024, 1, 1)
start + timedelta(3000)

datetime.datetime(2032, 3, 19, 0, 0)

<h4>Converting between String and Datetime</h4>

In [9]:
stamp = datetime(2024, 5, 22)

In [10]:
str(stamp)

'2024-05-22 00:00:00'

In [11]:
# Conversion to a specified string format
stamp.strftime("%Y-%B-%d-%u-%A-%p")

'2024-May-22-3-Wednesday-AM'

In [12]:
value = "2024-12-03"

In [13]:
datetime.strptime(value, "%Y-%m-%d")

datetime.datetime(2024, 12, 3, 0, 0)

In [14]:
# converting a list of string dates to datetime format
datestrs = ["7/6/2025", "8/6/2025"]
[datetime.strptime(x, "%m/%d/%Y") for x in datestrs]

[datetime.datetime(2025, 7, 6, 0, 0), datetime.datetime(2025, 8, 6, 0, 0)]

In [15]:
# pandas to_datetime
datestrs = ["2011-07-06 12:00:00", "2025-08-06 00:00:00"]
pd.to_datetime(datestrs)

DatetimeIndex(['2011-07-06 12:00:00', '2025-08-06 00:00:00'], dtype='datetime64[ns]', freq=None)

In [16]:
# dataframe to datetime
df = pd.DataFrame(
    {"year": [2025, 2024, 2023],
     "month": [2, 4, 8],
     "day": [5, 4, 5]
    })

In [17]:
df

Unnamed: 0,year,month,day
0,2025,2,5
1,2024,4,4
2,2023,8,5


In [18]:
pd.to_datetime(df)

0   2025-02-05
1   2024-04-04
2   2023-08-05
dtype: datetime64[ns]

In [19]:
datestr = ["2025-07-06 12:00:00",
           "2034-08-04 13:45:34"]

In [20]:
# Missing values, Not a Time values are also accepted
idx = pd.to_datetime(datestrs + [None])

In [21]:
idx

DatetimeIndex(['2011-07-06 12:00:00', '2025-08-06 00:00:00', 'NaT'], dtype='datetime64[ns]', freq=None)

In [22]:
# indexing idx
idx[2]

NaT

In [23]:
pd.isna(idx)

array([False, False,  True])

<h4>Time Series Basics</h4>

In [24]:
# time series
dates = [datetime(2024, 6, 2),
        datetime(2025, 10, 21),
        datetime(2024, 4, 12),
        datetime(2023, 10, 5),
        datetime(2016, 11, 3),
        datetime(2017, 3, 15),
        datetime(2019, 6, 3),
        datetime(2020, 5, 8),
       ]

In [25]:
ts =pd.Series(np.random.standard_normal(8),
              index=dates,
             )

In [26]:
ts

2024-06-02   -0.806007
2025-10-21   -0.859244
2024-04-12    2.063961
2023-10-05    0.475198
2016-11-03   -0.044286
2017-03-15   -1.106826
2019-06-03    1.049382
2020-05-08   -1.373954
dtype: float64

In [27]:
ts + ts[::3]

2016-11-03         NaN
2017-03-15         NaN
2019-06-03    2.098765
2020-05-08         NaN
2023-10-05    0.950396
2024-04-12         NaN
2024-06-02   -1.612015
2025-10-21         NaN
dtype: float64

In [28]:
ts[::3]

2024-06-02   -0.806007
2023-10-05    0.475198
2019-06-03    1.049382
dtype: float64

<h4>indexing, Selecting and Subsetting</h4>

In [29]:
# same as indexing a series
stamp = ts.index[2]

In [30]:
stamp

Timestamp('2024-04-12 00:00:00')

In [31]:
ts.iat[0]

-0.8060074693364736

In [32]:
# indexing by year
longer_ts = pd.Series(np.random.standard_normal(2000),
                      index=pd.date_range("2024-05-22",
                                          periods=2000))

In [33]:
longer_ts["2024"]

2024-05-22   -0.732235
2024-05-23   -0.686546
2024-05-24   -0.246911
2024-05-25   -1.587965
2024-05-26   -0.167832
                ...   
2024-12-27   -0.261326
2024-12-28    0.447958
2024-12-29    0.245835
2024-12-30   -0.673547
2024-12-31   -0.666550
Freq: D, Length: 224, dtype: float64

In [34]:
# selecting by year-month
longer_ts["2024-06"]

2024-06-01    0.140587
2024-06-02   -0.217420
2024-06-03    0.768619
2024-06-04   -0.385564
2024-06-05   -0.914844
2024-06-06   -1.273571
2024-06-07   -1.556623
2024-06-08    1.130598
2024-06-09    0.144953
2024-06-10    1.130902
2024-06-11   -2.908657
2024-06-12    0.472808
2024-06-13   -0.054097
2024-06-14    0.529448
2024-06-15   -0.152938
2024-06-16    1.993067
2024-06-17   -0.445564
2024-06-18   -1.031190
2024-06-19   -0.592980
2024-06-20   -0.643106
2024-06-21   -0.095464
2024-06-22   -0.138888
2024-06-23    1.342791
2024-06-24    0.403548
2024-06-25   -1.759578
2024-06-26   -0.086907
2024-06-27    0.178057
2024-06-28    0.148710
2024-06-29    0.677658
2024-06-30   -1.008265
Freq: D, dtype: float64

In [35]:
# Slicing by datetime
longer_ts[datetime(2024,5,22):
datetime(2024,8,1)
]

2024-05-22   -0.732235
2024-05-23   -0.686546
2024-05-24   -0.246911
2024-05-25   -1.587965
2024-05-26   -0.167832
                ...   
2024-07-28   -0.113187
2024-07-29    0.823853
2024-07-30    1.735115
2024-07-31    0.389623
2024-08-01   -0.868612
Freq: D, Length: 72, dtype: float64

In [36]:
# Slicing with a timestamp not contained in the ts
longer_ts[datetime(2016, 8, 26):
datetime(2024, 8, 26)
]

2024-05-22   -0.732235
2024-05-23   -0.686546
2024-05-24   -0.246911
2024-05-25   -1.587965
2024-05-26   -0.167832
                ...   
2024-08-22   -1.367976
2024-08-23   -1.496630
2024-08-24    1.242666
2024-08-25   -2.435061
2024-08-26   -1.491200
Freq: D, Length: 97, dtype: float64

In [37]:
longer_ts

2024-05-22   -0.732235
2024-05-23   -0.686546
2024-05-24   -0.246911
2024-05-25   -1.587965
2024-05-26   -0.167832
                ...   
2029-11-07    0.321696
2029-11-08    0.360712
2029-11-09   -0.908438
2029-11-10    1.349243
2029-11-11   -1.942261
Freq: D, Length: 2000, dtype: float64

In [38]:
# truncating after a specific date
longer_ts.truncate(after="2025-05-22")

2024-05-22   -0.732235
2024-05-23   -0.686546
2024-05-24   -0.246911
2024-05-25   -1.587965
2024-05-26   -0.167832
                ...   
2025-05-18   -0.946069
2025-05-19    0.242977
2025-05-20   -0.666771
2025-05-21    1.800121
2025-05-22    0.743009
Freq: D, Length: 366, dtype: float64

In [39]:
dates = pd.date_range("2024-01-01",
                      periods=100,
                      freq="W-WED"
                     )

In [40]:
long_df = pd.DataFrame(np.random.standard_normal((100,4)),
                       index=dates,
                       columns=["Iganga", "Jinja", "Kampala", "Busia"])

In [41]:
long_df.loc["2024"]

Unnamed: 0,Iganga,Jinja,Kampala,Busia
2024-01-03,-2.668649,-0.310719,-1.988816,0.563486
2024-01-10,-0.459162,-0.181965,0.504145,0.557405
2024-01-17,-1.303635,-0.265742,-1.647765,0.163668
2024-01-24,1.19804,-0.24304,-0.038739,1.312665
2024-01-31,3.237067,-1.018263,0.010527,-0.692321
2024-02-07,1.285843,0.084531,-0.775765,-0.090048
2024-02-14,0.701687,0.722341,-0.559868,0.234922
2024-02-21,-2.137691,-0.781529,-1.129145,0.223945
2024-02-28,-0.071003,0.390559,-1.509982,-0.504609
2024-03-06,-0.515817,0.48718,-0.209669,-0.247502


<h4>Time Series with Duplicates</h4>

In [42]:
dates = pd.DatetimeIndex(["2000-01-01",
                          "2000-01-02",
                          "2000-01-02",
                          "2000-01-02",
                          "2000-01-03"])

In [43]:
dup_ts = pd.Series(np.arange(5),
                   index=dates)

In [44]:
dup_ts

2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int32

In [45]:
dup_ts["2000-01-03"]

4

In [46]:
dup_ts["2000-01-02"]

2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int32

In [47]:
# Aggregating
grouped = dup_ts.groupby(level=0)

In [48]:
grouped.mean()

2000-01-01    0.0
2000-01-02    2.0
2000-01-03    4.0
dtype: float64

In [49]:
grouped.count()

2000-01-01    1
2000-01-02    3
2000-01-03    1
dtype: int64

<h4>Date Ranges, Frequencies and Shifting</h4>

In [50]:
ts

2024-06-02   -0.806007
2025-10-21   -0.859244
2024-04-12    2.063961
2023-10-05    0.475198
2016-11-03   -0.044286
2017-03-15   -1.106826
2019-06-03    1.049382
2020-05-08   -1.373954
dtype: float64

In [51]:
resampler = ts.resample("D")

In [52]:
resampler

<pandas.core.resample.DatetimeIndexResampler object at 0x000001A39A4590A0>

<h4>Generating Date Ranges</h4>

In [53]:
index = pd.date_range("2024-05-01",
                      "2024-12-31")

In [54]:
index

DatetimeIndex(['2024-05-01', '2024-05-02', '2024-05-03', '2024-05-04',
               '2024-05-05', '2024-05-06', '2024-05-07', '2024-05-08',
               '2024-05-09', '2024-05-10',
               ...
               '2024-12-22', '2024-12-23', '2024-12-24', '2024-12-25',
               '2024-12-26', '2024-12-27', '2024-12-28', '2024-12-29',
               '2024-12-30', '2024-12-31'],
              dtype='datetime64[ns]', length=245, freq='D')

In [55]:
# using start date, end date
pd.date_range(start="2024-05-25", periods=10)

DatetimeIndex(['2024-05-25', '2024-05-26', '2024-05-27', '2024-05-28',
               '2024-05-29', '2024-05-30', '2024-05-31', '2024-06-01',
               '2024-06-02', '2024-06-03'],
              dtype='datetime64[ns]', freq='D')

In [56]:
# using end
pd.date_range(end="2024-12-31",
              periods=200,
             tz='Africa/Kampala',
             freq="MS",
              inclusive="both",
             )

DatetimeIndex(['2008-05-01 00:00:00+03:00', '2008-06-01 00:00:00+03:00',
               '2008-07-01 00:00:00+03:00', '2008-08-01 00:00:00+03:00',
               '2008-09-01 00:00:00+03:00', '2008-10-01 00:00:00+03:00',
               '2008-11-01 00:00:00+03:00', '2008-12-01 00:00:00+03:00',
               '2009-01-01 00:00:00+03:00', '2009-02-01 00:00:00+03:00',
               ...
               '2024-03-01 00:00:00+03:00', '2024-04-01 00:00:00+03:00',
               '2024-05-01 00:00:00+03:00', '2024-06-01 00:00:00+03:00',
               '2024-07-01 00:00:00+03:00', '2024-08-01 00:00:00+03:00',
               '2024-09-01 00:00:00+03:00', '2024-10-01 00:00:00+03:00',
               '2024-11-01 00:00:00+03:00', '2024-12-01 00:00:00+03:00'],
              dtype='datetime64[ns, Africa/Kampala]', length=200, freq='MS')

In [57]:
import pytz

In [58]:
# time zone samples
pytz.all_timezones[:50]

['Africa/Abidjan',
 'Africa/Accra',
 'Africa/Addis_Ababa',
 'Africa/Algiers',
 'Africa/Asmara',
 'Africa/Asmera',
 'Africa/Bamako',
 'Africa/Bangui',
 'Africa/Banjul',
 'Africa/Bissau',
 'Africa/Blantyre',
 'Africa/Brazzaville',
 'Africa/Bujumbura',
 'Africa/Cairo',
 'Africa/Casablanca',
 'Africa/Ceuta',
 'Africa/Conakry',
 'Africa/Dakar',
 'Africa/Dar_es_Salaam',
 'Africa/Djibouti',
 'Africa/Douala',
 'Africa/El_Aaiun',
 'Africa/Freetown',
 'Africa/Gaborone',
 'Africa/Harare',
 'Africa/Johannesburg',
 'Africa/Juba',
 'Africa/Kampala',
 'Africa/Khartoum',
 'Africa/Kigali',
 'Africa/Kinshasa',
 'Africa/Lagos',
 'Africa/Libreville',
 'Africa/Lome',
 'Africa/Luanda',
 'Africa/Lubumbashi',
 'Africa/Lusaka',
 'Africa/Malabo',
 'Africa/Maputo',
 'Africa/Maseru',
 'Africa/Mbabane',
 'Africa/Mogadishu',
 'Africa/Monrovia',
 'Africa/Nairobi',
 'Africa/Ndjamena',
 'Africa/Niamey',
 'Africa/Nouakchott',
 'Africa/Ouagadougou',
 'Africa/Porto-Novo',
 'Africa/Sao_Tome']

In [59]:
# using localize
pd.date_range(
    start=pd.to_datetime("1/1/2024", dayfirst=True).tz_localize("Africa/Kampala"),
    end=pd.to_datetime("31/12/2025", dayfirst=True).tz_localize("Africa/Kampala"),
    )

DatetimeIndex(['2024-01-01 00:00:00+03:00', '2024-01-02 00:00:00+03:00',
               '2024-01-03 00:00:00+03:00', '2024-01-04 00:00:00+03:00',
               '2024-01-05 00:00:00+03:00', '2024-01-06 00:00:00+03:00',
               '2024-01-07 00:00:00+03:00', '2024-01-08 00:00:00+03:00',
               '2024-01-09 00:00:00+03:00', '2024-01-10 00:00:00+03:00',
               ...
               '2025-12-22 00:00:00+03:00', '2025-12-23 00:00:00+03:00',
               '2025-12-24 00:00:00+03:00', '2025-12-25 00:00:00+03:00',
               '2025-12-26 00:00:00+03:00', '2025-12-27 00:00:00+03:00',
               '2025-12-28 00:00:00+03:00', '2025-12-29 00:00:00+03:00',
               '2025-12-30 00:00:00+03:00', '2025-12-31 00:00:00+03:00'],
              dtype='datetime64[ns, Africa/Kampala]', length=731, freq='D')

In [60]:
pd.date_range(start="1/1/2024",
              periods=5,
              freq="3ME"
             )

DatetimeIndex(['2024-01-31', '2024-04-30', '2024-07-31', '2024-10-31',
               '2025-01-31'],
              dtype='datetime64[ns]', freq='3ME')

In [61]:
# specifing unit
pd.date_range(start="2017-01-01",
              periods=10,
              freq="100YS",
              unit='s'
             )

DatetimeIndex(['2017-01-01', '2117-01-01', '2217-01-01', '2317-01-01',
               '2417-01-01', '2517-01-01', '2617-01-01', '2717-01-01',
               '2817-01-01', '2917-01-01'],
              dtype='datetime64[s]', freq='100YS-JAN')

In [62]:
# Normalizing datetime
pd.date_range("2024-05-02 12:54:21",
              periods=5,
              normalize=True
             )

DatetimeIndex(['2024-05-02', '2024-05-03', '2024-05-04', '2024-05-05',
               '2024-05-06'],
              dtype='datetime64[ns]', freq='D')

<h4>Frequencies and Date Offsets</h4>

In [63]:
pd.date_range("2025-02-01",
              "2025-05-02 23:03:02",
              freq="4h"
             )

DatetimeIndex(['2025-02-01 00:00:00', '2025-02-01 04:00:00',
               '2025-02-01 08:00:00', '2025-02-01 12:00:00',
               '2025-02-01 16:00:00', '2025-02-01 20:00:00',
               '2025-02-02 00:00:00', '2025-02-02 04:00:00',
               '2025-02-02 08:00:00', '2025-02-02 12:00:00',
               ...
               '2025-05-01 08:00:00', '2025-05-01 12:00:00',
               '2025-05-01 16:00:00', '2025-05-01 20:00:00',
               '2025-05-02 00:00:00', '2025-05-02 04:00:00',
               '2025-05-02 08:00:00', '2025-05-02 12:00:00',
               '2025-05-02 16:00:00', '2025-05-02 20:00:00'],
              dtype='datetime64[ns]', length=546, freq='4h')

In [64]:
pd.date_range(
    "2000-01-01",
    periods=10,
    freq="1h30min3s",
)

DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 01:30:03',
               '2000-01-01 03:00:06', '2000-01-01 04:30:09',
               '2000-01-01 06:00:12', '2000-01-01 07:30:15',
               '2000-01-01 09:00:18', '2000-01-01 10:30:21',
               '2000-01-01 12:00:24', '2000-01-01 13:30:27'],
              dtype='datetime64[ns]', freq='5403s')

<h5>Week of month</h5>

In [65]:
monthly_meetups = pd.date_range(
    "2024-06-01",
    "2024-12-31",
    freq="WOM-3SAT"
)

In [66]:
monthly_meetups

DatetimeIndex(['2024-06-15', '2024-07-20', '2024-08-17', '2024-09-21',
               '2024-10-19', '2024-11-16', '2024-12-21'],
              dtype='datetime64[ns]', freq='WOM-3SAT')

<h4>Shifting (Leading and Lagging) Data</h4>

In [67]:
ts = pd.Series(
    np.random.standard_normal(10),
    index=pd.date_range(start="2030-01-01",
                        periods=10,
                        freq="ME",
                       )
)

In [68]:
(-1.491246--1.609596)/(-1.609596)

-0.07352776721612128

In [69]:
ts

2030-01-31   -0.500819
2030-02-28    0.154942
2030-03-31    1.685348
2030-04-30    0.133014
2030-05-31   -2.922808
2030-06-30   -0.005664
2030-07-31   -0.049653
2030-08-31   -2.135810
2030-09-30   -0.488019
2030-10-31   -0.458049
Freq: ME, dtype: float64

In [70]:
ts.shift(2)

2030-01-31         NaN
2030-02-28         NaN
2030-03-31   -0.500819
2030-04-30    0.154942
2030-05-31    1.685348
2030-06-30    0.133014
2030-07-31   -2.922808
2030-08-31   -0.005664
2030-09-30   -0.049653
2030-10-31   -2.135810
Freq: ME, dtype: float64

In [71]:
ts.shift(-2)

2030-01-31    1.685348
2030-02-28    0.133014
2030-03-31   -2.922808
2030-04-30   -0.005664
2030-05-31   -0.049653
2030-06-30   -2.135810
2030-07-31   -0.488019
2030-08-31   -0.458049
2030-09-30         NaN
2030-10-31         NaN
Freq: ME, dtype: float64

In [72]:
# Percentage change
(ts-ts.shift(1))/ts

2030-01-31           NaN
2030-02-28      4.232308
2030-03-31      0.908065
2030-04-30    -11.670444
2030-05-31      1.045509
2030-06-30   -515.052294
2030-07-31      0.885932
2030-08-31      0.976752
2030-09-30     -3.376489
2030-10-31     -0.065429
Freq: ME, dtype: float64

In [73]:
# Shifting index two
ts.shift(2,
         freq="ME",
        )

2030-03-31   -0.500819
2030-04-30    0.154942
2030-05-31    1.685348
2030-06-30    0.133014
2030-07-31   -2.922808
2030-08-31   -0.005664
2030-09-30   -0.049653
2030-10-31   -2.135810
2030-11-30   -0.488019
2030-12-31   -0.458049
Freq: ME, dtype: float64

In [74]:
ts.shift(
    3,
    freq="ME",
)

2030-04-30   -0.500819
2030-05-31    0.154942
2030-06-30    1.685348
2030-07-31    0.133014
2030-08-31   -2.922808
2030-09-30   -0.005664
2030-10-31   -0.049653
2030-11-30   -2.135810
2030-12-31   -0.488019
2031-01-31   -0.458049
Freq: ME, dtype: float64

In [75]:
ts.shift(
    3,
    freq="D",
)

2030-02-03   -0.500819
2030-03-03    0.154942
2030-04-03    1.685348
2030-05-03    0.133014
2030-06-03   -2.922808
2030-07-03   -0.005664
2030-08-03   -0.049653
2030-09-03   -2.135810
2030-10-03   -0.488019
2030-11-03   -0.458049
dtype: float64

In [76]:
ts.shift(
    1,
    freq="90min",
)

2030-01-31 01:30:00   -0.500819
2030-02-28 01:30:00    0.154942
2030-03-31 01:30:00    1.685348
2030-04-30 01:30:00    0.133014
2030-05-31 01:30:00   -2.922808
2030-06-30 01:30:00   -0.005664
2030-07-31 01:30:00   -0.049653
2030-08-31 01:30:00   -2.135810
2030-09-30 01:30:00   -0.488019
2030-10-31 01:30:00   -0.458049
dtype: float64

In [77]:
from pandas.tseries.offsets import Day, MonthEnd, Hour

In [78]:
# Using rollback and rollforward
now = datetime(2024,5,26)

In [79]:
now

datetime.datetime(2024, 5, 26, 0, 0)

In [80]:
# Shifting to the end of month
now + MonthEnd()

Timestamp('2024-05-31 00:00:00')

In [81]:
now + MonthEnd(3)

Timestamp('2024-07-31 00:00:00')

In [82]:
offset = MonthEnd()

In [83]:
offset.rollback(now)

Timestamp('2024-04-30 00:00:00')

In [84]:
offset.rollforward(now)

Timestamp('2024-05-31 00:00:00')

In [85]:
now

datetime.datetime(2024, 5, 26, 0, 0)

In [86]:
# using offset creatively

In [87]:
ts.groupby(MonthEnd().rollforward).mean()

2030-01-31   -0.500819
2030-02-28    0.154942
2030-03-31    1.685348
2030-04-30    0.133014
2030-05-31   -2.922808
2030-06-30   -0.005664
2030-07-31   -0.049653
2030-08-31   -2.135810
2030-09-30   -0.488019
2030-10-31   -0.458049
dtype: float64

In [88]:
# Using resample
ts.resample("ME").mean()

2030-01-31   -0.500819
2030-02-28    0.154942
2030-03-31    1.685348
2030-04-30    0.133014
2030-05-31   -2.922808
2030-06-30   -0.005664
2030-07-31   -0.049653
2030-08-31   -2.135810
2030-09-30   -0.488019
2030-10-31   -0.458049
Freq: ME, dtype: float64

<h4>Time Zone Localization and Conversion</h4>

In [89]:
pytz.common_timezones[:5]

['Africa/Abidjan',
 'Africa/Accra',
 'Africa/Addis_Ababa',
 'Africa/Algiers',
 'Africa/Asmara']

In [90]:
# Timezone objects
ugandaTime = pytz.timezone("Africa/Kampala")

In [91]:
ugandaTime

<DstTzInfo 'Africa/Kampala' LMT+2:27:00 STD>

In [92]:
dates = pd.date_range(
    "2024-05-26 19:23",
    periods=10,
    tz="Africa/Kampala",
)

In [93]:
dates

DatetimeIndex(['2024-05-26 19:23:00+03:00', '2024-05-27 19:23:00+03:00',
               '2024-05-28 19:23:00+03:00', '2024-05-29 19:23:00+03:00',
               '2024-05-30 19:23:00+03:00', '2024-05-31 19:23:00+03:00',
               '2024-06-01 19:23:00+03:00', '2024-06-02 19:23:00+03:00',
               '2024-06-03 19:23:00+03:00', '2024-06-04 19:23:00+03:00'],
              dtype='datetime64[ns, Africa/Kampala]', freq='D')

In [94]:
# time series are time zone naive 
ts = pd.Series(
    np.random.standard_normal(len(dates)),
    index=dates,
)

In [95]:
ts

2024-05-26 19:23:00+03:00   -0.213610
2024-05-27 19:23:00+03:00    0.087510
2024-05-28 19:23:00+03:00   -1.273215
2024-05-29 19:23:00+03:00    0.629271
2024-05-30 19:23:00+03:00    0.026809
2024-05-31 19:23:00+03:00   -1.437645
2024-06-01 19:23:00+03:00    1.792666
2024-06-02 19:23:00+03:00   -2.202477
2024-06-03 19:23:00+03:00   -1.250108
2024-06-04 19:23:00+03:00    0.702576
Freq: D, dtype: float64

In [96]:
# printing time zone
print(ts.index.tz)

Africa/Kampala


In [97]:
# Generating datetime ranges with a tz attribute
dates = pd.date_range(
    "2024-05-26 19:23",
    periods=10,
    tz="Africa/Kampala",
)
dates

DatetimeIndex(['2024-05-26 19:23:00+03:00', '2024-05-27 19:23:00+03:00',
               '2024-05-28 19:23:00+03:00', '2024-05-29 19:23:00+03:00',
               '2024-05-30 19:23:00+03:00', '2024-05-31 19:23:00+03:00',
               '2024-06-01 19:23:00+03:00', '2024-06-02 19:23:00+03:00',
               '2024-06-03 19:23:00+03:00', '2024-06-04 19:23:00+03:00'],
              dtype='datetime64[ns, Africa/Kampala]', freq='D')

In [98]:
# conversion
ts_utc = ts.tz_convert("utc")
ts_utc

2024-05-26 16:23:00+00:00   -0.213610
2024-05-27 16:23:00+00:00    0.087510
2024-05-28 16:23:00+00:00   -1.273215
2024-05-29 16:23:00+00:00    0.629271
2024-05-30 16:23:00+00:00    0.026809
2024-05-31 16:23:00+00:00   -1.437645
2024-06-01 16:23:00+00:00    1.792666
2024-06-02 16:23:00+00:00   -2.202477
2024-06-03 16:23:00+00:00   -1.250108
2024-06-04 16:23:00+00:00    0.702576
Freq: D, dtype: float64

In [99]:
ts_nairobi = ts_utc.tz_convert("Africa/Nairobi")
ts_nairobi

2024-05-26 19:23:00+03:00   -0.213610
2024-05-27 19:23:00+03:00    0.087510
2024-05-28 19:23:00+03:00   -1.273215
2024-05-29 19:23:00+03:00    0.629271
2024-05-30 19:23:00+03:00    0.026809
2024-05-31 19:23:00+03:00   -1.437645
2024-06-01 19:23:00+03:00    1.792666
2024-06-02 19:23:00+03:00   -2.202477
2024-06-03 19:23:00+03:00   -1.250108
2024-06-04 19:23:00+03:00    0.702576
Freq: D, dtype: float64

In [100]:
ts_cairo = ts_nairobi.tz_convert("Africa/Cairo")
ts_cairo

2024-05-26 19:23:00+03:00   -0.213610
2024-05-27 19:23:00+03:00    0.087510
2024-05-28 19:23:00+03:00   -1.273215
2024-05-29 19:23:00+03:00    0.629271
2024-05-30 19:23:00+03:00    0.026809
2024-05-31 19:23:00+03:00   -1.437645
2024-06-01 19:23:00+03:00    1.792666
2024-06-02 19:23:00+03:00   -2.202477
2024-06-03 19:23:00+03:00   -1.250108
2024-06-04 19:23:00+03:00    0.702576
Freq: D, dtype: float64

In [101]:
ts_cairo.tz_convert("Africa/Kampala")

2024-05-26 19:23:00+03:00   -0.213610
2024-05-27 19:23:00+03:00    0.087510
2024-05-28 19:23:00+03:00   -1.273215
2024-05-29 19:23:00+03:00    0.629271
2024-05-30 19:23:00+03:00    0.026809
2024-05-31 19:23:00+03:00   -1.437645
2024-06-01 19:23:00+03:00    1.792666
2024-06-02 19:23:00+03:00   -2.202477
2024-06-03 19:23:00+03:00   -1.250108
2024-06-04 19:23:00+03:00    0.702576
Freq: D, dtype: float64

<h4>Operations with Time Zone-Aware Timestamp Objects</h4>

In [102]:
stamp = pd.Timestamp("2024-05-27 17:32:04")
stamp

Timestamp('2024-05-27 17:32:04')

In [103]:
stamp_utc = stamp.tz_localize("utc")
stamp_utc

Timestamp('2024-05-27 17:32:04+0000', tz='UTC')

In [104]:
# using tz in definition
stamp_kampala = pd.Timestamp(
    "2024-05-27 17:48:49",
    tz="Africa/Kampala",
)
stamp_kampala

Timestamp('2024-05-27 17:48:49+0300', tz='Africa/Kampala')

In [105]:
# timestamps store utc value since the Unix epoch
stamp_kampala.value

1716821329000000000

In [106]:
stamp_utc.value

1716831124000000000

In [107]:
# Daylight Saving Transition
# 30 minutes before transitioning into DST
stamp = pd.Timestamp(
    "2012-03-11 01:30",
    tz="US/Eastern",
)
stamp

Timestamp('2012-03-11 01:30:00-0500', tz='US/Eastern')

In [108]:
stamp + Hour()

Timestamp('2012-03-11 03:30:00-0400', tz='US/Eastern')

In [109]:
# 90 minutes before transitioning out of DST
stamp = pd.Timestamp(
    "2012-11-04 00:30",
    tz="US/Eastern",
    )
stamp

Timestamp('2012-11-04 00:30:00-0400', tz='US/Eastern')

In [110]:
# Adding 2 hours
stamp + 2*Hour()

Timestamp('2012-11-04 01:30:00-0500', tz='US/Eastern')

<h4>Operations between different time zones</h4>

In [111]:
dates = pd.date_range(
    "2024-05-27 10:43",
    periods=20,
    freq="ME",
)
ts = pd.Series(
    np.random.standard_normal(
        len(dates)),
    index=dates,
)
ts

2024-05-31 10:43:00    0.907893
2024-06-30 10:43:00   -1.241167
2024-07-31 10:43:00    0.657569
2024-08-31 10:43:00   -0.635825
2024-09-30 10:43:00   -0.320529
2024-10-31 10:43:00   -0.045734
2024-11-30 10:43:00   -0.406441
2024-12-31 10:43:00    0.817643
2025-01-31 10:43:00    0.869735
2025-02-28 10:43:00    0.782819
2025-03-31 10:43:00   -1.230233
2025-04-30 10:43:00    1.202785
2025-05-31 10:43:00   -0.347654
2025-06-30 10:43:00    0.978996
2025-07-31 10:43:00    0.242397
2025-08-31 10:43:00    0.280563
2025-09-30 10:43:00   -0.755334
2025-10-31 10:43:00   -0.324641
2025-11-30 10:43:00    1.090427
2025-12-31 10:43:00   -0.323956
Freq: ME, dtype: float64

In [112]:
ts1 = ts[:10].tz_localize("Africa/Kampala")
ts2 = ts[4:].tz_localize("Africa/Cairo")
result = ts1 + ts2
result.index

DatetimeIndex(['2024-05-31 07:43:00+00:00', '2024-06-30 07:43:00+00:00',
               '2024-07-31 07:43:00+00:00', '2024-08-31 07:43:00+00:00',
               '2024-09-30 07:43:00+00:00', '2024-10-31 07:43:00+00:00',
               '2024-11-30 07:43:00+00:00', '2024-11-30 08:43:00+00:00',
               '2024-12-31 07:43:00+00:00', '2024-12-31 08:43:00+00:00',
               '2025-01-31 07:43:00+00:00', '2025-01-31 08:43:00+00:00',
               '2025-02-28 07:43:00+00:00', '2025-02-28 08:43:00+00:00',
               '2025-03-31 08:43:00+00:00', '2025-04-30 07:43:00+00:00',
               '2025-05-31 07:43:00+00:00', '2025-06-30 07:43:00+00:00',
               '2025-07-31 07:43:00+00:00', '2025-08-31 07:43:00+00:00',
               '2025-09-30 07:43:00+00:00', '2025-10-31 08:43:00+00:00',
               '2025-11-30 08:43:00+00:00', '2025-12-31 08:43:00+00:00'],
              dtype='datetime64[ns, UTC]', freq=None)

In [113]:
# Operations between zone-aware and zone-naive will raise an exception

<h4>Periods and Period Arithmetics</h4>

In [114]:
p = pd.Period("2025",
              freq="Y-MAY",
             )
p

Period('2025', 'Y-MAY')

In [115]:
p + 4

Period('2029', 'Y-MAY')

In [116]:
p - 14

Period('2011', 'Y-MAY')

In [117]:
p

Period('2025', 'Y-MAY')

In [118]:
pd.Period("2030",
          freq="Y-MAY",
         ) - p

<5 * YearEnds: month=5>

In [119]:
# using periods range
periods = pd.period_range(
    "2024-05-27",
    "2030-05-27",
    freq="M",
)
periods

PeriodIndex(['2024-05', '2024-06', '2024-07', '2024-08', '2024-09', '2024-10',
             '2024-11', '2024-12', '2025-01', '2025-02', '2025-03', '2025-04',
             '2025-05', '2025-06', '2025-07', '2025-08', '2025-09', '2025-10',
             '2025-11', '2025-12', '2026-01', '2026-02', '2026-03', '2026-04',
             '2026-05', '2026-06', '2026-07', '2026-08', '2026-09', '2026-10',
             '2026-11', '2026-12', '2027-01', '2027-02', '2027-03', '2027-04',
             '2027-05', '2027-06', '2027-07', '2027-08', '2027-09', '2027-10',
             '2027-11', '2027-12', '2028-01', '2028-02', '2028-03', '2028-04',
             '2028-05', '2028-06', '2028-07', '2028-08', '2028-09', '2028-10',
             '2028-11', '2028-12', '2029-01', '2029-02', '2029-03', '2029-04',
             '2029-05', '2029-06', '2029-07', '2029-08', '2029-09', '2029-10',
             '2029-11', '2029-12', '2030-01', '2030-02', '2030-03', '2030-04',
             '2030-05'],
            dtype='period[M

In [120]:
# using periods for index
pd.Series(
    np.random.standard_normal(73),
    index=periods,
)

2024-05    0.006672
2024-06    0.045700
2024-07   -1.700771
2024-08   -0.796686
2024-09    0.034307
             ...   
2030-01   -0.135592
2030-02   -0.118152
2030-03   -0.697547
2030-04   -0.728652
2030-05   -0.455565
Freq: M, Length: 73, dtype: float64

In [121]:
# using strings
values = [
    "2024Q1",
    "2025Q3",
    "2026Q1",
]
values

['2024Q1', '2025Q3', '2026Q1']

In [122]:
# 
index = pd.PeriodIndex(values,
                       freq="Q-DEC",
                      )
index

PeriodIndex(['2024Q1', '2025Q3', '2026Q1'], dtype='period[Q-DEC]')

<h4>Period Frequency Conversions</h4>

In [123]:
p = pd.Period(
    "2025",
    freq="Y-DEC",
)
p

Period('2025', 'Y-DEC')

In [124]:
# conversion
p.asfreq("M",
         how="start",
        )

Period('2025-01', 'M')

In [125]:
# conversion
p.asfreq("M",
         how="end",
        )

Period('2025-12', 'M')

In [126]:
# Fiscal year
p = pd.Period(
    "2024",
    freq="Y-JUN",
)
p

Period('2024', 'Y-JUN')

In [127]:
p.asfreq("M",
         how="start"
        )

Period('2023-07', 'M')

In [128]:
p.asfreq("M",
         how="end",
        )

Period('2024-06', 'M')

In [129]:
# period conversions
# June 2024 is the end of the period
p = pd.Period(
    "2024",
    "Y-JUN"
)
p

Period('2024', 'Y-JUN')

In [130]:
p.asfreq(
    "M",
    how="start"
)

Period('2023-07', 'M')

In [131]:
p.asfreq(
    "M",
    how="end",
)

Period('2024-06', 'M')

In [132]:
# conversion
p.asfreq(
    "Y-AUG"
)

Period('2024', 'Y-AUG')

In [133]:
# more examples
periods = pd.period_range(
    "2035", "2040",
    freq="Y-JUN"
)

In [134]:
periods

PeriodIndex(['2035', '2036', '2037', '2038', '2039', '2040'], dtype='period[Y-JUN]')

In [135]:
ts = pd.Series(np.random.standard_normal(
    len(periods)),
               index=periods,
              )
ts

2035    2.147163
2036    0.360643
2037   -0.638033
2038    0.800899
2039   -0.740795
2040    0.094128
Freq: Y-JUN, dtype: float64

In [136]:
# conversion to months frequency
ts.asfreq(
    "M",
    how="start"
)

2034-07    2.147163
2035-07    0.360643
2036-07   -0.638033
2037-07    0.800899
2038-07   -0.740795
2039-07    0.094128
Freq: M, dtype: float64

In [137]:
ts.asfreq(
    "M",
    how="end"
)

2035-06    2.147163
2036-06    0.360643
2037-06   -0.638033
2038-06    0.800899
2039-06   -0.740795
2040-06    0.094128
Freq: M, dtype: float64

In [138]:
# last business day of the month
ts.asfreq(
    freq="B",
    how="end"
)

  ts.asfreq(


2035-06-29    2.147163
2036-06-30    0.360643
2037-06-30   -0.638033
2038-06-30    0.800899
2039-06-30   -0.740795
2040-06-29    0.094128
Freq: B, dtype: float64

 <h4>Quarterly Period Frequencies</h4>

In [139]:
p = pd.Period(
    "2024Q4",
    freq="Q-JAN",
)

In [140]:
p

Period('2024Q4', 'Q-JAN')

In [141]:
p.asfreq(freq="Q-JUN")

Period('2024Q3', 'Q-JUN')

In [142]:
# converting to daily frequencies
p.asfreq("D",
         how="start",
        )

Period('2023-11-01', 'D')

In [143]:
# fiscal periods
fs = pd.period_range(
    "2023Q1",
    "2030Q4",
    freq="Q-MAY"
)
fs

PeriodIndex(['2023Q1', '2023Q2', '2023Q3', '2023Q4', '2024Q1', '2024Q2',
             '2024Q3', '2024Q4', '2025Q1', '2025Q2', '2025Q3', '2025Q4',
             '2026Q1', '2026Q2', '2026Q3', '2026Q4', '2027Q1', '2027Q2',
             '2027Q3', '2027Q4', '2028Q1', '2028Q2', '2028Q3', '2028Q4',
             '2029Q1', '2029Q2', '2029Q3', '2029Q4', '2030Q1', '2030Q2',
             '2030Q3', '2030Q4'],
            dtype='period[Q-MAY]')

In [144]:
fs.asfreq("M",
          how="start")

PeriodIndex(['2022-06', '2022-09', '2022-12', '2023-03', '2023-06', '2023-09',
             '2023-12', '2024-03', '2024-06', '2024-09', '2024-12', '2025-03',
             '2025-06', '2025-09', '2025-12', '2026-03', '2026-06', '2026-09',
             '2026-12', '2027-03', '2027-06', '2027-09', '2027-12', '2028-03',
             '2028-06', '2028-09', '2028-12', '2029-03', '2029-06', '2029-09',
             '2029-12', '2030-03'],
            dtype='period[M]')

In [145]:
fs.asfreq(
    "D",
    how="end",
)

PeriodIndex(['2022-08-31', '2022-11-30', '2023-02-28', '2023-05-31',
             '2023-08-31', '2023-11-30', '2024-02-29', '2024-05-31',
             '2024-08-31', '2024-11-30', '2025-02-28', '2025-05-31',
             '2025-08-31', '2025-11-30', '2026-02-28', '2026-05-31',
             '2026-08-31', '2026-11-30', '2027-02-28', '2027-05-31',
             '2027-08-31', '2027-11-30', '2028-02-29', '2028-05-31',
             '2028-08-31', '2028-11-30', '2029-02-28', '2029-05-31',
             '2029-08-31', '2029-11-30', '2030-02-28', '2030-05-31'],
            dtype='period[D]')

In [146]:
(fs.asfreq(
    freq="B",
    how="end",
) - 1).asfreq(
    "min",
    how="start",
) + 16 * 60

  (fs.asfreq(


PeriodIndex(['2022-08-30 16:00', '2022-11-29 16:00', '2023-02-27 16:00',
             '2023-05-30 16:00', '2023-08-30 16:00', '2023-11-29 16:00',
             '2024-02-28 16:00', '2024-05-30 16:00', '2024-08-29 16:00',
             '2024-11-28 16:00', '2025-02-27 16:00', '2025-05-29 16:00',
             '2025-08-28 16:00', '2025-11-27 16:00', '2026-02-26 16:00',
             '2026-05-28 16:00', '2026-08-28 16:00', '2026-11-27 16:00',
             '2027-02-25 16:00', '2027-05-28 16:00', '2027-08-30 16:00',
             '2027-11-29 16:00', '2028-02-28 16:00', '2028-05-30 16:00',
             '2028-08-30 16:00', '2028-11-29 16:00', '2029-02-27 16:00',
             '2029-05-30 16:00', '2029-08-30 16:00', '2029-11-29 16:00',
             '2030-02-27 16:00', '2030-05-30 16:00'],
            dtype='period[min]')

In [147]:
# More examples
periods = pd.period_range(
    "2024Q4",
    "2026Q1",
    freq="Q-JAN",
)

In [148]:
periods

PeriodIndex(['2024Q4', '2025Q1', '2025Q2', '2025Q3', '2025Q4', '2026Q1'], dtype='period[Q-JAN]')

In [149]:
ts = pd.Series(
    np.arange(len(periods)),
    index=periods)
ts

2024Q4    0
2025Q1    1
2025Q2    2
2025Q3    3
2025Q4    4
2026Q1    5
Freq: Q-JAN, dtype: int32

In [150]:
new_periods = (periods.asfreq("B", "end")-1).asfreq("h", "start") + 16
new_periods

  new_periods = (periods.asfreq("B", "end")-1).asfreq("h", "start") + 16


PeriodIndex(['2024-01-30 16:00', '2024-04-29 16:00', '2024-07-30 16:00',
             '2024-10-30 16:00', '2025-01-30 16:00', '2025-04-29 16:00'],
            dtype='period[h]')

In [151]:
ts.index = new_periods.to_timestamp()
ts

2024-01-30 16:00:00    0
2024-04-29 16:00:00    1
2024-07-30 16:00:00    2
2024-10-30 16:00:00    3
2025-01-30 16:00:00    4
2025-04-29 16:00:00    5
dtype: int32

<h4>Converting Timestamps to periods (and Back)</h4>

In [152]:
dates = pd.date_range(
    "2024-01-15",
    periods=10,
    freq="ME",
)
dates

DatetimeIndex(['2024-01-31', '2024-02-29', '2024-03-31', '2024-04-30',
               '2024-05-31', '2024-06-30', '2024-07-31', '2024-08-31',
               '2024-09-30', '2024-10-31'],
              dtype='datetime64[ns]', freq='ME')

In [153]:
ts = pd.Series(
    np.random.standard_normal(len(dates)),
    index=dates)
ts

2024-01-31    0.156796
2024-02-29   -0.261792
2024-03-31    0.461208
2024-04-30   -3.313875
2024-05-31   -0.936931
2024-06-30    0.370242
2024-07-31    0.409904
2024-08-31    0.350207
2024-09-30   -0.068557
2024-10-31   -1.845418
Freq: ME, dtype: float64

In [154]:
ts.to_period()

2024-01    0.156796
2024-02   -0.261792
2024-03    0.461208
2024-04   -3.313875
2024-05   -0.936931
2024-06    0.370242
2024-07    0.409904
2024-08    0.350207
2024-09   -0.068557
2024-10   -1.845418
Freq: M, dtype: float64

In [155]:
dates = pd.date_range(
    "2024-01-29",
    periods=10,
)
ts2 = pd.Series(
    np.random.standard_normal(len(dates)),
    index=dates)
ts2

2024-01-29    0.527806
2024-01-30   -0.675524
2024-01-31    1.059416
2024-02-01    0.236026
2024-02-02    0.916297
2024-02-03    0.352660
2024-02-04   -1.922992
2024-02-05   -1.536607
2024-02-06    0.386639
2024-02-07   -0.271558
Freq: D, dtype: float64

In [156]:
pts = ts2.to_period("M")
pts

2024-01    0.527806
2024-01   -0.675524
2024-01    1.059416
2024-02    0.236026
2024-02    0.916297
2024-02    0.352660
2024-02   -1.922992
2024-02   -1.536607
2024-02    0.386639
2024-02   -0.271558
Freq: M, dtype: float64

In [157]:
# converting back to timestamp
pts.to_timestamp(
    how="end")

2024-01-31 23:59:59.999999999    0.527806
2024-01-31 23:59:59.999999999   -0.675524
2024-01-31 23:59:59.999999999    1.059416
2024-02-29 23:59:59.999999999    0.236026
2024-02-29 23:59:59.999999999    0.916297
2024-02-29 23:59:59.999999999    0.352660
2024-02-29 23:59:59.999999999   -1.922992
2024-02-29 23:59:59.999999999   -1.536607
2024-02-29 23:59:59.999999999    0.386639
2024-02-29 23:59:59.999999999   -0.271558
dtype: float64

In [158]:
data = pd.read_csv("../Data/macrodata.csv")
data

Unnamed: 0,year,quarter,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
0,1959,1,2710.349,1707.4,286.898,470.045,1886.9,28.980,139.7,2.82,5.8,177.146,0.00,0.00
1,1959,2,2778.801,1733.7,310.859,481.301,1919.7,29.150,141.7,3.08,5.1,177.830,2.34,0.74
2,1959,3,2775.488,1751.8,289.226,491.260,1916.4,29.350,140.5,3.82,5.3,178.657,2.74,1.09
3,1959,4,2785.204,1753.7,299.356,484.052,1931.3,29.370,140.0,4.33,5.6,179.386,0.27,4.06
4,1960,1,2847.699,1770.5,331.722,462.199,1955.5,29.540,139.6,3.50,5.2,180.007,2.31,1.19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
198,2008,3,13324.600,9267.7,1990.693,991.551,9838.3,216.889,1474.7,1.17,6.0,305.270,-3.16,4.33
199,2008,4,13141.920,9195.3,1857.661,1007.273,9920.4,212.174,1576.5,0.12,6.9,305.952,-8.79,8.91
200,2009,1,12925.410,9209.2,1558.494,996.287,9926.4,212.671,1592.8,0.22,8.1,306.547,0.94,-0.71
201,2009,2,12901.504,9189.0,1456.678,1023.528,10077.5,214.469,1653.6,0.18,9.2,307.226,3.37,-3.19


In [159]:
data.year

0      1959
1      1959
2      1959
3      1959
4      1960
       ... 
198    2008
199    2008
200    2009
201    2009
202    2009
Name: year, Length: 203, dtype: int64

In [160]:
# index
index = pd.PeriodIndex.from_fields(
    year=data["year"],
    quarter=data["quarter"],
    freq="Q-DEC",
)
index

PeriodIndex(['1959Q1', '1959Q2', '1959Q3', '1959Q4', '1960Q1', '1960Q2',
             '1960Q3', '1960Q4', '1961Q1', '1961Q2',
             ...
             '2007Q2', '2007Q3', '2007Q4', '2008Q1', '2008Q2', '2008Q3',
             '2008Q4', '2009Q1', '2009Q2', '2009Q3'],
            dtype='period[Q-DEC]', length=203)

In [161]:
data.index = index

In [162]:
data['cpi']

1959Q1     28.980
1959Q2     29.150
1959Q3     29.350
1959Q4     29.370
1960Q1     29.540
           ...   
2008Q3    216.889
2008Q4    212.174
2009Q1    212.671
2009Q2    214.469
2009Q3    216.385
Freq: Q-DEC, Name: cpi, Length: 203, dtype: float64

<h4>Resampling and Frequency Conversion</h4>

In [163]:
dates = pd.date_range(
    "2024-04-06",
    periods=100)
dates

DatetimeIndex(['2024-04-06', '2024-04-07', '2024-04-08', '2024-04-09',
               '2024-04-10', '2024-04-11', '2024-04-12', '2024-04-13',
               '2024-04-14', '2024-04-15', '2024-04-16', '2024-04-17',
               '2024-04-18', '2024-04-19', '2024-04-20', '2024-04-21',
               '2024-04-22', '2024-04-23', '2024-04-24', '2024-04-25',
               '2024-04-26', '2024-04-27', '2024-04-28', '2024-04-29',
               '2024-04-30', '2024-05-01', '2024-05-02', '2024-05-03',
               '2024-05-04', '2024-05-05', '2024-05-06', '2024-05-07',
               '2024-05-08', '2024-05-09', '2024-05-10', '2024-05-11',
               '2024-05-12', '2024-05-13', '2024-05-14', '2024-05-15',
               '2024-05-16', '2024-05-17', '2024-05-18', '2024-05-19',
               '2024-05-20', '2024-05-21', '2024-05-22', '2024-05-23',
               '2024-05-24', '2024-05-25', '2024-05-26', '2024-05-27',
               '2024-05-28', '2024-05-29', '2024-05-30', '2024-05-31',
      

In [164]:
ts = pd.Series(
    np.random.standard_normal(len(dates)),
    index=dates)
ts

2024-04-06    0.347004
2024-04-07    0.531732
2024-04-08    0.999678
2024-04-09    0.566659
2024-04-10    1.714136
                ...   
2024-07-10    0.586983
2024-07-11   -1.046639
2024-07-12   -1.713615
2024-07-13    0.665565
2024-07-14   -0.576224
Freq: D, Length: 100, dtype: float64

In [165]:
ts.resample(
    "ME",
).mean()

2024-04-30    0.188581
2024-05-31   -0.166284
2024-06-30    0.140989
2024-07-31   -0.414172
Freq: ME, dtype: float64

In [166]:
ts.resample("ME",
            # kind="period",
           ).mean().to_period()

2024-04    0.188581
2024-05   -0.166284
2024-06    0.140989
2024-07   -0.414172
Freq: M, dtype: float64

<h5>Downsampling</h5>

In [167]:
dates = pd.date_range(
    "2025-07-12",
    periods=20,
    freq="min",
)
dates

DatetimeIndex(['2025-07-12 00:00:00', '2025-07-12 00:01:00',
               '2025-07-12 00:02:00', '2025-07-12 00:03:00',
               '2025-07-12 00:04:00', '2025-07-12 00:05:00',
               '2025-07-12 00:06:00', '2025-07-12 00:07:00',
               '2025-07-12 00:08:00', '2025-07-12 00:09:00',
               '2025-07-12 00:10:00', '2025-07-12 00:11:00',
               '2025-07-12 00:12:00', '2025-07-12 00:13:00',
               '2025-07-12 00:14:00', '2025-07-12 00:15:00',
               '2025-07-12 00:16:00', '2025-07-12 00:17:00',
               '2025-07-12 00:18:00', '2025-07-12 00:19:00'],
              dtype='datetime64[ns]', freq='min')

In [168]:
ts = pd.Series(
    np.random.standard_normal(len(dates)),
    index=dates)
ts

2025-07-12 00:00:00    0.731696
2025-07-12 00:01:00    0.849202
2025-07-12 00:02:00    0.130072
2025-07-12 00:03:00    0.466947
2025-07-12 00:04:00   -0.870147
2025-07-12 00:05:00   -1.183696
2025-07-12 00:06:00    0.861457
2025-07-12 00:07:00    0.946027
2025-07-12 00:08:00    0.529799
2025-07-12 00:09:00    1.320814
2025-07-12 00:10:00    1.456590
2025-07-12 00:11:00    0.087091
2025-07-12 00:12:00   -1.038426
2025-07-12 00:13:00    0.532622
2025-07-12 00:14:00    0.169932
2025-07-12 00:15:00    0.367561
2025-07-12 00:16:00   -0.938292
2025-07-12 00:17:00    0.998453
2025-07-12 00:18:00    0.088648
2025-07-12 00:19:00    0.226766
Freq: min, dtype: float64

In [169]:
# resampling to five minute chuncks
ts.resample(
    "5min"
).sum()

2025-07-12 00:00:00    1.307771
2025-07-12 00:05:00    2.474401
2025-07-12 00:10:00    1.207810
2025-07-12 00:15:00    0.743135
Freq: 5min, dtype: float64

In [170]:
ts.resample(
    "5min",
    closed="right",
).sum()

2025-07-11 23:55:00    0.731696
2025-07-12 00:00:00   -0.607622
2025-07-12 00:05:00    5.114687
2025-07-12 00:10:00    0.118780
2025-07-12 00:15:00    0.375574
Freq: 5min, dtype: float64

In [171]:
# What does label mean?
ts.resample("5min",
            closed="right",
            label="left"
           ).sum()

2025-07-11 23:55:00    0.731696
2025-07-12 00:00:00   -0.607622
2025-07-12 00:05:00    5.114687
2025-07-12 00:10:00    0.118780
2025-07-12 00:15:00    0.375574
Freq: 5min, dtype: float64

In [172]:
ts.resample("5min",
            closed="left",
           ).sum()

2025-07-12 00:00:00    1.307771
2025-07-12 00:05:00    2.474401
2025-07-12 00:10:00    1.207810
2025-07-12 00:15:00    0.743135
Freq: 5min, dtype: float64

In [173]:
# offseting an interval
from pandas.tseries.frequencies import to_offset
result = ts.resample("5min",
                     closed="right",
                     label="right").sum()
result

2025-07-12 00:00:00    0.731696
2025-07-12 00:05:00   -0.607622
2025-07-12 00:10:00    5.114687
2025-07-12 00:15:00    0.118780
2025-07-12 00:20:00    0.375574
Freq: 5min, dtype: float64

In [174]:
result.index = result.index + to_offset("-1s")
result

2025-07-11 23:59:59    0.731696
2025-07-12 00:04:59   -0.607622
2025-07-12 00:09:59    5.114687
2025-07-12 00:14:59    0.118780
2025-07-12 00:19:59    0.375574
Freq: 5min, dtype: float64

In [175]:
# ohlc resampling
ts = pd.Series(
    np.random.permutation(
        np.arange(len(dates))),
    index=dates)

In [176]:
ts

2025-07-12 00:00:00    18
2025-07-12 00:01:00    10
2025-07-12 00:02:00    19
2025-07-12 00:03:00     3
2025-07-12 00:04:00    17
2025-07-12 00:05:00    16
2025-07-12 00:06:00     6
2025-07-12 00:07:00     1
2025-07-12 00:08:00     5
2025-07-12 00:09:00     9
2025-07-12 00:10:00     4
2025-07-12 00:11:00    11
2025-07-12 00:12:00     8
2025-07-12 00:13:00    12
2025-07-12 00:14:00     0
2025-07-12 00:15:00    14
2025-07-12 00:16:00     2
2025-07-12 00:17:00    13
2025-07-12 00:18:00     7
2025-07-12 00:19:00    15
Freq: min, dtype: int32

In [177]:
# ts = pd.to_datetime(ts)
# ts.index = ts.index.strftime("%Y-%m-%d")
# ts

In [178]:
ts.resample(
    "5min",
    closed="left",
    # label="left"
).ohlc()

Unnamed: 0,open,high,low,close
2025-07-12 00:00:00,18,19,3,17
2025-07-12 00:05:00,16,16,1,9
2025-07-12 00:10:00,4,12,0,0
2025-07-12 00:15:00,14,15,2,15


<h4>Upsampling and interpolation</h4>

In [179]:
frame = pd.DataFrame(
    np.random.standard_normal((10, 4)),
    index=pd.date_range("2024-05-01", periods=10, freq="W-MON"),
    columns=["Iganga", "Jinja", "Kampala", "Fort Portal"]
)
frame

Unnamed: 0,Iganga,Jinja,Kampala,Fort Portal
2024-05-06,-0.551861,-0.094808,0.546583,-0.394074
2024-05-13,0.153898,-1.657892,0.524284,0.766087
2024-05-20,-0.531409,0.559175,0.053071,-0.824523
2024-05-27,0.436767,-0.794947,-0.12302,-0.852742
2024-06-03,-0.089838,0.463463,-1.650325,0.502372
2024-06-10,-0.131268,-0.074786,0.849789,-1.147057
2024-06-17,0.546335,-0.138178,-0.944812,-1.334523
2024-06-24,0.798924,1.294064,2.013604,0.146318
2024-07-01,0.22811,0.531205,0.456977,-2.235782
2024-07-08,-1.123917,0.883566,-1.518537,0.140566


In [180]:
# upsampling
df_daily = frame.resample("D").asfreq()
df_daily

Unnamed: 0,Iganga,Jinja,Kampala,Fort Portal
2024-05-06,-0.551861,-0.094808,0.546583,-0.394074
2024-05-07,,,,
2024-05-08,,,,
2024-05-09,,,,
2024-05-10,,,,
...,...,...,...,...
2024-07-04,,,,
2024-07-05,,,,
2024-07-06,,,,
2024-07-07,,,,


In [181]:
df_daily.tail(40)

Unnamed: 0,Iganga,Jinja,Kampala,Fort Portal
2024-05-30,,,,
2024-05-31,,,,
2024-06-01,,,,
2024-06-02,,,,
2024-06-03,-0.089838,0.463463,-1.650325,0.502372
2024-06-04,,,,
2024-06-05,,,,
2024-06-06,,,,
2024-06-07,,,,
2024-06-08,,,,


In [182]:
frame.resample("D").ffill()

Unnamed: 0,Iganga,Jinja,Kampala,Fort Portal
2024-05-06,-0.551861,-0.094808,0.546583,-0.394074
2024-05-07,-0.551861,-0.094808,0.546583,-0.394074
2024-05-08,-0.551861,-0.094808,0.546583,-0.394074
2024-05-09,-0.551861,-0.094808,0.546583,-0.394074
2024-05-10,-0.551861,-0.094808,0.546583,-0.394074
...,...,...,...,...
2024-07-04,0.228110,0.531205,0.456977,-2.235782
2024-07-05,0.228110,0.531205,0.456977,-2.235782
2024-07-06,0.228110,0.531205,0.456977,-2.235782
2024-07-07,0.228110,0.531205,0.456977,-2.235782


In [183]:
frame.resample("D").ffill(limit=5).head(10)

Unnamed: 0,Iganga,Jinja,Kampala,Fort Portal
2024-05-06,-0.551861,-0.094808,0.546583,-0.394074
2024-05-07,-0.551861,-0.094808,0.546583,-0.394074
2024-05-08,-0.551861,-0.094808,0.546583,-0.394074
2024-05-09,-0.551861,-0.094808,0.546583,-0.394074
2024-05-10,-0.551861,-0.094808,0.546583,-0.394074
2024-05-11,-0.551861,-0.094808,0.546583,-0.394074
2024-05-12,,,,
2024-05-13,0.153898,-1.657892,0.524284,0.766087
2024-05-14,0.153898,-1.657892,0.524284,0.766087
2024-05-15,0.153898,-1.657892,0.524284,0.766087


In [184]:
# using a different index
frame.resample("W-THU").ffill(limit=2)

Unnamed: 0,Iganga,Jinja,Kampala,Fort Portal
2024-05-09,-0.551861,-0.094808,0.546583,-0.394074
2024-05-16,0.153898,-1.657892,0.524284,0.766087
2024-05-23,-0.531409,0.559175,0.053071,-0.824523
2024-05-30,0.436767,-0.794947,-0.12302,-0.852742
2024-06-06,-0.089838,0.463463,-1.650325,0.502372
2024-06-13,-0.131268,-0.074786,0.849789,-1.147057
2024-06-20,0.546335,-0.138178,-0.944812,-1.334523
2024-06-27,0.798924,1.294064,2.013604,0.146318
2024-07-04,0.22811,0.531205,0.456977,-2.235782
2024-07-11,-1.123917,0.883566,-1.518537,0.140566


<h4>Resampling with Periods</h4>

In [185]:
frame = pd.DataFrame(
    np.random.standard_normal((24, 4)),
    index=pd.period_range(
        "1-2024", "12-2025",
        freq="M"),
    columns=["Iganga", "Jinja", "Kampala", "Masaka"]
)
frame.head()

Unnamed: 0,Iganga,Jinja,Kampala,Masaka
2024-01,1.975323,0.673556,0.21758,0.771819
2024-02,0.78684,0.17575,-0.137525,1.024779
2024-03,-0.407422,-0.248524,-0.371156,0.775639
2024-04,-0.461714,1.078265,0.156763,1.268105
2024-05,-0.080403,-0.259791,1.453013,0.338919


In [186]:
frame = frame.to_timestamp()

In [187]:
annual_frame = frame.resample("YE-DEC").mean()
annual_frame

Unnamed: 0,Iganga,Jinja,Kampala,Masaka
2024-12-31,0.075945,-0.045606,0.172571,-0.029351
2025-12-31,-0.034672,0.028471,0.394794,-0.181423


In [188]:
# Q-DEC: quarterly, year ending in December
annual_frame.resample("QE-DEC").ffill()

Unnamed: 0,Iganga,Jinja,Kampala,Masaka
2024-12-31,0.075945,-0.045606,0.172571,-0.029351
2025-03-31,0.075945,-0.045606,0.172571,-0.029351
2025-06-30,0.075945,-0.045606,0.172571,-0.029351
2025-09-30,0.075945,-0.045606,0.172571,-0.029351
2025-12-31,-0.034672,0.028471,0.394794,-0.181423


In [195]:
# using convention
annual_frame.resample(
    "YE-DEC",
    # convention="end"
).asfreq()    

Unnamed: 0,Iganga,Jinja,Kampala,Masaka
2024-12-31,0.075945,-0.045606,0.172571,-0.029351
2025-12-31,-0.034672,0.028471,0.394794,-0.181423


In [197]:
# Downsampling and upsampling
annual_frame.resample("QE-MAR").ffill()

Unnamed: 0,Iganga,Jinja,Kampala,Masaka
2024-12-31,0.075945,-0.045606,0.172571,-0.029351
2025-03-31,0.075945,-0.045606,0.172571,-0.029351
2025-06-30,0.075945,-0.045606,0.172571,-0.029351
2025-09-30,0.075945,-0.045606,0.172571,-0.029351
2025-12-31,-0.034672,0.028471,0.394794,-0.181423


In [198]:
annual_frame

Unnamed: 0,Iganga,Jinja,Kampala,Masaka
2024-12-31,0.075945,-0.045606,0.172571,-0.029351
2025-12-31,-0.034672,0.028471,0.394794,-0.181423


<h4>Grouped Time Sampling</h4>