In [1]:
import pandas as pd
import numpy as np

<h3>Date and Time Data Types and Tools</h3>

In [2]:
from datetime import datetime
from datetime import timedelta

In [3]:
now = datetime.now()

In [4]:
now

datetime.datetime(2024, 5, 29, 21, 2, 11, 375929)

In [5]:
now.year, now.month, now.day, now.hour, now.minute

(2024, 5, 29, 21, 2)

In [6]:
now.year

2024

In [7]:
# delta
delta = datetime(2024, 5, 21) - datetime(2016, 8, 26, 8, 54)
delta

datetime.timedelta(days=2824, seconds=54360)

In [8]:
# numerical evaluations
start = datetime(2024, 1, 1)
start + timedelta(3000)

datetime.datetime(2032, 3, 19, 0, 0)

<h4>Converting between String and Datetime</h4>

In [9]:
stamp = datetime(2024, 5, 22)

In [10]:
str(stamp)

'2024-05-22 00:00:00'

In [11]:
# Conversion to a specified string format
stamp.strftime("%Y-%B-%d-%u-%A-%p")

'2024-May-22-3-Wednesday-AM'

In [12]:
value = "2024-12-03"

In [13]:
datetime.strptime(value, "%Y-%m-%d")

datetime.datetime(2024, 12, 3, 0, 0)

In [14]:
# converting a list of string dates to datetime format
datestrs = ["7/6/2025", "8/6/2025"]
[datetime.strptime(x, "%m/%d/%Y") for x in datestrs]

[datetime.datetime(2025, 7, 6, 0, 0), datetime.datetime(2025, 8, 6, 0, 0)]

In [15]:
# pandas to_datetime
datestrs = ["2011-07-06 12:00:00", "2025-08-06 00:00:00"]
pd.to_datetime(datestrs)

DatetimeIndex(['2011-07-06 12:00:00', '2025-08-06 00:00:00'], dtype='datetime64[ns]', freq=None)

In [16]:
# dataframe to datetime
df = pd.DataFrame(
    {"year": [2025, 2024, 2023],
     "month": [2, 4, 8],
     "day": [5, 4, 5]
    })

In [17]:
df

Unnamed: 0,year,month,day
0,2025,2,5
1,2024,4,4
2,2023,8,5


In [18]:
pd.to_datetime(df)

0   2025-02-05
1   2024-04-04
2   2023-08-05
dtype: datetime64[ns]

In [19]:
datestr = ["2025-07-06 12:00:00",
           "2034-08-04 13:45:34"]

In [20]:
# Missing values, Not a Time values are also accepted
idx = pd.to_datetime(datestrs + [None])

In [21]:
idx

DatetimeIndex(['2011-07-06 12:00:00', '2025-08-06 00:00:00', 'NaT'], dtype='datetime64[ns]', freq=None)

In [22]:
# indexing idx
idx[2]

NaT

In [23]:
pd.isna(idx)

array([False, False,  True])

<h4>Time Series Basics</h4>

In [24]:
# time series
dates = [datetime(2024, 6, 2),
        datetime(2025, 10, 21),
        datetime(2024, 4, 12),
        datetime(2023, 10, 5),
        datetime(2016, 11, 3),
        datetime(2017, 3, 15),
        datetime(2019, 6, 3),
        datetime(2020, 5, 8),
       ]

In [25]:
ts =pd.Series(np.random.standard_normal(8),
              index=dates,
             )

In [26]:
ts

2024-06-02    1.038238
2025-10-21   -0.629320
2024-04-12    0.367350
2023-10-05   -1.506751
2016-11-03   -1.959226
2017-03-15   -0.416454
2019-06-03   -1.012922
2020-05-08   -1.188495
dtype: float64

In [27]:
ts + ts[::3]

2016-11-03         NaN
2017-03-15         NaN
2019-06-03   -2.025844
2020-05-08         NaN
2023-10-05   -3.013501
2024-04-12         NaN
2024-06-02    2.076475
2025-10-21         NaN
dtype: float64

In [28]:
ts[::3]

2024-06-02    1.038238
2023-10-05   -1.506751
2019-06-03   -1.012922
dtype: float64

<h4>indexing, Selecting and Subsetting</h4>

In [29]:
# same as indexing a series
stamp = ts.index[2]

In [30]:
stamp

Timestamp('2024-04-12 00:00:00')

In [31]:
ts.iat[0]

1.0382375452002524

In [32]:
# indexing by year
longer_ts = pd.Series(np.random.standard_normal(2000),
                      index=pd.date_range("2024-05-22",
                                          periods=2000))

In [33]:
longer_ts["2024"]

2024-05-22   -0.685322
2024-05-23    0.532583
2024-05-24    0.206950
2024-05-25   -0.862068
2024-05-26   -0.389251
                ...   
2024-12-27    1.074015
2024-12-28    1.177022
2024-12-29    0.767054
2024-12-30    1.925113
2024-12-31    0.389089
Freq: D, Length: 224, dtype: float64

In [34]:
# selecting by year-month
longer_ts["2024-06"]

2024-06-01   -0.148617
2024-06-02   -0.169442
2024-06-03   -0.666721
2024-06-04    0.365881
2024-06-05   -0.462208
2024-06-06   -0.823170
2024-06-07    1.185474
2024-06-08   -0.255175
2024-06-09   -0.138412
2024-06-10   -1.351614
2024-06-11   -0.737369
2024-06-12    1.344282
2024-06-13    0.783791
2024-06-14    0.615366
2024-06-15    0.624517
2024-06-16   -0.388982
2024-06-17   -0.385174
2024-06-18   -1.199274
2024-06-19    0.303263
2024-06-20    0.754300
2024-06-21    0.327413
2024-06-22    0.478594
2024-06-23   -0.978744
2024-06-24    0.755765
2024-06-25    1.045328
2024-06-26    0.322764
2024-06-27    0.468615
2024-06-28    0.543476
2024-06-29   -0.293638
2024-06-30    0.336061
Freq: D, dtype: float64

In [35]:
# Slicing by datetime
longer_ts[datetime(2024,5,22):
datetime(2024,8,1)
]

2024-05-22   -0.685322
2024-05-23    0.532583
2024-05-24    0.206950
2024-05-25   -0.862068
2024-05-26   -0.389251
                ...   
2024-07-28   -1.071319
2024-07-29   -2.510470
2024-07-30    0.799755
2024-07-31   -2.427698
2024-08-01   -0.992733
Freq: D, Length: 72, dtype: float64

In [36]:
# Slicing with a timestamp not contained in the ts
longer_ts[datetime(2016, 8, 26):
datetime(2024, 8, 26)
]

2024-05-22   -0.685322
2024-05-23    0.532583
2024-05-24    0.206950
2024-05-25   -0.862068
2024-05-26   -0.389251
                ...   
2024-08-22   -1.401785
2024-08-23    0.108246
2024-08-24    0.158415
2024-08-25   -1.195792
2024-08-26    0.296490
Freq: D, Length: 97, dtype: float64

In [37]:
longer_ts

2024-05-22   -0.685322
2024-05-23    0.532583
2024-05-24    0.206950
2024-05-25   -0.862068
2024-05-26   -0.389251
                ...   
2029-11-07   -0.471484
2029-11-08    0.760423
2029-11-09    0.524505
2029-11-10    1.323066
2029-11-11   -1.734022
Freq: D, Length: 2000, dtype: float64

In [38]:
# truncating after a specific date
longer_ts.truncate(after="2025-05-22")

2024-05-22   -0.685322
2024-05-23    0.532583
2024-05-24    0.206950
2024-05-25   -0.862068
2024-05-26   -0.389251
                ...   
2025-05-18    0.065215
2025-05-19   -1.111699
2025-05-20    0.038141
2025-05-21    0.812307
2025-05-22   -0.931517
Freq: D, Length: 366, dtype: float64

In [39]:
dates = pd.date_range("2024-01-01",
                      periods=100,
                      freq="W-WED"
                     )

In [40]:
long_df = pd.DataFrame(np.random.standard_normal((100,4)),
                       index=dates,
                       columns=["Iganga", "Jinja", "Kampala", "Busia"])

In [41]:
long_df.loc["2024"]

Unnamed: 0,Iganga,Jinja,Kampala,Busia
2024-01-03,0.239535,0.489619,-0.71087,0.742152
2024-01-10,1.489818,-0.947676,-0.630048,-0.067397
2024-01-17,1.619724,1.774183,0.025465,1.445929
2024-01-24,-0.250587,-0.323952,-0.711116,-0.316298
2024-01-31,0.635494,0.796663,-1.720337,-1.230391
2024-02-07,0.861672,2.955161,0.550656,-0.00906
2024-02-14,-0.940367,0.083155,-1.017849,0.306054
2024-02-21,-0.281508,-0.435279,-0.436649,-1.405585
2024-02-28,-0.449802,-0.120121,0.6844,1.055442
2024-03-06,0.501335,-0.792473,1.647876,-1.207952


<h4>Time Series with Duplicates</h4>

In [42]:
dates = pd.DatetimeIndex(["2000-01-01",
                          "2000-01-02",
                          "2000-01-02",
                          "2000-01-02",
                          "2000-01-03"])

In [43]:
dup_ts = pd.Series(np.arange(5),
                   index=dates)

In [44]:
dup_ts

2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int32

In [45]:
dup_ts["2000-01-03"]

4

In [46]:
dup_ts["2000-01-02"]

2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int32

In [47]:
# Aggregating
grouped = dup_ts.groupby(level=0)

In [48]:
grouped.mean()

2000-01-01    0.0
2000-01-02    2.0
2000-01-03    4.0
dtype: float64

In [49]:
grouped.count()

2000-01-01    1
2000-01-02    3
2000-01-03    1
dtype: int64

<h4>Date Ranges, Frequencies and Shifting</h4>

In [50]:
ts

2024-06-02    1.038238
2025-10-21   -0.629320
2024-04-12    0.367350
2023-10-05   -1.506751
2016-11-03   -1.959226
2017-03-15   -0.416454
2019-06-03   -1.012922
2020-05-08   -1.188495
dtype: float64

In [51]:
resampler = ts.resample("D")

In [52]:
resampler

<pandas.core.resample.DatetimeIndexResampler object at 0x00000243EABFC620>

<h4>Generating Date Ranges</h4>

In [53]:
index = pd.date_range("2024-05-01",
                      "2024-12-31")

In [54]:
index

DatetimeIndex(['2024-05-01', '2024-05-02', '2024-05-03', '2024-05-04',
               '2024-05-05', '2024-05-06', '2024-05-07', '2024-05-08',
               '2024-05-09', '2024-05-10',
               ...
               '2024-12-22', '2024-12-23', '2024-12-24', '2024-12-25',
               '2024-12-26', '2024-12-27', '2024-12-28', '2024-12-29',
               '2024-12-30', '2024-12-31'],
              dtype='datetime64[ns]', length=245, freq='D')

In [55]:
# using start date, end date
pd.date_range(start="2024-05-25", periods=10)

DatetimeIndex(['2024-05-25', '2024-05-26', '2024-05-27', '2024-05-28',
               '2024-05-29', '2024-05-30', '2024-05-31', '2024-06-01',
               '2024-06-02', '2024-06-03'],
              dtype='datetime64[ns]', freq='D')

In [56]:
# using end
pd.date_range(end="2024-12-31",
              periods=200,
             tz='Africa/Kampala',
             freq="MS",
              inclusive="both",
             )

DatetimeIndex(['2008-05-01 00:00:00+03:00', '2008-06-01 00:00:00+03:00',
               '2008-07-01 00:00:00+03:00', '2008-08-01 00:00:00+03:00',
               '2008-09-01 00:00:00+03:00', '2008-10-01 00:00:00+03:00',
               '2008-11-01 00:00:00+03:00', '2008-12-01 00:00:00+03:00',
               '2009-01-01 00:00:00+03:00', '2009-02-01 00:00:00+03:00',
               ...
               '2024-03-01 00:00:00+03:00', '2024-04-01 00:00:00+03:00',
               '2024-05-01 00:00:00+03:00', '2024-06-01 00:00:00+03:00',
               '2024-07-01 00:00:00+03:00', '2024-08-01 00:00:00+03:00',
               '2024-09-01 00:00:00+03:00', '2024-10-01 00:00:00+03:00',
               '2024-11-01 00:00:00+03:00', '2024-12-01 00:00:00+03:00'],
              dtype='datetime64[ns, Africa/Kampala]', length=200, freq='MS')

In [57]:
import pytz

In [58]:
# time zone samples
pytz.all_timezones[:50]

['Africa/Abidjan',
 'Africa/Accra',
 'Africa/Addis_Ababa',
 'Africa/Algiers',
 'Africa/Asmara',
 'Africa/Asmera',
 'Africa/Bamako',
 'Africa/Bangui',
 'Africa/Banjul',
 'Africa/Bissau',
 'Africa/Blantyre',
 'Africa/Brazzaville',
 'Africa/Bujumbura',
 'Africa/Cairo',
 'Africa/Casablanca',
 'Africa/Ceuta',
 'Africa/Conakry',
 'Africa/Dakar',
 'Africa/Dar_es_Salaam',
 'Africa/Djibouti',
 'Africa/Douala',
 'Africa/El_Aaiun',
 'Africa/Freetown',
 'Africa/Gaborone',
 'Africa/Harare',
 'Africa/Johannesburg',
 'Africa/Juba',
 'Africa/Kampala',
 'Africa/Khartoum',
 'Africa/Kigali',
 'Africa/Kinshasa',
 'Africa/Lagos',
 'Africa/Libreville',
 'Africa/Lome',
 'Africa/Luanda',
 'Africa/Lubumbashi',
 'Africa/Lusaka',
 'Africa/Malabo',
 'Africa/Maputo',
 'Africa/Maseru',
 'Africa/Mbabane',
 'Africa/Mogadishu',
 'Africa/Monrovia',
 'Africa/Nairobi',
 'Africa/Ndjamena',
 'Africa/Niamey',
 'Africa/Nouakchott',
 'Africa/Ouagadougou',
 'Africa/Porto-Novo',
 'Africa/Sao_Tome']

In [59]:
# using localize
pd.date_range(
    start=pd.to_datetime("1/1/2024", dayfirst=True).tz_localize("Africa/Kampala"),
    end=pd.to_datetime("31/12/2025", dayfirst=True).tz_localize("Africa/Kampala"),
    )

DatetimeIndex(['2024-01-01 00:00:00+03:00', '2024-01-02 00:00:00+03:00',
               '2024-01-03 00:00:00+03:00', '2024-01-04 00:00:00+03:00',
               '2024-01-05 00:00:00+03:00', '2024-01-06 00:00:00+03:00',
               '2024-01-07 00:00:00+03:00', '2024-01-08 00:00:00+03:00',
               '2024-01-09 00:00:00+03:00', '2024-01-10 00:00:00+03:00',
               ...
               '2025-12-22 00:00:00+03:00', '2025-12-23 00:00:00+03:00',
               '2025-12-24 00:00:00+03:00', '2025-12-25 00:00:00+03:00',
               '2025-12-26 00:00:00+03:00', '2025-12-27 00:00:00+03:00',
               '2025-12-28 00:00:00+03:00', '2025-12-29 00:00:00+03:00',
               '2025-12-30 00:00:00+03:00', '2025-12-31 00:00:00+03:00'],
              dtype='datetime64[ns, Africa/Kampala]', length=731, freq='D')

In [60]:
pd.date_range(start="1/1/2024",
              periods=5,
              freq="3ME"
             )

DatetimeIndex(['2024-01-31', '2024-04-30', '2024-07-31', '2024-10-31',
               '2025-01-31'],
              dtype='datetime64[ns]', freq='3ME')

In [61]:
# specifing unit
pd.date_range(start="2017-01-01",
              periods=10,
              freq="100YS",
              unit='s'
             )

DatetimeIndex(['2017-01-01', '2117-01-01', '2217-01-01', '2317-01-01',
               '2417-01-01', '2517-01-01', '2617-01-01', '2717-01-01',
               '2817-01-01', '2917-01-01'],
              dtype='datetime64[s]', freq='100YS-JAN')

In [62]:
# Normalizing datetime
pd.date_range("2024-05-02 12:54:21",
              periods=5,
              normalize=True
             )

DatetimeIndex(['2024-05-02', '2024-05-03', '2024-05-04', '2024-05-05',
               '2024-05-06'],
              dtype='datetime64[ns]', freq='D')

<h4>Frequencies and Date Offsets</h4>

In [63]:
pd.date_range("2025-02-01",
              "2025-05-02 23:03:02",
              freq="4h"
             )

DatetimeIndex(['2025-02-01 00:00:00', '2025-02-01 04:00:00',
               '2025-02-01 08:00:00', '2025-02-01 12:00:00',
               '2025-02-01 16:00:00', '2025-02-01 20:00:00',
               '2025-02-02 00:00:00', '2025-02-02 04:00:00',
               '2025-02-02 08:00:00', '2025-02-02 12:00:00',
               ...
               '2025-05-01 08:00:00', '2025-05-01 12:00:00',
               '2025-05-01 16:00:00', '2025-05-01 20:00:00',
               '2025-05-02 00:00:00', '2025-05-02 04:00:00',
               '2025-05-02 08:00:00', '2025-05-02 12:00:00',
               '2025-05-02 16:00:00', '2025-05-02 20:00:00'],
              dtype='datetime64[ns]', length=546, freq='4h')

In [64]:
pd.date_range(
    "2000-01-01",
    periods=10,
    freq="1h30min3s",
)

DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 01:30:03',
               '2000-01-01 03:00:06', '2000-01-01 04:30:09',
               '2000-01-01 06:00:12', '2000-01-01 07:30:15',
               '2000-01-01 09:00:18', '2000-01-01 10:30:21',
               '2000-01-01 12:00:24', '2000-01-01 13:30:27'],
              dtype='datetime64[ns]', freq='5403s')

<h5>Week of month</h5>

In [65]:
monthly_meetups = pd.date_range(
    "2024-06-01",
    "2024-12-31",
    freq="WOM-3SAT"
)

In [66]:
monthly_meetups

DatetimeIndex(['2024-06-15', '2024-07-20', '2024-08-17', '2024-09-21',
               '2024-10-19', '2024-11-16', '2024-12-21'],
              dtype='datetime64[ns]', freq='WOM-3SAT')

<h4>Shifting (Leading and Lagging) Data</h4>

In [67]:
ts = pd.Series(
    np.random.standard_normal(10),
    index=pd.date_range(start="2030-01-01",
                        periods=10,
                        freq="ME",
                       )
)

In [68]:
(-1.491246--1.609596)/(-1.609596)

-0.07352776721612128

In [69]:
ts

2030-01-31   -0.373822
2030-02-28   -1.073364
2030-03-31   -0.619996
2030-04-30    1.144231
2030-05-31   -1.766626
2030-06-30   -0.419181
2030-07-31   -0.688219
2030-08-31    1.838161
2030-09-30    0.684634
2030-10-31    0.662180
Freq: ME, dtype: float64

In [70]:
ts.shift(2)

2030-01-31         NaN
2030-02-28         NaN
2030-03-31   -0.373822
2030-04-30   -1.073364
2030-05-31   -0.619996
2030-06-30    1.144231
2030-07-31   -1.766626
2030-08-31   -0.419181
2030-09-30   -0.688219
2030-10-31    1.838161
Freq: ME, dtype: float64

In [71]:
ts.shift(-2)

2030-01-31   -0.619996
2030-02-28    1.144231
2030-03-31   -1.766626
2030-04-30   -0.419181
2030-05-31   -0.688219
2030-06-30    1.838161
2030-07-31    0.684634
2030-08-31    0.662180
2030-09-30         NaN
2030-10-31         NaN
Freq: ME, dtype: float64

In [72]:
# Percentage change
(ts-ts.shift(1))/ts

2030-01-31         NaN
2030-02-28    0.651729
2030-03-31   -0.731245
2030-04-30    1.541845
2030-05-31    1.647693
2030-06-30   -3.214466
2030-07-31    0.390919
2030-08-31    1.374406
2030-09-30   -1.684883
2030-10-31   -0.033908
Freq: ME, dtype: float64

In [73]:
# Shifting index two
ts.shift(2,
         freq="ME",
        )

2030-03-31   -0.373822
2030-04-30   -1.073364
2030-05-31   -0.619996
2030-06-30    1.144231
2030-07-31   -1.766626
2030-08-31   -0.419181
2030-09-30   -0.688219
2030-10-31    1.838161
2030-11-30    0.684634
2030-12-31    0.662180
Freq: ME, dtype: float64

In [74]:
ts.shift(
    3,
    freq="ME",
)

2030-04-30   -0.373822
2030-05-31   -1.073364
2030-06-30   -0.619996
2030-07-31    1.144231
2030-08-31   -1.766626
2030-09-30   -0.419181
2030-10-31   -0.688219
2030-11-30    1.838161
2030-12-31    0.684634
2031-01-31    0.662180
Freq: ME, dtype: float64

In [75]:
ts.shift(
    3,
    freq="D",
)

2030-02-03   -0.373822
2030-03-03   -1.073364
2030-04-03   -0.619996
2030-05-03    1.144231
2030-06-03   -1.766626
2030-07-03   -0.419181
2030-08-03   -0.688219
2030-09-03    1.838161
2030-10-03    0.684634
2030-11-03    0.662180
dtype: float64

In [76]:
ts.shift(
    1,
    freq="90min",
)

2030-01-31 01:30:00   -0.373822
2030-02-28 01:30:00   -1.073364
2030-03-31 01:30:00   -0.619996
2030-04-30 01:30:00    1.144231
2030-05-31 01:30:00   -1.766626
2030-06-30 01:30:00   -0.419181
2030-07-31 01:30:00   -0.688219
2030-08-31 01:30:00    1.838161
2030-09-30 01:30:00    0.684634
2030-10-31 01:30:00    0.662180
dtype: float64

In [77]:
from pandas.tseries.offsets import Day, MonthEnd, Hour

In [78]:
# Using rollback and rollforward
now = datetime(2024,5,26)

In [79]:
now

datetime.datetime(2024, 5, 26, 0, 0)

In [80]:
# Shifting to the end of month
now + MonthEnd()

Timestamp('2024-05-31 00:00:00')

In [81]:
now + MonthEnd(3)

Timestamp('2024-07-31 00:00:00')

In [82]:
offset = MonthEnd()

In [83]:
offset.rollback(now)

Timestamp('2024-04-30 00:00:00')

In [84]:
offset.rollforward(now)

Timestamp('2024-05-31 00:00:00')

In [85]:
now

datetime.datetime(2024, 5, 26, 0, 0)

In [86]:
# using offset creatively

In [87]:
ts.groupby(MonthEnd().rollforward).mean()

2030-01-31   -0.373822
2030-02-28   -1.073364
2030-03-31   -0.619996
2030-04-30    1.144231
2030-05-31   -1.766626
2030-06-30   -0.419181
2030-07-31   -0.688219
2030-08-31    1.838161
2030-09-30    0.684634
2030-10-31    0.662180
dtype: float64

In [88]:
# Using resample
ts.resample("ME").mean()

2030-01-31   -0.373822
2030-02-28   -1.073364
2030-03-31   -0.619996
2030-04-30    1.144231
2030-05-31   -1.766626
2030-06-30   -0.419181
2030-07-31   -0.688219
2030-08-31    1.838161
2030-09-30    0.684634
2030-10-31    0.662180
Freq: ME, dtype: float64

<h4>Time Zone Localization and Conversion</h4>

In [89]:
pytz.common_timezones[:5]

['Africa/Abidjan',
 'Africa/Accra',
 'Africa/Addis_Ababa',
 'Africa/Algiers',
 'Africa/Asmara']

In [90]:
# Timezone objects
ugandaTime = pytz.timezone("Africa/Kampala")

In [91]:
ugandaTime

<DstTzInfo 'Africa/Kampala' LMT+2:27:00 STD>

In [92]:
dates = pd.date_range(
    "2024-05-26 19:23",
    periods=10,
    tz="Africa/Kampala",
)

In [93]:
dates

DatetimeIndex(['2024-05-26 19:23:00+03:00', '2024-05-27 19:23:00+03:00',
               '2024-05-28 19:23:00+03:00', '2024-05-29 19:23:00+03:00',
               '2024-05-30 19:23:00+03:00', '2024-05-31 19:23:00+03:00',
               '2024-06-01 19:23:00+03:00', '2024-06-02 19:23:00+03:00',
               '2024-06-03 19:23:00+03:00', '2024-06-04 19:23:00+03:00'],
              dtype='datetime64[ns, Africa/Kampala]', freq='D')

In [94]:
# time series are time zone naive 
ts = pd.Series(
    np.random.standard_normal(len(dates)),
    index=dates,
)

In [95]:
ts

2024-05-26 19:23:00+03:00   -0.228969
2024-05-27 19:23:00+03:00   -0.105087
2024-05-28 19:23:00+03:00   -1.394841
2024-05-29 19:23:00+03:00    0.560469
2024-05-30 19:23:00+03:00    0.829216
2024-05-31 19:23:00+03:00    0.471337
2024-06-01 19:23:00+03:00    0.278529
2024-06-02 19:23:00+03:00    1.382931
2024-06-03 19:23:00+03:00   -0.549667
2024-06-04 19:23:00+03:00   -1.621545
Freq: D, dtype: float64

In [96]:
# printing time zone
print(ts.index.tz)

Africa/Kampala


In [97]:
# Generating datetime ranges with a tz attribute
dates = pd.date_range(
    "2024-05-26 19:23",
    periods=10,
    tz="Africa/Kampala",
)
dates

DatetimeIndex(['2024-05-26 19:23:00+03:00', '2024-05-27 19:23:00+03:00',
               '2024-05-28 19:23:00+03:00', '2024-05-29 19:23:00+03:00',
               '2024-05-30 19:23:00+03:00', '2024-05-31 19:23:00+03:00',
               '2024-06-01 19:23:00+03:00', '2024-06-02 19:23:00+03:00',
               '2024-06-03 19:23:00+03:00', '2024-06-04 19:23:00+03:00'],
              dtype='datetime64[ns, Africa/Kampala]', freq='D')

In [98]:
# conversion
ts_utc = ts.tz_convert("utc")
ts_utc

2024-05-26 16:23:00+00:00   -0.228969
2024-05-27 16:23:00+00:00   -0.105087
2024-05-28 16:23:00+00:00   -1.394841
2024-05-29 16:23:00+00:00    0.560469
2024-05-30 16:23:00+00:00    0.829216
2024-05-31 16:23:00+00:00    0.471337
2024-06-01 16:23:00+00:00    0.278529
2024-06-02 16:23:00+00:00    1.382931
2024-06-03 16:23:00+00:00   -0.549667
2024-06-04 16:23:00+00:00   -1.621545
Freq: D, dtype: float64

In [99]:
ts_nairobi = ts_utc.tz_convert("Africa/Nairobi")
ts_nairobi

2024-05-26 19:23:00+03:00   -0.228969
2024-05-27 19:23:00+03:00   -0.105087
2024-05-28 19:23:00+03:00   -1.394841
2024-05-29 19:23:00+03:00    0.560469
2024-05-30 19:23:00+03:00    0.829216
2024-05-31 19:23:00+03:00    0.471337
2024-06-01 19:23:00+03:00    0.278529
2024-06-02 19:23:00+03:00    1.382931
2024-06-03 19:23:00+03:00   -0.549667
2024-06-04 19:23:00+03:00   -1.621545
Freq: D, dtype: float64

In [100]:
ts_cairo = ts_nairobi.tz_convert("Africa/Cairo")
ts_cairo

2024-05-26 19:23:00+03:00   -0.228969
2024-05-27 19:23:00+03:00   -0.105087
2024-05-28 19:23:00+03:00   -1.394841
2024-05-29 19:23:00+03:00    0.560469
2024-05-30 19:23:00+03:00    0.829216
2024-05-31 19:23:00+03:00    0.471337
2024-06-01 19:23:00+03:00    0.278529
2024-06-02 19:23:00+03:00    1.382931
2024-06-03 19:23:00+03:00   -0.549667
2024-06-04 19:23:00+03:00   -1.621545
Freq: D, dtype: float64

In [101]:
ts_cairo.tz_convert("Africa/Kampala")

2024-05-26 19:23:00+03:00   -0.228969
2024-05-27 19:23:00+03:00   -0.105087
2024-05-28 19:23:00+03:00   -1.394841
2024-05-29 19:23:00+03:00    0.560469
2024-05-30 19:23:00+03:00    0.829216
2024-05-31 19:23:00+03:00    0.471337
2024-06-01 19:23:00+03:00    0.278529
2024-06-02 19:23:00+03:00    1.382931
2024-06-03 19:23:00+03:00   -0.549667
2024-06-04 19:23:00+03:00   -1.621545
Freq: D, dtype: float64

<h4>Operations with Time Zone-Aware Timestamp Objects</h4>

In [102]:
stamp = pd.Timestamp("2024-05-27 17:32:04")
stamp

Timestamp('2024-05-27 17:32:04')

In [103]:
stamp_utc = stamp.tz_localize("utc")
stamp_utc

Timestamp('2024-05-27 17:32:04+0000', tz='UTC')

In [104]:
# using tz in definition
stamp_kampala = pd.Timestamp(
    "2024-05-27 17:48:49",
    tz="Africa/Kampala",
)
stamp_kampala

Timestamp('2024-05-27 17:48:49+0300', tz='Africa/Kampala')

In [105]:
# timestamps store utc value since the Unix epoch
stamp_kampala.value

1716821329000000000

In [106]:
stamp_utc.value

1716831124000000000

In [107]:
# Daylight Saving Transition
# 30 minutes before transitioning into DST
stamp = pd.Timestamp(
    "2012-03-11 01:30",
    tz="US/Eastern",
)
stamp

Timestamp('2012-03-11 01:30:00-0500', tz='US/Eastern')

In [108]:
stamp + Hour()

Timestamp('2012-03-11 03:30:00-0400', tz='US/Eastern')

In [109]:
# 90 minutes before transitioning out of DST
stamp = pd.Timestamp(
    "2012-11-04 00:30",
    tz="US/Eastern",
    )
stamp

Timestamp('2012-11-04 00:30:00-0400', tz='US/Eastern')

In [110]:
# Adding 2 hours
stamp + 2*Hour()

Timestamp('2012-11-04 01:30:00-0500', tz='US/Eastern')

<h4>Operations between different time zones</h4>

In [111]:
dates = pd.date_range(
    "2024-05-27 10:43",
    periods=20,
    freq="ME",
)
ts = pd.Series(
    np.random.standard_normal(
        len(dates)),
    index=dates,
)
ts

2024-05-31 10:43:00    0.771016
2024-06-30 10:43:00   -0.637498
2024-07-31 10:43:00   -1.459613
2024-08-31 10:43:00    0.748989
2024-09-30 10:43:00   -1.875977
2024-10-31 10:43:00   -0.458500
2024-11-30 10:43:00    0.275812
2024-12-31 10:43:00    0.739959
2025-01-31 10:43:00   -0.002749
2025-02-28 10:43:00    0.475721
2025-03-31 10:43:00   -0.969331
2025-04-30 10:43:00    0.782284
2025-05-31 10:43:00   -1.275612
2025-06-30 10:43:00    1.040039
2025-07-31 10:43:00   -1.418424
2025-08-31 10:43:00    0.519804
2025-09-30 10:43:00    0.093049
2025-10-31 10:43:00   -0.717466
2025-11-30 10:43:00   -0.110871
2025-12-31 10:43:00    0.210359
Freq: ME, dtype: float64

In [112]:
ts1 = ts[:10].tz_localize("Africa/Kampala")
ts2 = ts[4:].tz_localize("Africa/Cairo")
result = ts1 + ts2
result.index

DatetimeIndex(['2024-05-31 07:43:00+00:00', '2024-06-30 07:43:00+00:00',
               '2024-07-31 07:43:00+00:00', '2024-08-31 07:43:00+00:00',
               '2024-09-30 07:43:00+00:00', '2024-10-31 07:43:00+00:00',
               '2024-11-30 07:43:00+00:00', '2024-11-30 08:43:00+00:00',
               '2024-12-31 07:43:00+00:00', '2024-12-31 08:43:00+00:00',
               '2025-01-31 07:43:00+00:00', '2025-01-31 08:43:00+00:00',
               '2025-02-28 07:43:00+00:00', '2025-02-28 08:43:00+00:00',
               '2025-03-31 08:43:00+00:00', '2025-04-30 07:43:00+00:00',
               '2025-05-31 07:43:00+00:00', '2025-06-30 07:43:00+00:00',
               '2025-07-31 07:43:00+00:00', '2025-08-31 07:43:00+00:00',
               '2025-09-30 07:43:00+00:00', '2025-10-31 08:43:00+00:00',
               '2025-11-30 08:43:00+00:00', '2025-12-31 08:43:00+00:00'],
              dtype='datetime64[ns, UTC]', freq=None)

In [113]:
# Operations between zone-aware and zone-naive will raise an exception

<h4>Periods and Period Arithmetics</h4>

In [114]:
p = pd.Period("2025",
              freq="Y-MAY",
             )
p

Period('2025', 'Y-MAY')

In [115]:
p + 4

Period('2029', 'Y-MAY')

In [116]:
p - 14

Period('2011', 'Y-MAY')

In [117]:
p

Period('2025', 'Y-MAY')

In [118]:
pd.Period("2030",
          freq="Y-MAY",
         ) - p

<5 * YearEnds: month=5>

In [119]:
# using periods range
periods = pd.period_range(
    "2024-05-27",
    "2030-05-27",
    freq="M",
)
periods

PeriodIndex(['2024-05', '2024-06', '2024-07', '2024-08', '2024-09', '2024-10',
             '2024-11', '2024-12', '2025-01', '2025-02', '2025-03', '2025-04',
             '2025-05', '2025-06', '2025-07', '2025-08', '2025-09', '2025-10',
             '2025-11', '2025-12', '2026-01', '2026-02', '2026-03', '2026-04',
             '2026-05', '2026-06', '2026-07', '2026-08', '2026-09', '2026-10',
             '2026-11', '2026-12', '2027-01', '2027-02', '2027-03', '2027-04',
             '2027-05', '2027-06', '2027-07', '2027-08', '2027-09', '2027-10',
             '2027-11', '2027-12', '2028-01', '2028-02', '2028-03', '2028-04',
             '2028-05', '2028-06', '2028-07', '2028-08', '2028-09', '2028-10',
             '2028-11', '2028-12', '2029-01', '2029-02', '2029-03', '2029-04',
             '2029-05', '2029-06', '2029-07', '2029-08', '2029-09', '2029-10',
             '2029-11', '2029-12', '2030-01', '2030-02', '2030-03', '2030-04',
             '2030-05'],
            dtype='period[M

In [120]:
# using periods for index
pd.Series(
    np.random.standard_normal(73),
    index=periods,
)

2024-05    1.485318
2024-06   -1.614323
2024-07    1.234443
2024-08   -0.344677
2024-09    0.691319
             ...   
2030-01   -0.595056
2030-02    0.896323
2030-03   -0.915311
2030-04    1.549743
2030-05    0.610539
Freq: M, Length: 73, dtype: float64

In [121]:
# using strings
values = [
    "2024Q1",
    "2025Q3",
    "2026Q1",
]
values

['2024Q1', '2025Q3', '2026Q1']

In [122]:
# 
index = pd.PeriodIndex(values,
                       freq="Q-DEC",
                      )
index

PeriodIndex(['2024Q1', '2025Q3', '2026Q1'], dtype='period[Q-DEC]')

<h4>Period Frequency Conversions</h4>

In [123]:
p = pd.Period(
    "2025",
    freq="Y-DEC",
)
p

Period('2025', 'Y-DEC')

In [124]:
# conversion
p.asfreq("M",
         how="start",
        )

Period('2025-01', 'M')

In [125]:
# conversion
p.asfreq("M",
         how="end",
        )

Period('2025-12', 'M')

In [126]:
# Fiscal year
p = pd.Period(
    "2024",
    freq="Y-JUN",
)
p

Period('2024', 'Y-JUN')

In [127]:
p.asfreq("M",
         how="start"
        )

Period('2023-07', 'M')

In [129]:
p.asfreq("M",
         how="end",
        )

Period('2024-06', 'M')

In [133]:
# period conversions
# June 2024 is the end of the period
p = pd.Period(
    "2024",
    "Y-JUN"
)
p

Period('2024', 'Y-JUN')

In [131]:
p.asfreq(
    "M",
    how="start"
)

Period('2023-07', 'M')

In [132]:
p.asfreq(
    "M",
    how="end",
)

Period('2024-06', 'M')

In [136]:
# conversion
p.asfreq(
    "Y-AUG"
)

Period('2024', 'Y-AUG')

In [138]:
# more examples
periods = pd.period_range(
    "2035", "2040",
    freq="Y-JUN"
)

In [139]:
periods

PeriodIndex(['2035', '2036', '2037', '2038', '2039', '2040'], dtype='period[Y-JUN]')

In [140]:
ts = pd.Series(np.random.standard_normal(
    len(periods)),
               index=periods,
              )
ts

2035    0.179127
2036   -0.506356
2037   -0.157151
2038   -0.209745
2039    0.922853
2040   -0.728288
Freq: Y-JUN, dtype: float64

In [141]:
# conversion to months frequency
ts.asfreq(
    "M",
    how="start"
)

2034-07    0.179127
2035-07   -0.506356
2036-07   -0.157151
2037-07   -0.209745
2038-07    0.922853
2039-07   -0.728288
Freq: M, dtype: float64

In [142]:
ts.asfreq(
    "M",
    how="end"
)

2035-06    0.179127
2036-06   -0.506356
2037-06   -0.157151
2038-06   -0.209745
2039-06    0.922853
2040-06   -0.728288
Freq: M, dtype: float64

In [149]:
# last business day of the month
ts.asfreq(
    freq="B",
    how="end"
)

  ts.asfreq(


2035-06-29    0.179127
2036-06-30   -0.506356
2037-06-30   -0.157151
2038-06-30   -0.209745
2039-06-30    0.922853
2040-06-29   -0.728288
Freq: B, dtype: float64

 <h4>Quarterly Period Frequencies</h4>

In [151]:
p = pd.Period(
    "2024Q4",
    freq="Q-JAN",
)

In [152]:
p

Period('2024Q4', 'Q-JAN')

In [154]:
p.asfreq(freq="Q-JUN")

Period('2024Q3', 'Q-JUN')

In [161]:
# converting to daily frequencies
p.asfreq("D",
         how="start",
        )

Period('2023-11-01', 'D')

In [170]:
# fiscal periods
fs = pd.period_range(
    "2023Q1",
    "2030Q4",
    freq="Q-MAY"
)
fs

PeriodIndex(['2023Q1', '2023Q2', '2023Q3', '2023Q4', '2024Q1', '2024Q2',
             '2024Q3', '2024Q4', '2025Q1', '2025Q2', '2025Q3', '2025Q4',
             '2026Q1', '2026Q2', '2026Q3', '2026Q4', '2027Q1', '2027Q2',
             '2027Q3', '2027Q4', '2028Q1', '2028Q2', '2028Q3', '2028Q4',
             '2029Q1', '2029Q2', '2029Q3', '2029Q4', '2030Q1', '2030Q2',
             '2030Q3', '2030Q4'],
            dtype='period[Q-MAY]')

In [169]:
fs.asfreq("M",
          how="start")

PeriodIndex(['2023-06', '2023-09', '2023-12', '2024-03', '2024-06', '2024-09',
             '2024-12', '2025-03', '2025-06', '2025-09', '2025-12', '2026-03',
             '2026-06', '2026-09', '2026-12', '2027-03', '2027-06', '2027-09',
             '2027-12', '2028-03', '2028-06', '2028-09', '2028-12', '2029-03',
             '2029-06', '2029-09', '2029-12', '2030-03'],
            dtype='period[M]')

In [171]:
fs.asfreq(
    "D",
    how="end",
)

PeriodIndex(['2022-08-31', '2022-11-30', '2023-02-28', '2023-05-31',
             '2023-08-31', '2023-11-30', '2024-02-29', '2024-05-31',
             '2024-08-31', '2024-11-30', '2025-02-28', '2025-05-31',
             '2025-08-31', '2025-11-30', '2026-02-28', '2026-05-31',
             '2026-08-31', '2026-11-30', '2027-02-28', '2027-05-31',
             '2027-08-31', '2027-11-30', '2028-02-29', '2028-05-31',
             '2028-08-31', '2028-11-30', '2029-02-28', '2029-05-31',
             '2029-08-31', '2029-11-30', '2030-02-28', '2030-05-31'],
            dtype='period[D]')

In [193]:
(fs.asfreq(
    freq="B",
    how="end",
) - 1).asfreq(
    "min",
    how="start",
) + 16 * 60

  (fs.asfreq(


PeriodIndex(['2022-08-30 16:00', '2022-11-29 16:00', '2023-02-27 16:00',
             '2023-05-30 16:00', '2023-08-30 16:00', '2023-11-29 16:00',
             '2024-02-28 16:00', '2024-05-30 16:00', '2024-08-29 16:00',
             '2024-11-28 16:00', '2025-02-27 16:00', '2025-05-29 16:00',
             '2025-08-28 16:00', '2025-11-27 16:00', '2026-02-26 16:00',
             '2026-05-28 16:00', '2026-08-28 16:00', '2026-11-27 16:00',
             '2027-02-25 16:00', '2027-05-28 16:00', '2027-08-30 16:00',
             '2027-11-29 16:00', '2028-02-28 16:00', '2028-05-30 16:00',
             '2028-08-30 16:00', '2028-11-29 16:00', '2029-02-27 16:00',
             '2029-05-30 16:00', '2029-08-30 16:00', '2029-11-29 16:00',
             '2030-02-27 16:00', '2030-05-30 16:00'],
            dtype='period[min]')

In [186]:
# More examples
periods = pd.period_range(
    "2024Q4",
    "2026Q1",
    freq="Q-JAN",
)

In [187]:
periods

PeriodIndex(['2024Q4', '2025Q1', '2025Q2', '2025Q3', '2025Q4', '2026Q1'], dtype='period[Q-JAN]')

In [188]:
ts = pd.Series(
    np.arange(len(periods)),
    index=periods)
ts

2024Q4    0
2025Q1    1
2025Q2    2
2025Q3    3
2025Q4    4
2026Q1    5
Freq: Q-JAN, dtype: int32

In [190]:
new_periods = (periods.asfreq("B", "end")-1).asfreq("h", "start") + 16
new_periods

  new_periods = (periods.asfreq("B", "end")-1).asfreq("h", "start") + 16


PeriodIndex(['2024-01-30 16:00', '2024-04-29 16:00', '2024-07-30 16:00',
             '2024-10-30 16:00', '2025-01-30 16:00', '2025-04-29 16:00'],
            dtype='period[h]')

In [195]:
ts.index = new_periods.to_timestamp()
ts

2024-01-30 16:00:00    0
2024-04-29 16:00:00    1
2024-07-30 16:00:00    2
2024-10-30 16:00:00    3
2025-01-30 16:00:00    4
2025-04-29 16:00:00    5
dtype: int32

<h4>Converting Timestamps to periods (and Back)</h4>

In [201]:
dates = pd.date_range(
    "2024-01-15",
    periods=10,
    freq="ME",
)
dates

DatetimeIndex(['2024-01-31', '2024-02-29', '2024-03-31', '2024-04-30',
               '2024-05-31', '2024-06-30', '2024-07-31', '2024-08-31',
               '2024-09-30', '2024-10-31'],
              dtype='datetime64[ns]', freq='ME')

In [200]:
ts = pd.Series(
    np.random.standard_normal(len(dates)),
    index=dates)
ts

2024-01-31    0.010285
2024-02-29   -1.110337
2024-03-31    0.163813
2024-04-30    1.082076
2024-05-31   -1.262454
2024-06-30    0.544247
2024-07-31   -1.303763
2024-08-31   -0.241825
2024-09-30   -0.551715
2024-10-31    0.864302
Freq: ME, dtype: float64

In [202]:
ts.to_period()

2024-01    0.010285
2024-02   -1.110337
2024-03    0.163813
2024-04    1.082076
2024-05   -1.262454
2024-06    0.544247
2024-07   -1.303763
2024-08   -0.241825
2024-09   -0.551715
2024-10    0.864302
Freq: M, dtype: float64

In [204]:
dates = pd.date_range(
    "2024-01-29",
    periods=10,
)
ts2 = pd.Series(
    np.random.standard_normal(len(dates)),
    index=dates)
ts2

2024-01-29    1.002581
2024-01-30    0.577513
2024-01-31   -1.022327
2024-02-01   -1.530004
2024-02-02   -0.306660
2024-02-03    0.533775
2024-02-04   -0.675688
2024-02-05   -0.132129
2024-02-06    0.846808
2024-02-07   -0.099524
Freq: D, dtype: float64

In [207]:
pts = ts2.to_period("M")
pts

2024-01    1.002581
2024-01    0.577513
2024-01   -1.022327
2024-02   -1.530004
2024-02   -0.306660
2024-02    0.533775
2024-02   -0.675688
2024-02   -0.132129
2024-02    0.846808
2024-02   -0.099524
Freq: M, dtype: float64

In [209]:
# converting back to timestamp
pts.to_timestamp(
    how="end")

2024-01-31 23:59:59.999999999    1.002581
2024-01-31 23:59:59.999999999    0.577513
2024-01-31 23:59:59.999999999   -1.022327
2024-02-29 23:59:59.999999999   -1.530004
2024-02-29 23:59:59.999999999   -0.306660
2024-02-29 23:59:59.999999999    0.533775
2024-02-29 23:59:59.999999999   -0.675688
2024-02-29 23:59:59.999999999   -0.132129
2024-02-29 23:59:59.999999999    0.846808
2024-02-29 23:59:59.999999999   -0.099524
dtype: float64

In [None]:
data = pd.read_cs