In [1]:
import pandas as pd
import numpy as np

<h3>Date and Time Data Types and Tools</h3>

In [2]:
from datetime import datetime
from datetime import timedelta

In [3]:
now = datetime.now()

In [4]:
now

datetime.datetime(2024, 5, 27, 18, 12, 41, 505963)

In [5]:
now.year, now.month, now.day, now.hour, now.minute

(2024, 5, 27, 18, 12)

In [6]:
now.year

2024

In [7]:
# delta
delta = datetime(2024, 5, 21) - datetime(2016, 8, 26, 8, 54)
delta

datetime.timedelta(days=2824, seconds=54360)

In [8]:
# numerical evaluations
start = datetime(2024, 1, 1)
start + timedelta(3000)

datetime.datetime(2032, 3, 19, 0, 0)

<h4>Converting between String and Datetime</h4>

In [9]:
stamp = datetime(2024, 5, 22)

In [10]:
str(stamp)

'2024-05-22 00:00:00'

In [11]:
# Conversion to a specified string format
stamp.strftime("%Y-%B-%d-%u-%A-%p")

'2024-May-22-3-Wednesday-AM'

In [12]:
value = "2024-12-03"

In [13]:
datetime.strptime(value, "%Y-%m-%d")

datetime.datetime(2024, 12, 3, 0, 0)

In [14]:
# converting a list of string dates to datetime format
datestrs = ["7/6/2025", "8/6/2025"]
[datetime.strptime(x, "%m/%d/%Y") for x in datestrs]

[datetime.datetime(2025, 7, 6, 0, 0), datetime.datetime(2025, 8, 6, 0, 0)]

In [15]:
# pandas to_datetime
datestrs = ["2011-07-06 12:00:00", "2025-08-06 00:00:00"]
pd.to_datetime(datestrs)

DatetimeIndex(['2011-07-06 12:00:00', '2025-08-06 00:00:00'], dtype='datetime64[ns]', freq=None)

In [16]:
# dataframe to datetime
df = pd.DataFrame(
    {"year": [2025, 2024, 2023],
     "month": [2, 4, 8],
     "day": [5, 4, 5]
    })

In [17]:
df

Unnamed: 0,year,month,day
0,2025,2,5
1,2024,4,4
2,2023,8,5


In [18]:
pd.to_datetime(df)

0   2025-02-05
1   2024-04-04
2   2023-08-05
dtype: datetime64[ns]

In [19]:
datestr = ["2025-07-06 12:00:00",
           "2034-08-04 13:45:34"]

In [20]:
# Missing values, Not a Time values are also accepted
idx = pd.to_datetime(datestrs + [None])

In [21]:
idx

DatetimeIndex(['2011-07-06 12:00:00', '2025-08-06 00:00:00', 'NaT'], dtype='datetime64[ns]', freq=None)

In [22]:
# indexing idx
idx[2]

NaT

In [23]:
pd.isna(idx)

array([False, False,  True])

<h4>Time Series Basics</h4>

In [24]:
# time series
dates = [datetime(2024, 6, 2),
        datetime(2025, 10, 21),
        datetime(2024, 4, 12),
        datetime(2023, 10, 5),
        datetime(2016, 11, 3),
        datetime(2017, 3, 15),
        datetime(2019, 6, 3),
        datetime(2020, 5, 8),
       ]

In [25]:
ts =pd.Series(np.random.standard_normal(8),
              index=dates,
             )

In [26]:
ts

2024-06-02   -0.055120
2025-10-21    0.374353
2024-04-12    0.851770
2023-10-05   -0.076624
2016-11-03   -0.614916
2017-03-15    0.622371
2019-06-03    0.641033
2020-05-08   -0.812665
dtype: float64

In [27]:
ts + ts[::3]

2016-11-03         NaN
2017-03-15         NaN
2019-06-03    1.282066
2020-05-08         NaN
2023-10-05   -0.153248
2024-04-12         NaN
2024-06-02   -0.110240
2025-10-21         NaN
dtype: float64

In [28]:
ts[::3]

2024-06-02   -0.055120
2023-10-05   -0.076624
2019-06-03    0.641033
dtype: float64

<h4>indexing, Selecting and Subsetting</h4>

In [29]:
# same as indexing a series
stamp = ts.index[2]

In [30]:
stamp

Timestamp('2024-04-12 00:00:00')

In [31]:
ts.iat[0]

-0.055120164228174603

In [32]:
# indexing by year
longer_ts = pd.Series(np.random.standard_normal(2000),
                      index=pd.date_range("2024-05-22",
                                          periods=2000))

In [33]:
longer_ts["2024"]

2024-05-22    0.637904
2024-05-23    0.825819
2024-05-24   -0.509952
2024-05-25    1.448112
2024-05-26   -0.166532
                ...   
2024-12-27    0.144548
2024-12-28   -1.419106
2024-12-29   -0.035873
2024-12-30   -1.686581
2024-12-31    1.309312
Freq: D, Length: 224, dtype: float64

In [34]:
# selecting by year-month
longer_ts["2024-06"]

2024-06-01   -2.031233
2024-06-02   -1.988803
2024-06-03    1.240905
2024-06-04    0.428916
2024-06-05    1.614945
2024-06-06   -0.457510
2024-06-07    0.750308
2024-06-08    3.133969
2024-06-09   -0.414553
2024-06-10   -1.587691
2024-06-11   -1.103833
2024-06-12   -0.506455
2024-06-13   -1.447872
2024-06-14    0.342051
2024-06-15   -0.376296
2024-06-16    0.802557
2024-06-17    1.399019
2024-06-18   -0.429715
2024-06-19    0.311208
2024-06-20    0.732564
2024-06-21   -0.573867
2024-06-22    0.911472
2024-06-23    1.138425
2024-06-24   -1.368320
2024-06-25    0.198475
2024-06-26   -0.051755
2024-06-27   -0.105005
2024-06-28   -1.407910
2024-06-29    0.426609
2024-06-30   -0.092036
Freq: D, dtype: float64

In [35]:
# Slicing by datetime
longer_ts[datetime(2024,5,22):
datetime(2024,8,1)
]

2024-05-22    0.637904
2024-05-23    0.825819
2024-05-24   -0.509952
2024-05-25    1.448112
2024-05-26   -0.166532
                ...   
2024-07-28   -0.674485
2024-07-29    0.160470
2024-07-30   -1.505582
2024-07-31    0.402367
2024-08-01    0.738296
Freq: D, Length: 72, dtype: float64

In [36]:
# Slicing with a timestamp not contained in the ts
longer_ts[datetime(2016, 8, 26):
datetime(2024, 8, 26)
]

2024-05-22    0.637904
2024-05-23    0.825819
2024-05-24   -0.509952
2024-05-25    1.448112
2024-05-26   -0.166532
                ...   
2024-08-22    0.814294
2024-08-23   -0.141054
2024-08-24   -1.303573
2024-08-25   -1.695053
2024-08-26    0.972840
Freq: D, Length: 97, dtype: float64

In [37]:
longer_ts

2024-05-22    0.637904
2024-05-23    0.825819
2024-05-24   -0.509952
2024-05-25    1.448112
2024-05-26   -0.166532
                ...   
2029-11-07    2.327455
2029-11-08   -2.919006
2029-11-09    0.874210
2029-11-10    1.208639
2029-11-11   -0.167343
Freq: D, Length: 2000, dtype: float64

In [38]:
# truncating after a specific date
longer_ts.truncate(after="2025-05-22")

2024-05-22    0.637904
2024-05-23    0.825819
2024-05-24   -0.509952
2024-05-25    1.448112
2024-05-26   -0.166532
                ...   
2025-05-18    0.358396
2025-05-19    0.409047
2025-05-20   -0.264889
2025-05-21   -0.279046
2025-05-22    1.778511
Freq: D, Length: 366, dtype: float64

In [39]:
dates = pd.date_range("2024-01-01",
                      periods=100,
                      freq="W-WED"
                     )

In [40]:
long_df = pd.DataFrame(np.random.standard_normal((100,4)),
                       index=dates,
                       columns=["Iganga", "Jinja", "Kampala", "Busia"])

In [41]:
long_df.loc["2024"]

Unnamed: 0,Iganga,Jinja,Kampala,Busia
2024-01-03,0.645867,0.315554,-0.960299,1.22973
2024-01-10,0.539391,-1.562555,0.003057,0.236926
2024-01-17,2.327321,0.322698,-0.979169,-0.256654
2024-01-24,-1.092041,-0.257613,0.048517,0.703745
2024-01-31,-1.417302,0.823545,-0.645806,-2.290968
2024-02-07,0.546879,0.514685,-1.599138,0.962765
2024-02-14,-2.435092,0.793312,-1.523017,0.65209
2024-02-21,-0.584838,0.868044,0.069981,1.310033
2024-02-28,0.247283,-0.411813,0.77481,0.349948
2024-03-06,-0.758793,-0.77229,-0.189217,0.422135


<h4>Time Series with Duplicates</h4>

In [42]:
dates = pd.DatetimeIndex(["2000-01-01",
                          "2000-01-02",
                          "2000-01-02",
                          "2000-01-02",
                          "2000-01-03"])

In [43]:
dup_ts = pd.Series(np.arange(5),
                   index=dates)

In [44]:
dup_ts

2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int32

In [45]:
dup_ts["2000-01-03"]

4

In [46]:
dup_ts["2000-01-02"]

2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int32

In [47]:
# Aggregating
grouped = dup_ts.groupby(level=0)

In [48]:
grouped.mean()

2000-01-01    0.0
2000-01-02    2.0
2000-01-03    4.0
dtype: float64

In [49]:
grouped.count()

2000-01-01    1
2000-01-02    3
2000-01-03    1
dtype: int64

<h4>Date Ranges, Frequencies and Shifting</h4>

In [50]:
ts

2024-06-02   -0.055120
2025-10-21    0.374353
2024-04-12    0.851770
2023-10-05   -0.076624
2016-11-03   -0.614916
2017-03-15    0.622371
2019-06-03    0.641033
2020-05-08   -0.812665
dtype: float64

In [51]:
resampler = ts.resample("D")

In [52]:
resampler

<pandas.core.resample.DatetimeIndexResampler object at 0x000002241530C080>

<h4>Generating Date Ranges</h4>

In [53]:
index = pd.date_range("2024-05-01",
                      "2024-12-31")

In [54]:
index

DatetimeIndex(['2024-05-01', '2024-05-02', '2024-05-03', '2024-05-04',
               '2024-05-05', '2024-05-06', '2024-05-07', '2024-05-08',
               '2024-05-09', '2024-05-10',
               ...
               '2024-12-22', '2024-12-23', '2024-12-24', '2024-12-25',
               '2024-12-26', '2024-12-27', '2024-12-28', '2024-12-29',
               '2024-12-30', '2024-12-31'],
              dtype='datetime64[ns]', length=245, freq='D')

In [55]:
# using start date, end date
pd.date_range(start="2024-05-25", periods=10)

DatetimeIndex(['2024-05-25', '2024-05-26', '2024-05-27', '2024-05-28',
               '2024-05-29', '2024-05-30', '2024-05-31', '2024-06-01',
               '2024-06-02', '2024-06-03'],
              dtype='datetime64[ns]', freq='D')

In [56]:
# using end
pd.date_range(end="2024-12-31",
              periods=200,
             tz='Africa/Kampala',
             freq="MS",
              inclusive="both",
             )

DatetimeIndex(['2008-05-01 00:00:00+03:00', '2008-06-01 00:00:00+03:00',
               '2008-07-01 00:00:00+03:00', '2008-08-01 00:00:00+03:00',
               '2008-09-01 00:00:00+03:00', '2008-10-01 00:00:00+03:00',
               '2008-11-01 00:00:00+03:00', '2008-12-01 00:00:00+03:00',
               '2009-01-01 00:00:00+03:00', '2009-02-01 00:00:00+03:00',
               ...
               '2024-03-01 00:00:00+03:00', '2024-04-01 00:00:00+03:00',
               '2024-05-01 00:00:00+03:00', '2024-06-01 00:00:00+03:00',
               '2024-07-01 00:00:00+03:00', '2024-08-01 00:00:00+03:00',
               '2024-09-01 00:00:00+03:00', '2024-10-01 00:00:00+03:00',
               '2024-11-01 00:00:00+03:00', '2024-12-01 00:00:00+03:00'],
              dtype='datetime64[ns, Africa/Kampala]', length=200, freq='MS')

In [57]:
import pytz

In [58]:
# time zone samples
pytz.all_timezones[:50]

['Africa/Abidjan',
 'Africa/Accra',
 'Africa/Addis_Ababa',
 'Africa/Algiers',
 'Africa/Asmara',
 'Africa/Asmera',
 'Africa/Bamako',
 'Africa/Bangui',
 'Africa/Banjul',
 'Africa/Bissau',
 'Africa/Blantyre',
 'Africa/Brazzaville',
 'Africa/Bujumbura',
 'Africa/Cairo',
 'Africa/Casablanca',
 'Africa/Ceuta',
 'Africa/Conakry',
 'Africa/Dakar',
 'Africa/Dar_es_Salaam',
 'Africa/Djibouti',
 'Africa/Douala',
 'Africa/El_Aaiun',
 'Africa/Freetown',
 'Africa/Gaborone',
 'Africa/Harare',
 'Africa/Johannesburg',
 'Africa/Juba',
 'Africa/Kampala',
 'Africa/Khartoum',
 'Africa/Kigali',
 'Africa/Kinshasa',
 'Africa/Lagos',
 'Africa/Libreville',
 'Africa/Lome',
 'Africa/Luanda',
 'Africa/Lubumbashi',
 'Africa/Lusaka',
 'Africa/Malabo',
 'Africa/Maputo',
 'Africa/Maseru',
 'Africa/Mbabane',
 'Africa/Mogadishu',
 'Africa/Monrovia',
 'Africa/Nairobi',
 'Africa/Ndjamena',
 'Africa/Niamey',
 'Africa/Nouakchott',
 'Africa/Ouagadougou',
 'Africa/Porto-Novo',
 'Africa/Sao_Tome']

In [59]:
# using localize
pd.date_range(
    start=pd.to_datetime("1/1/2024", dayfirst=True).tz_localize("Africa/Kampala"),
    end=pd.to_datetime("31/12/2025", dayfirst=True).tz_localize("Africa/Kampala"),
    )

DatetimeIndex(['2024-01-01 00:00:00+03:00', '2024-01-02 00:00:00+03:00',
               '2024-01-03 00:00:00+03:00', '2024-01-04 00:00:00+03:00',
               '2024-01-05 00:00:00+03:00', '2024-01-06 00:00:00+03:00',
               '2024-01-07 00:00:00+03:00', '2024-01-08 00:00:00+03:00',
               '2024-01-09 00:00:00+03:00', '2024-01-10 00:00:00+03:00',
               ...
               '2025-12-22 00:00:00+03:00', '2025-12-23 00:00:00+03:00',
               '2025-12-24 00:00:00+03:00', '2025-12-25 00:00:00+03:00',
               '2025-12-26 00:00:00+03:00', '2025-12-27 00:00:00+03:00',
               '2025-12-28 00:00:00+03:00', '2025-12-29 00:00:00+03:00',
               '2025-12-30 00:00:00+03:00', '2025-12-31 00:00:00+03:00'],
              dtype='datetime64[ns, Africa/Kampala]', length=731, freq='D')

In [60]:
pd.date_range(start="1/1/2024",
              periods=5,
              freq="3ME"
             )

DatetimeIndex(['2024-01-31', '2024-04-30', '2024-07-31', '2024-10-31',
               '2025-01-31'],
              dtype='datetime64[ns]', freq='3ME')

In [61]:
# specifing unit
pd.date_range(start="2017-01-01",
              periods=10,
              freq="100YS",
              unit='s'
             )

DatetimeIndex(['2017-01-01', '2117-01-01', '2217-01-01', '2317-01-01',
               '2417-01-01', '2517-01-01', '2617-01-01', '2717-01-01',
               '2817-01-01', '2917-01-01'],
              dtype='datetime64[s]', freq='100YS-JAN')

In [62]:
# Normalizing datetime
pd.date_range("2024-05-02 12:54:21",
              periods=5,
              normalize=True
             )

DatetimeIndex(['2024-05-02', '2024-05-03', '2024-05-04', '2024-05-05',
               '2024-05-06'],
              dtype='datetime64[ns]', freq='D')

<h4>Frequencies and Date Offsets</h4>

In [63]:
pd.date_range("2025-02-01",
              "2025-05-02 23:03:02",
              freq="4h"
             )

DatetimeIndex(['2025-02-01 00:00:00', '2025-02-01 04:00:00',
               '2025-02-01 08:00:00', '2025-02-01 12:00:00',
               '2025-02-01 16:00:00', '2025-02-01 20:00:00',
               '2025-02-02 00:00:00', '2025-02-02 04:00:00',
               '2025-02-02 08:00:00', '2025-02-02 12:00:00',
               ...
               '2025-05-01 08:00:00', '2025-05-01 12:00:00',
               '2025-05-01 16:00:00', '2025-05-01 20:00:00',
               '2025-05-02 00:00:00', '2025-05-02 04:00:00',
               '2025-05-02 08:00:00', '2025-05-02 12:00:00',
               '2025-05-02 16:00:00', '2025-05-02 20:00:00'],
              dtype='datetime64[ns]', length=546, freq='4h')

In [64]:
pd.date_range(
    "2000-01-01",
    periods=10,
    freq="1h30min3s",
)

DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 01:30:03',
               '2000-01-01 03:00:06', '2000-01-01 04:30:09',
               '2000-01-01 06:00:12', '2000-01-01 07:30:15',
               '2000-01-01 09:00:18', '2000-01-01 10:30:21',
               '2000-01-01 12:00:24', '2000-01-01 13:30:27'],
              dtype='datetime64[ns]', freq='5403s')

<h5>Week of month</h5>

In [65]:
monthly_meetups = pd.date_range(
    "2024-06-01",
    "2024-12-31",
    freq="WOM-3SAT"
)

In [66]:
monthly_meetups

DatetimeIndex(['2024-06-15', '2024-07-20', '2024-08-17', '2024-09-21',
               '2024-10-19', '2024-11-16', '2024-12-21'],
              dtype='datetime64[ns]', freq='WOM-3SAT')

<h4>Shifting (Leading and Lagging) Data</h4>

In [67]:
ts = pd.Series(
    np.random.standard_normal(10),
    index=pd.date_range(start="2030-01-01",
                        periods=10,
                        freq="ME",
                       )
)

In [68]:
(-1.491246--1.609596)/(-1.609596)

-0.07352776721612128

In [69]:
ts

2030-01-31    1.023336
2030-02-28    0.929407
2030-03-31    0.077000
2030-04-30   -0.460476
2030-05-31   -0.263135
2030-06-30   -0.695906
2030-07-31   -0.906637
2030-08-31   -0.120694
2030-09-30   -1.211898
2030-10-31    0.184416
Freq: ME, dtype: float64

In [70]:
ts.shift(2)

2030-01-31         NaN
2030-02-28         NaN
2030-03-31    1.023336
2030-04-30    0.929407
2030-05-31    0.077000
2030-06-30   -0.460476
2030-07-31   -0.263135
2030-08-31   -0.695906
2030-09-30   -0.906637
2030-10-31   -0.120694
Freq: ME, dtype: float64

In [71]:
ts.shift(-2)

2030-01-31    0.077000
2030-02-28   -0.460476
2030-03-31   -0.263135
2030-04-30   -0.695906
2030-05-31   -0.906637
2030-06-30   -0.120694
2030-07-31   -1.211898
2030-08-31    0.184416
2030-09-30         NaN
2030-10-31         NaN
Freq: ME, dtype: float64

In [72]:
# Percentage change
(ts-ts.shift(1))/ts

2030-01-31          NaN
2030-02-28    -0.101064
2030-03-31   -11.070271
2030-04-30     1.167217
2030-05-31    -0.749963
2030-06-30     0.621882
2030-07-31     0.232431
2030-08-31    -6.511887
2030-09-30     0.900409
2030-10-31     7.571530
Freq: ME, dtype: float64

In [73]:
# Shifting index two
ts.shift(2,
         freq="ME",
        )

2030-03-31    1.023336
2030-04-30    0.929407
2030-05-31    0.077000
2030-06-30   -0.460476
2030-07-31   -0.263135
2030-08-31   -0.695906
2030-09-30   -0.906637
2030-10-31   -0.120694
2030-11-30   -1.211898
2030-12-31    0.184416
Freq: ME, dtype: float64

In [74]:
ts.shift(
    3,
    freq="ME",
)

2030-04-30    1.023336
2030-05-31    0.929407
2030-06-30    0.077000
2030-07-31   -0.460476
2030-08-31   -0.263135
2030-09-30   -0.695906
2030-10-31   -0.906637
2030-11-30   -0.120694
2030-12-31   -1.211898
2031-01-31    0.184416
Freq: ME, dtype: float64

In [75]:
ts.shift(
    3,
    freq="D",
)

2030-02-03    1.023336
2030-03-03    0.929407
2030-04-03    0.077000
2030-05-03   -0.460476
2030-06-03   -0.263135
2030-07-03   -0.695906
2030-08-03   -0.906637
2030-09-03   -0.120694
2030-10-03   -1.211898
2030-11-03    0.184416
dtype: float64

In [76]:
ts.shift(
    1,
    freq="90min",
)

2030-01-31 01:30:00    1.023336
2030-02-28 01:30:00    0.929407
2030-03-31 01:30:00    0.077000
2030-04-30 01:30:00   -0.460476
2030-05-31 01:30:00   -0.263135
2030-06-30 01:30:00   -0.695906
2030-07-31 01:30:00   -0.906637
2030-08-31 01:30:00   -0.120694
2030-09-30 01:30:00   -1.211898
2030-10-31 01:30:00    0.184416
dtype: float64

In [77]:
from pandas.tseries.offsets import Day, MonthEnd, Hour

In [78]:
# Using rollback and rollforward
now = datetime(2024,5,26)

In [79]:
now

datetime.datetime(2024, 5, 26, 0, 0)

In [80]:
# Shifting to the end of month
now + MonthEnd()

Timestamp('2024-05-31 00:00:00')

In [81]:
now + MonthEnd(3)

Timestamp('2024-07-31 00:00:00')

In [82]:
offset = MonthEnd()

In [83]:
offset.rollback(now)

Timestamp('2024-04-30 00:00:00')

In [84]:
offset.rollforward(now)

Timestamp('2024-05-31 00:00:00')

In [85]:
now

datetime.datetime(2024, 5, 26, 0, 0)

In [86]:
# using offset creatively

In [87]:
ts.groupby(MonthEnd().rollforward).mean()

2030-01-31    1.023336
2030-02-28    0.929407
2030-03-31    0.077000
2030-04-30   -0.460476
2030-05-31   -0.263135
2030-06-30   -0.695906
2030-07-31   -0.906637
2030-08-31   -0.120694
2030-09-30   -1.211898
2030-10-31    0.184416
dtype: float64

In [88]:
# Using resample
ts.resample("ME").mean()

2030-01-31    1.023336
2030-02-28    0.929407
2030-03-31    0.077000
2030-04-30   -0.460476
2030-05-31   -0.263135
2030-06-30   -0.695906
2030-07-31   -0.906637
2030-08-31   -0.120694
2030-09-30   -1.211898
2030-10-31    0.184416
Freq: ME, dtype: float64

<h4>Time Zone Localization and Conversion</h4>

In [89]:
pytz.common_timezones[:5]

['Africa/Abidjan',
 'Africa/Accra',
 'Africa/Addis_Ababa',
 'Africa/Algiers',
 'Africa/Asmara']

In [90]:
# Timezone objects
ugandaTime = pytz.timezone("Africa/Kampala")

In [91]:
ugandaTime

<DstTzInfo 'Africa/Kampala' LMT+2:27:00 STD>

In [92]:
dates = pd.date_range(
    "2024-05-26 19:23",
    periods=10,
    tz="Africa/Kampala",
)

In [93]:
dates

DatetimeIndex(['2024-05-26 19:23:00+03:00', '2024-05-27 19:23:00+03:00',
               '2024-05-28 19:23:00+03:00', '2024-05-29 19:23:00+03:00',
               '2024-05-30 19:23:00+03:00', '2024-05-31 19:23:00+03:00',
               '2024-06-01 19:23:00+03:00', '2024-06-02 19:23:00+03:00',
               '2024-06-03 19:23:00+03:00', '2024-06-04 19:23:00+03:00'],
              dtype='datetime64[ns, Africa/Kampala]', freq='D')

In [94]:
# time series are time zone naive 
ts = pd.Series(
    np.random.standard_normal(len(dates)),
    index=dates,
)

In [95]:
ts

2024-05-26 19:23:00+03:00   -0.953344
2024-05-27 19:23:00+03:00   -0.587704
2024-05-28 19:23:00+03:00   -0.583090
2024-05-29 19:23:00+03:00   -2.123868
2024-05-30 19:23:00+03:00    0.025105
2024-05-31 19:23:00+03:00    1.151331
2024-06-01 19:23:00+03:00   -0.510724
2024-06-02 19:23:00+03:00   -0.918448
2024-06-03 19:23:00+03:00   -2.278808
2024-06-04 19:23:00+03:00   -1.041189
Freq: D, dtype: float64

In [96]:
# printing time zone
print(ts.index.tz)

Africa/Kampala


In [97]:
# Generating datetime ranges with a tz attribute
dates = pd.date_range(
    "2024-05-26 19:23",
    periods=10,
    tz="Africa/Kampala",
)
dates

DatetimeIndex(['2024-05-26 19:23:00+03:00', '2024-05-27 19:23:00+03:00',
               '2024-05-28 19:23:00+03:00', '2024-05-29 19:23:00+03:00',
               '2024-05-30 19:23:00+03:00', '2024-05-31 19:23:00+03:00',
               '2024-06-01 19:23:00+03:00', '2024-06-02 19:23:00+03:00',
               '2024-06-03 19:23:00+03:00', '2024-06-04 19:23:00+03:00'],
              dtype='datetime64[ns, Africa/Kampala]', freq='D')

In [98]:
# conversion
ts_utc = ts.tz_convert("utc")
ts_utc

2024-05-26 16:23:00+00:00   -0.953344
2024-05-27 16:23:00+00:00   -0.587704
2024-05-28 16:23:00+00:00   -0.583090
2024-05-29 16:23:00+00:00   -2.123868
2024-05-30 16:23:00+00:00    0.025105
2024-05-31 16:23:00+00:00    1.151331
2024-06-01 16:23:00+00:00   -0.510724
2024-06-02 16:23:00+00:00   -0.918448
2024-06-03 16:23:00+00:00   -2.278808
2024-06-04 16:23:00+00:00   -1.041189
Freq: D, dtype: float64

In [99]:
ts_nairobi = ts_utc.tz_convert("Africa/Nairobi")
ts_nairobi

2024-05-26 19:23:00+03:00   -0.953344
2024-05-27 19:23:00+03:00   -0.587704
2024-05-28 19:23:00+03:00   -0.583090
2024-05-29 19:23:00+03:00   -2.123868
2024-05-30 19:23:00+03:00    0.025105
2024-05-31 19:23:00+03:00    1.151331
2024-06-01 19:23:00+03:00   -0.510724
2024-06-02 19:23:00+03:00   -0.918448
2024-06-03 19:23:00+03:00   -2.278808
2024-06-04 19:23:00+03:00   -1.041189
Freq: D, dtype: float64

In [100]:
ts_cairo = ts_nairobi.tz_convert("Africa/Cairo")
ts_cairo

2024-05-26 19:23:00+03:00   -0.953344
2024-05-27 19:23:00+03:00   -0.587704
2024-05-28 19:23:00+03:00   -0.583090
2024-05-29 19:23:00+03:00   -2.123868
2024-05-30 19:23:00+03:00    0.025105
2024-05-31 19:23:00+03:00    1.151331
2024-06-01 19:23:00+03:00   -0.510724
2024-06-02 19:23:00+03:00   -0.918448
2024-06-03 19:23:00+03:00   -2.278808
2024-06-04 19:23:00+03:00   -1.041189
Freq: D, dtype: float64

In [101]:
ts_cairo.tz_convert("Africa/Kampala")

2024-05-26 19:23:00+03:00   -0.953344
2024-05-27 19:23:00+03:00   -0.587704
2024-05-28 19:23:00+03:00   -0.583090
2024-05-29 19:23:00+03:00   -2.123868
2024-05-30 19:23:00+03:00    0.025105
2024-05-31 19:23:00+03:00    1.151331
2024-06-01 19:23:00+03:00   -0.510724
2024-06-02 19:23:00+03:00   -0.918448
2024-06-03 19:23:00+03:00   -2.278808
2024-06-04 19:23:00+03:00   -1.041189
Freq: D, dtype: float64

<h4>Operations with Time Zone-Aware Timestamp Objects</h4>

In [102]:
stamp = pd.Timestamp("2024-05-27 17:32:04")
stamp

Timestamp('2024-05-27 17:32:04')

In [103]:
stamp_utc = stamp.tz_localize("utc")
stamp_utc

Timestamp('2024-05-27 17:32:04+0000', tz='UTC')

In [104]:
# using tz in definition
stamp_kampala = pd.Timestamp(
    "2024-05-27 17:48:49",
    tz="Africa/Kampala",
)
stamp_kampala

Timestamp('2024-05-27 17:48:49+0300', tz='Africa/Kampala')

In [105]:
# timestamps store utc value since the Unix epoch
stamp_kampala.value

1716821329000000000

In [106]:
stamp_utc.value

1716831124000000000

In [109]:
# Daylight Saving Transition
# 30 minutes before transitioning into DST
stamp = pd.Timestamp(
    "2012-03-11 01:30",
    tz="US/Eastern",
)
stamp

Timestamp('2012-03-11 01:30:00-0500', tz='US/Eastern')

In [108]:
stamp + Hour()

Timestamp('2012-03-11 03:30:00-0400', tz='US/Eastern')

In [112]:
# 90 minutes before transitioning out of DST
stamp = pd.Timestamp(
    "2012-11-04 00:30",
    tz="US/Eastern",
    )
stamp

Timestamp('2012-11-04 00:30:00-0400', tz='US/Eastern')

In [113]:
# Adding 2 hours
stamp + 2*Hour()

Timestamp('2012-11-04 01:30:00-0500', tz='US/Eastern')

<h4>Operations between different time zones</h4>

In [116]:
dates = pd.date_range(
    "2024-05-27 10:43",
    periods=20,
    freq="ME",
)
ts = pd.Series(
    np.random.standard_normal(
        len(dates)),
    index=dates,
)
ts

2024-05-31 10:43:00   -1.038097
2024-06-30 10:43:00   -0.081041
2024-07-31 10:43:00   -0.627819
2024-08-31 10:43:00   -0.351016
2024-09-30 10:43:00   -0.301941
2024-10-31 10:43:00   -0.997994
2024-11-30 10:43:00    1.134255
2024-12-31 10:43:00    0.955406
2025-01-31 10:43:00   -1.685222
2025-02-28 10:43:00    0.392146
2025-03-31 10:43:00   -0.150720
2025-04-30 10:43:00    0.626548
2025-05-31 10:43:00    0.291390
2025-06-30 10:43:00   -2.479878
2025-07-31 10:43:00    2.419575
2025-08-31 10:43:00    0.196056
2025-09-30 10:43:00    0.826058
2025-10-31 10:43:00   -1.900992
2025-11-30 10:43:00   -0.384296
2025-12-31 10:43:00    0.177505
Freq: ME, dtype: float64

In [118]:
ts1 = ts[:10].tz_localize("Africa/Kampala")
ts2 = ts[4:].tz_localize("Africa/Cairo")
result = ts1 + ts2
result.index

DatetimeIndex(['2024-05-31 07:43:00+00:00', '2024-06-30 07:43:00+00:00',
               '2024-07-31 07:43:00+00:00', '2024-08-31 07:43:00+00:00',
               '2024-09-30 07:43:00+00:00', '2024-10-31 07:43:00+00:00',
               '2024-11-30 07:43:00+00:00', '2024-11-30 08:43:00+00:00',
               '2024-12-31 07:43:00+00:00', '2024-12-31 08:43:00+00:00',
               '2025-01-31 07:43:00+00:00', '2025-01-31 08:43:00+00:00',
               '2025-02-28 07:43:00+00:00', '2025-02-28 08:43:00+00:00',
               '2025-03-31 08:43:00+00:00', '2025-04-30 07:43:00+00:00',
               '2025-05-31 07:43:00+00:00', '2025-06-30 07:43:00+00:00',
               '2025-07-31 07:43:00+00:00', '2025-08-31 07:43:00+00:00',
               '2025-09-30 07:43:00+00:00', '2025-10-31 08:43:00+00:00',
               '2025-11-30 08:43:00+00:00', '2025-12-31 08:43:00+00:00'],
              dtype='datetime64[ns, UTC]', freq=None)

In [119]:
# Operations between zone-aware and zone-naive will raise an exception

<h4>Periods and Period Arithmetics</h4>

In [125]:
p = pd.Period("2025",
              freq="Y-MAY",
             )
p

Period('2025', 'Y-MAY')

In [126]:
p + 4

Period('2029', 'Y-MAY')

In [128]:
p - 14

Period('2011', 'Y-MAY')

In [127]:
p

Period('2025', 'Y-MAY')

In [129]:
pd.Period("2030",
          freq="Y-MAY",
         ) - p

<5 * YearEnds: month=5>

In [134]:
# using periods range
periods = pd.period_range(
    "2024-05-27",
    "2030-05-27",
    freq="M",
)
periods

PeriodIndex(['2024-05', '2024-06', '2024-07', '2024-08', '2024-09', '2024-10',
             '2024-11', '2024-12', '2025-01', '2025-02', '2025-03', '2025-04',
             '2025-05', '2025-06', '2025-07', '2025-08', '2025-09', '2025-10',
             '2025-11', '2025-12', '2026-01', '2026-02', '2026-03', '2026-04',
             '2026-05', '2026-06', '2026-07', '2026-08', '2026-09', '2026-10',
             '2026-11', '2026-12', '2027-01', '2027-02', '2027-03', '2027-04',
             '2027-05', '2027-06', '2027-07', '2027-08', '2027-09', '2027-10',
             '2027-11', '2027-12', '2028-01', '2028-02', '2028-03', '2028-04',
             '2028-05', '2028-06', '2028-07', '2028-08', '2028-09', '2028-10',
             '2028-11', '2028-12', '2029-01', '2029-02', '2029-03', '2029-04',
             '2029-05', '2029-06', '2029-07', '2029-08', '2029-09', '2029-10',
             '2029-11', '2029-12', '2030-01', '2030-02', '2030-03', '2030-04',
             '2030-05'],
            dtype='period[M

In [136]:
# using periods for index
pd.Series(
    np.random.standard_normal(73),
    index=periods,
)

2024-05   -0.575321
2024-06   -0.252755
2024-07   -0.560423
2024-08    0.795612
2024-09   -0.058234
             ...   
2030-01    0.516361
2030-02   -1.531843
2030-03   -1.176563
2030-04    0.174339
2030-05   -1.054271
Freq: M, Length: 73, dtype: float64

In [137]:
# using strings
values = [
    "2024Q1",
    "2025Q3",
    "2026Q1",
]
values

['2024Q1', '2025Q3', '2026Q1']

In [138]:
# 
index = pd.PeriodIndex(values,
                       freq="Q-DEC",
                      )
index

PeriodIndex(['2024Q1', '2025Q3', '2026Q1'], dtype='period[Q-DEC]')

<h4>Period Frequency Conversions</h4>

In [139]:
p = pd.Period(
    "2025",
    freq="Y-DEC",
)
p

Period('2025', 'Y-DEC')

In [141]:
# conversion
p.asfreq("M",
         how="start",
        )

Period('2025-12', 'M')

In [143]:
# conversion
p.asfreq("M",
         how="end",
        )

Period('2025-12', 'M')

In [145]:
# Fiscal year
p = pd.Period(
    "2024",
    freq="Y-JUN",
)
p

Period('2024', 'Y-JUN')

In [146]:
p.asfreq("M",
         how="start"
        )

Period('2023-07', 'M')

In [147]:
p.asfreq("M",
         how="end",
        )

Period('2024-06', 'M')