In [2]:
import pandas as pd
import numpy as np

<h3>Date and Time Data Types and Tools</h3>

In [3]:
from datetime import datetime
from datetime import timedelta

In [4]:
# Snapshot the current local date and time. No tz argument is passed,
# so the result is a naive datetime (no timezone attached).
now = datetime.now()

In [5]:
# Echo the captured timestamp; a bare last expression is rendered as the cell output.
now

datetime.datetime(2024, 5, 23, 19, 35, 23, 17452)

In [6]:
# The calendar and clock components are exposed as integer attributes;
# listing several of them displays a tuple.
now.year, now.month, now.day, now.hour, now.minute

(2024, 5, 23, 19, 35)

In [7]:
# A single component can be read on its own.
now.year

2024

In [8]:
# Subtracting one datetime from another yields a timedelta, which stores
# the gap as whole days plus the leftover seconds.
delta = datetime(year=2024, month=5, day=21) - datetime(
    year=2016, month=8, day=26, hour=8, minute=54
)
delta

datetime.timedelta(days=2824, seconds=54360)

In [9]:
# Datetime arithmetic: adding a timedelta shifts a datetime forward.
# timedelta's first argument is a number of days.
start = datetime(year=2024, month=1, day=1)
start + timedelta(days=3000)

datetime.datetime(2032, 3, 19, 0, 0)

<h4>Converting between String and Datetime</h4>

In [10]:
# A date-only datetime; the time of day defaults to midnight.
stamp = datetime(year=2024, month=5, day=22)

In [11]:
# str() renders a datetime in "YYYY-MM-DD HH:MM:SS" form.
str(stamp)

'2024-05-22 00:00:00'

In [12]:
# Conversion to a specified string format.
# Directives used: %Y four-digit year, %B full month name, %d zero-padded day,
# %u ISO weekday number (Monday = 1), %A full weekday name, %p AM/PM marker.
stamp.strftime("%Y-%B-%d-%u-%A-%p")

'2024-May-22-3-Wednesday-AM'

In [13]:
# Date text in year-month-day order, to be parsed back into a datetime.
value = "2024-12-03"

In [14]:
# strptime is the inverse of strftime: the format string says how to read the text.
# Components not present in the format (here the time of day) come back as zero.
datetime.strptime(value, "%Y-%m-%d")

datetime.datetime(2024, 12, 3, 0, 0)

In [15]:
# Parse several month/day/year strings using one shared format.
datestrs = [
    "7/6/2025",
    "8/6/2025",
]
[datetime.strptime(date_text, "%m/%d/%Y") for date_text in datestrs]

[datetime.datetime(2025, 7, 6, 0, 0), datetime.datetime(2025, 8, 6, 0, 0)]

In [16]:
# pandas.to_datetime parses a whole list of date strings in one call
# and returns a DatetimeIndex.
datestrs = [
    "2011-07-06 12:00:00",
    "2025-08-06 00:00:00",
]
pd.to_datetime(datestrs)

DatetimeIndex(['2011-07-06 12:00:00', '2025-08-06 00:00:00'], dtype='datetime64[ns]', freq=None)

In [17]:
# A frame whose columns are named after date components (year / month / day);
# to_datetime can assemble one date per row from such a frame.
df = pd.DataFrame(
    {
        "year": [2025, 2024, 2023],
        "month": [2, 4, 8],
        "day": [5, 4, 5],
    }
)

In [18]:
# Show the component frame before it is converted.
df

Unnamed: 0,year,month,day
0,2025,2,5
1,2024,4,4
2,2023,8,5


In [19]:
# Combine the year / month / day columns row by row into a single datetime64 Series.
pd.to_datetime(df)

0   2025-02-05
1   2024-04-04
2   2023-08-05
dtype: datetime64[ns]

In [20]:
# NOTE(review): this list is bound to `datestr` (singular), but the following
# cell reads `datestrs` (plural) from the earlier to_datetime example, and its
# recorded output shows those older values. These two strings therefore appear
# to be unused — confirm which name was intended.
datestr = ["2025-07-06 12:00:00",
           "2034-08-04 13:45:34"]

In [21]:
# Missing entries are accepted as well: None is converted to NaT ("Not a Time").
idx = pd.to_datetime([*datestrs, None])

In [22]:
# Show the resulting DatetimeIndex, including the NaT placeholder.
idx

DatetimeIndex(['2011-07-06 12:00:00', '2025-08-06 00:00:00', 'NaT'], dtype='datetime64[ns]', freq=None)

In [23]:
# Positional indexing into the DatetimeIndex: position 2 is the entry that was None,
# so it comes back as NaT.
idx[2]

NaT

In [24]:
# pd.isna treats NaT as missing and returns a boolean array, one flag per entry.
pd.isna(idx)

array([False, False,  True])

<h4>Time Series Basics</h4>

In [25]:
# Eight datetimes (not in chronological order) that will label a time series.
dates = [
    datetime(2024, 6, 2),
    datetime(2025, 10, 21),
    datetime(2024, 4, 12),
    datetime(2023, 10, 5),
    datetime(2016, 11, 3),
    datetime(2017, 3, 15),
    datetime(2019, 6, 3),
    datetime(2020, 5, 8),
]

In [26]:
# Series of eight standard-normal draws labelled by the datetimes in `dates`;
# pandas turns the list of datetimes into a DatetimeIndex.
# A seeded Generator keeps the values identical on every Restart & Run All
# (the seed value itself is arbitrary).
ts = pd.Series(
    np.random.default_rng(seed=0).standard_normal(8),
    index=dates,
)

In [27]:
# Show the series; rows keep the (unsorted) order in which the dates were supplied.
ts

2024-06-02    0.213264
2025-10-21   -0.185569
2024-04-12    0.182388
2023-10-05    0.196072
2016-11-03   -0.230419
2017-03-15   -0.157625
2019-06-03    0.052849
2020-05-08    0.267221
dtype: float64

In [28]:
# Arithmetic between two series aligns on the index labels: dates present in
# both operands are summed, every other date becomes NaN.
ts + ts[::3]

2016-11-03         NaN
2017-03-15         NaN
2019-06-03    0.105699
2020-05-08         NaN
2023-10-05    0.392144
2024-04-12         NaN
2024-06-02    0.426529
2025-10-21         NaN
dtype: float64

In [29]:
# Every third element by position (positions 0, 3 and 6), for comparison with the sum above.
ts[::3]

2024-06-02    0.213264
2023-10-05    0.196072
2019-06-03    0.052849
dtype: float64

<h4>Indexing, Selecting and Subsetting</h4>

In [30]:
# The index can be indexed by position like any sequence; its elements are
# pandas Timestamp objects.
# NOTE: this rebinds `stamp`, which earlier held a plain datetime.
stamp = ts.index[2]

In [31]:
# Show the label taken from the index.
stamp

Timestamp('2024-04-12 00:00:00')

In [32]:
# .iat reads a single value by integer position (here the first element).
ts.iat[0]

0.21326441112883648

In [33]:
# A longer daily series: 2000 consecutive days starting 2024-05-22, used in
# the following cells to demonstrate selecting by year, by month and by slice.
# A seeded Generator keeps the values identical on every Restart & Run All
# (the seed value itself is arbitrary).
longer_ts = pd.Series(
    np.random.default_rng(seed=1).standard_normal(2000),
    index=pd.date_range("2024-05-22", periods=2000),
)

In [34]:
# Indexing by year: a year string selects every observation that falls in that year.
longer_ts["2024"]

2024-05-22    0.888045
2024-05-23    0.308801
2024-05-24   -0.611889
2024-05-25   -1.355504
2024-05-26   -0.623805
                ...   
2024-12-27   -1.450267
2024-12-28    1.030865
2024-12-29   -1.047969
2024-12-30   -0.105716
2024-12-31   -0.529283
Freq: D, Length: 224, dtype: float64

In [35]:
# Selecting by year-month: a "YYYY-MM" string selects every observation in that month.
longer_ts["2024-06"]

2024-06-01   -0.721884
2024-06-02    0.179313
2024-06-03    0.114105
2024-06-04    1.004096
2024-06-05    1.033414
2024-06-06    0.568836
2024-06-07   -0.276858
2024-06-08   -0.572352
2024-06-09   -2.396891
2024-06-10   -1.182333
2024-06-11   -1.806658
2024-06-12    0.790065
2024-06-13   -1.825670
2024-06-14   -1.816582
2024-06-15    0.181086
2024-06-16    1.087165
2024-06-17   -0.533102
2024-06-18   -1.012590
2024-06-19    0.134260
2024-06-20   -0.735115
2024-06-21    1.595760
2024-06-22   -0.170303
2024-06-23    0.181479
2024-06-24    0.582189
2024-06-25   -0.415368
2024-06-26    0.978739
2024-06-27   -0.537481
2024-06-28   -0.728823
2024-06-29    0.380556
2024-06-30   -0.943512
Freq: D, dtype: float64

In [36]:
# Slicing with datetime bounds; both endpoints are included in the result.
longer_ts[datetime(2024, 5, 22):datetime(2024, 8, 1)]

2024-05-22    0.888045
2024-05-23    0.308801
2024-05-24   -0.611889
2024-05-25   -1.355504
2024-05-26   -0.623805
                ...   
2024-07-28   -0.271999
2024-07-29   -1.178797
2024-07-30   -0.798791
2024-07-31    0.018890
2024-08-01    1.155198
Freq: D, Length: 72, dtype: float64

In [37]:
# Slice bounds do not have to exist in the index: the start here predates the
# first observation, so the result simply begins at the first available date.
longer_ts[datetime(2016, 8, 26):datetime(2024, 8, 26)]

2024-05-22    0.888045
2024-05-23    0.308801
2024-05-24   -0.611889
2024-05-25   -1.355504
2024-05-26   -0.623805
                ...   
2024-08-22   -0.850604
2024-08-23   -0.363636
2024-08-24   -0.131431
2024-08-25   -1.752736
2024-08-26   -0.606783
Freq: D, Length: 97, dtype: float64

In [38]:
# Show the full series again for reference before truncating it.
longer_ts

2024-05-22    0.888045
2024-05-23    0.308801
2024-05-24   -0.611889
2024-05-25   -1.355504
2024-05-26   -0.623805
                ...   
2029-11-07   -1.817091
2029-11-08    1.618764
2029-11-09   -0.177241
2029-11-10    0.760249
2029-11-11    0.723129
Freq: D, Length: 2000, dtype: float64

In [39]:
# Truncating after a specific date: rows later than the date are dropped,
# while the date itself is kept.
longer_ts.truncate(after="2025-05-22")

2024-05-22    0.888045
2024-05-23    0.308801
2024-05-24   -0.611889
2024-05-25   -1.355504
2024-05-26   -0.623805
                ...   
2025-05-18    1.131796
2025-05-19    0.420651
2025-05-20   -1.490575
2025-05-21   -0.161611
2025-05-22    0.437621
Freq: D, Length: 366, dtype: float64

In [40]:
# 100 weekly timestamps anchored on Wednesdays, beginning with the first
# Wednesday on or after 2024-01-01.
# NOTE: this rebinds `dates`, which previously held a list of datetime objects.
dates = pd.date_range("2024-01-01", periods=100, freq="W-WED")

In [41]:
# 100 x 4 frame of standard-normal draws, one row per weekly date in `dates`
# and one column per named location.
# A seeded Generator keeps the values identical on every Restart & Run All
# (the seed value itself is arbitrary).
long_df = pd.DataFrame(
    np.random.default_rng(seed=2).standard_normal((100, 4)),
    index=dates,
    columns=["Iganga", "Jinja", "Kampala", "Busia"],
)

In [45]:
# On a DataFrame, rows are selected by a year string through .loc.
long_df.loc["2024"]

Unnamed: 0,Iganga,Jinja,Kampala,Busia
2024-01-03,-1.499353,-2.136799,-0.098557,-0.956905
2024-01-10,0.272291,0.629934,-1.866105,-0.397772
2024-01-17,0.176997,-0.115254,-0.119011,-0.761398
2024-01-24,2.195403,-1.823289,-1.48348,0.332055
2024-01-31,-1.355002,0.326344,-1.339733,-0.159754
2024-02-07,0.217617,-1.646863,0.060874,-0.429657
2024-02-14,1.138525,0.34798,-0.671941,-0.170572
2024-02-21,0.659408,0.684733,1.069887,0.353939
2024-02-28,0.001701,-0.96353,-1.153121,1.199911
2024-03-06,-0.855045,-2.396617,-2.005608,-0.447509


<h4>Time Series with Duplicates</h4>

In [47]:
# An index in which 2000-01-02 occurs three times, to show that timestamps
# need not be unique.
# NOTE: this rebinds `dates` once more.
dates = pd.DatetimeIndex(
    [
        "2000-01-01",
        "2000-01-02",
        "2000-01-02",
        "2000-01-02",
        "2000-01-03",
    ]
)

In [49]:
# Integers 0..4 labelled by the index that contains repeated dates.
dup_ts = pd.Series(np.arange(5), index=dates)

In [50]:
# Show the series; the repeated 2000-01-02 label appears on three rows.
dup_ts

2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int32