In [1]:
import pandas as pd
import numpy as np

<h3>Date and Time Data Types and Tools</h3>

In [2]:
from datetime import datetime
from datetime import timedelta

In [3]:
now = datetime.now()

In [4]:
now

datetime.datetime(2024, 5, 22, 17, 44, 34, 153763)

In [5]:
now.year, now.month, now.day, now.hour, now.minute

(2024, 5, 22, 17, 44)

In [6]:
now.year

2024

In [7]:
# delta
delta = datetime(2024, 5, 21) - datetime(2016, 8, 26, 8, 54)
delta

datetime.timedelta(days=2824, seconds=54360)

In [8]:
# numerical evaluations
start = datetime(2024, 1, 1)
start + timedelta(3000)

datetime.datetime(2032, 3, 19, 0, 0)

<h4>Converting between String and Datetime</h4>

In [14]:
stamp = datetime(2024, 5, 22)

In [15]:
str(stamp)

'2024-05-22 00:00:00'

In [67]:
# Conversion to a specified string format
stamp.strftime("%Y-%B-%d-%u-%A-%p")

'2024-April-12-5-Friday-AM'

In [19]:
value = "2024-12-03"

In [20]:
datetime.strptime(value, "%Y-%m-%d")

datetime.datetime(2024, 12, 3, 0, 0)

In [68]:
# converting a list of string dates to datetime format
datestrs = ["7/6/2025", "8/6/2025"]
[datetime.strptime(x, "%m/%d/%Y") for x in datestrs]

[datetime.datetime(2025, 7, 6, 0, 0), datetime.datetime(2025, 8, 6, 0, 0)]

In [24]:
# pandas to_datetime
datestrs = ["2011-07-06 12:00:00", "2025-08-06 00:00:00"]
pd.to_datetime(datestrs)

DatetimeIndex(['2011-07-06 12:00:00', '2025-08-06 00:00:00'], dtype='datetime64[ns]', freq=None)

In [27]:
# dataframe to datetime
df = pd.DataFrame(
    {"year": [2025, 2024, 2023],
     "month": [2, 4, 8],
     "day": [5, 4, 5]
    })

In [28]:
df

Unnamed: 0,year,month,day
0,2025,2,5
1,2024,4,4
2,2023,8,5


In [30]:
pd.to_datetime(df)

0   2025-02-05
1   2024-04-04
2   2023-08-05
dtype: datetime64[ns]

In [31]:
datestr = ["2025-07-06 12:00:00",
           "2034-08-04 13:45:34"]

In [69]:
# Missing values, Not a Time values are also accepted
idx = pd.to_datetime(datestrs + [None])

In [70]:
idx

DatetimeIndex(['2025-07-06', '2025-08-06', 'NaT'], dtype='datetime64[ns]', freq=None)

In [34]:
# indexing idx
idx[2]

NaT

In [35]:
pd.isna(idx)

array([False, False,  True])

<h4>Time Series Basics</h4>

In [49]:
# time series
dates = [datetime(2024, 6, 2),
        datetime(2025, 10, 21),
        datetime(2024, 4, 12),
        datetime(2023, 10, 5),
        datetime(2016, 11, 3),
        datetime(2017, 3, 15),
        datetime(2019, 6, 3),
        datetime(2020, 5, 8),
       ]

In [52]:
ts =pd.Series(np.random.standard_normal(8),
              index=dates,
             )

In [53]:
ts

2024-06-02   -0.018296
2025-10-21    1.354557
2024-04-12   -0.403573
2023-10-05   -0.515219
2016-11-03    1.584786
2017-03-15   -0.715544
2019-06-03    0.446754
2020-05-08   -0.251034
dtype: float64

In [61]:
ts + ts[::3]

2016-11-03         NaN
2017-03-15         NaN
2019-06-03    0.893509
2020-05-08         NaN
2023-10-05   -1.030437
2024-04-12         NaN
2024-06-02   -0.036592
2025-10-21         NaN
dtype: float64

In [60]:
ts[::3]

2024-06-02   -0.018296
2023-10-05   -0.515219
2019-06-03    0.446754
dtype: float64

<h4>indexing, Selecting and Subsetting</h4>

In [63]:
# same as indexing a series
stamp = ts.index[2]

In [64]:
stamp

Timestamp('2024-04-12 00:00:00')

In [66]:
ts.iat[0]

-0.018296174679102075

In [73]:
# indexing by year
longer_ts = pd.Series(np.random.standard_normal(2000),
                      index=pd.date_range("2024-05-22",
                                          periods=2000))

In [76]:
longer_ts["2024"]

2024-05-22   -0.662722
2024-05-23    0.707959
2024-05-24    0.408518
2024-05-25    0.060742
2024-05-26   -2.127251
                ...   
2024-12-27   -0.768944
2024-12-28    0.929023
2024-12-29    0.705880
2024-12-30    0.240432
2024-12-31   -1.005324
Freq: D, Length: 224, dtype: float64

In [77]:
# selecting by year-month
longer_ts["2024-06"]

2024-06-01   -0.915799
2024-06-02   -1.034259
2024-06-03   -1.828211
2024-06-04   -0.011842
2024-06-05   -1.121998
2024-06-06   -0.252519
2024-06-07    1.441098
2024-06-08    2.307765
2024-06-09    1.979294
2024-06-10    1.472147
2024-06-11    0.972631
2024-06-12   -0.465573
2024-06-13   -0.307500
2024-06-14    0.281660
2024-06-15    0.517487
2024-06-16    0.071317
2024-06-17    0.441356
2024-06-18   -0.437611
2024-06-19    0.987432
2024-06-20   -0.487918
2024-06-21    0.016853
2024-06-22    0.127042
2024-06-23    0.837609
2024-06-24    0.313568
2024-06-25    1.533555
2024-06-26    0.424615
2024-06-27   -0.627871
2024-06-28    2.177756
2024-06-29    0.432929
2024-06-30   -0.252241
Freq: D, dtype: float64

In [83]:
# Slicing by datetime
longer_ts[datetime(2024,5,22):
datetime(2024,8,1)
]

2024-05-22   -0.662722
2024-05-23    0.707959
2024-05-24    0.408518
2024-05-25    0.060742
2024-05-26   -2.127251
                ...   
2024-07-28   -2.411230
2024-07-29    0.999168
2024-07-30    0.423904
2024-07-31   -1.064251
2024-08-01    1.392535
Freq: D, Length: 72, dtype: float64

In [85]:
# Slicing with a timestamp not contained in the ts
longer_ts[datetime(2016, 8, 26):
datetime(2024, 8, 26)
]

2024-05-22   -0.662722
2024-05-23    0.707959
2024-05-24    0.408518
2024-05-25    0.060742
2024-05-26   -2.127251
                ...   
2024-08-22    0.977636
2024-08-23    0.380892
2024-08-24    1.578981
2024-08-25   -1.515578
2024-08-26   -0.288554
Freq: D, Length: 97, dtype: float64

In [86]:
longer_ts

2024-05-22   -0.662722
2024-05-23    0.707959
2024-05-24    0.408518
2024-05-25    0.060742
2024-05-26   -2.127251
                ...   
2029-11-07   -0.695426
2029-11-08    0.404501
2029-11-09   -0.641172
2029-11-10   -0.802467
2029-11-11    2.327478
Freq: D, Length: 2000, dtype: float64

In [87]:
# truncating after a specific date
longer_ts.truncate(after="2025-05-22")

2024-05-22   -0.662722
2024-05-23    0.707959
2024-05-24    0.408518
2024-05-25    0.060742
2024-05-26   -2.127251
                ...   
2025-05-18    1.576586
2025-05-19   -0.493120
2025-05-20   -0.059988
2025-05-21    0.795982
2025-05-22    1.705959
Freq: D, Length: 366, dtype: float64

In [88]:
dates = pd.date_range("2024-01-01",
                      periods=100,
                      freq="W-WED"
                     )

In [90]:
long_df = pd.DataFrame(np.random.standard_normal((100,4)),
                       index=dates,
                       columns=["Iganga", "Jinja", "Kampala", "Busia"])

In [95]:
long_df.loc["2024-01"]

Unnamed: 0,Iganga,Jinja,Kampala,Busia
2024-01-03,1.605029,0.681984,0.172085,0.235513
2024-01-10,-1.576422,-0.360531,-1.388304,1.365153
2024-01-17,1.368761,-1.005712,0.090506,-0.760583
2024-01-24,0.429564,-0.068828,-1.23619,0.084886
2024-01-31,-1.832692,0.828218,-0.495548,1.319774
