# Time Series

In [1]:
import numpy as np
import pandas as pd

### Date and Time Data Types and Tools

In [2]:
# datetime is a built-in Python module for date and time data
from datetime import datetime

now = datetime.now()
now

datetime.datetime(2025, 3, 7, 13, 25, 50, 885093)

In [3]:
now.year, now.month, now.day

(2025, 3, 7)

In [4]:
# timedelta represents the temporal difference between two datetime objects
delta = now - datetime(1993, 8, 30)
delta

datetime.timedelta(11512, 48350, 885093)

In [5]:
delta.days

11512

In [6]:
# datetime + timedelta arithmetic produces another datetime, offset by the given span
from datetime import timedelta

start = datetime(year=1996, month=5, day=16)
start + timedelta(days=8417)

datetime.datetime(2019, 6, 2, 0, 0)

Converting Between String and Datetime

In [7]:
# You can format datetime objects as strings
stamp = datetime(2012, 5, 20)
str(stamp)

'2012-05-20 00:00:00'

In [8]:
stamp.strftime("%m-%d-%Y")

'05-20-2012'

In [9]:
# strptime converts strings to datetime objects
value = "2011-01-03"
datetime.strptime(value, "%Y-%m-%d")

datetime.datetime(2011, 1, 3, 0, 0)

In [10]:
# The pandas method to_datetime is able to parse many different kinds of date representations
datestrs = ["2011-07-06 12:00:00", "2011-08-06 00:01:00"]
pd.to_datetime(datestrs)

DatetimeIndex(['2011-07-06 12:00:00', '2011-08-06 00:01:00'], dtype='datetime64[ns]', freq=None)

In [11]:
# to_datetime also handles missing values
idx = pd.to_datetime(datestrs + [None])
idx

DatetimeIndex(['2011-07-06 12:00:00', '2011-08-06 00:01:00', 'NaT'], dtype='datetime64[ns]', freq=None)

In [12]:
# The simplest time series is a Series whose index is made of time stamps.
# Six irregularly spaced days in January 2011 serve as the index here.
dates = [datetime(2011, 1, day) for day in (2, 5, 7, 8, 10, 12)]

ts = pd.Series(np.random.standard_normal(len(dates)), index=dates)
ts

2011-01-02    1.300250
2011-01-05    1.791890
2011-01-07   -0.741286
2011-01-08   -0.445033
2011-01-10    0.279504
2011-01-12   -0.500034
dtype: float64

In [13]:
# The index for ts has been put in a DatetimeIndex
ts.index

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

In [14]:
# Arithmetic operations align on the dates
ts + ts[::2]

2011-01-02    2.600500
2011-01-05         NaN
2011-01-07   -1.482571
2011-01-08         NaN
2011-01-10    0.559008
2011-01-12         NaN
dtype: float64

In [15]:
# Check data type of the time stamps
ts.index.dtype

dtype('<M8[ns]')

In [16]:
# Scalar values from the index are pandas Timestamp objects
stamp = ts.index[0]
stamp

Timestamp('2011-01-02 00:00:00')

Indexing, Selection, Subsetting

In [17]:
# Indexing and selecting work like normal
stamp = ts.index[2]
ts[stamp]

-0.7412855512285581

In [18]:
# Can also pass a string
ts["2011-01-10"]

0.27950389084843474

In [19]:
# For a long series, a year string or a year-month string selects every matching row
longer_ts = pd.Series(
    np.random.standard_normal(1000),
    index=pd.date_range(start="2000-01-01", periods=1000),
)
longer_ts

2000-01-01    0.497693
2000-01-02    0.029211
2000-01-03    1.004710
2000-01-04    0.666687
2000-01-05    3.061966
                ...   
2002-09-22   -1.100471
2002-09-23   -1.171046
2002-09-24    0.018129
2002-09-25   -3.109536
2002-09-26    1.401633
Freq: D, Length: 1000, dtype: float64

In [20]:
longer_ts["2002"]

2002-01-01   -0.433147
2002-01-02    0.050249
2002-01-03    1.453103
2002-01-04    0.147872
2002-01-05    1.213494
                ...   
2002-09-22   -1.100471
2002-09-23   -1.171046
2002-09-24    0.018129
2002-09-25   -3.109536
2002-09-26    1.401633
Freq: D, Length: 269, dtype: float64

In [21]:
longer_ts["2002-03"]

2002-03-01    0.447770
2002-03-02   -1.488827
2002-03-03    2.483540
2002-03-04   -0.065426
2002-03-05   -0.876956
2002-03-06    1.762056
2002-03-07   -0.392100
2002-03-08   -1.336748
2002-03-09   -1.522183
2002-03-10   -0.276785
2002-03-11    0.869460
2002-03-12    0.935783
2002-03-13    0.171912
2002-03-14   -1.692298
2002-03-15    0.447979
2002-03-16    1.500213
2002-03-17    1.117239
2002-03-18   -0.754951
2002-03-19    0.142284
2002-03-20    1.710086
2002-03-21    0.380919
2002-03-22    0.685490
2002-03-23   -1.571999
2002-03-24    0.028199
2002-03-25    0.510799
2002-03-26   -0.575814
2002-03-27    0.869445
2002-03-28    0.810554
2002-03-29   -0.508499
2002-03-30   -1.421418
2002-03-31    0.720200
Freq: D, dtype: float64

In [22]:
# Can also slice with datetime objects
ts[datetime(2011,1,7):]

2011-01-07   -0.741286
2011-01-08   -0.445033
2011-01-10    0.279504
2011-01-12   -0.500034
dtype: float64

In [23]:
ts[datetime(2011,1,7):datetime(2011,8,10)]

2011-01-07   -0.741286
2011-01-08   -0.445033
2011-01-10    0.279504
2011-01-12   -0.500034
dtype: float64

In [24]:
# Can also slice with time stamps not in the time series
ts["2011-01-06":"2011-01-11"]

2011-01-07   -0.741286
2011-01-08   -0.445033
2011-01-10    0.279504
dtype: float64

In [25]:
# truncate slices a Series between two dates
ts.truncate(after="2011-01-08")

2011-01-02    1.300250
2011-01-05    1.791890
2011-01-07   -0.741286
2011-01-08   -0.445033
dtype: float64

In [26]:
# All of this also applies to DataFrames when acting on the rows
dates = pd.date_range("2000-01-01", periods=100, freq="W-WED")
dates

DatetimeIndex(['2000-01-05', '2000-01-12', '2000-01-19', '2000-01-26',
               '2000-02-02', '2000-02-09', '2000-02-16', '2000-02-23',
               '2000-03-01', '2000-03-08', '2000-03-15', '2000-03-22',
               '2000-03-29', '2000-04-05', '2000-04-12', '2000-04-19',
               '2000-04-26', '2000-05-03', '2000-05-10', '2000-05-17',
               '2000-05-24', '2000-05-31', '2000-06-07', '2000-06-14',
               '2000-06-21', '2000-06-28', '2000-07-05', '2000-07-12',
               '2000-07-19', '2000-07-26', '2000-08-02', '2000-08-09',
               '2000-08-16', '2000-08-23', '2000-08-30', '2000-09-06',
               '2000-09-13', '2000-09-20', '2000-09-27', '2000-10-04',
               '2000-10-11', '2000-10-18', '2000-10-25', '2000-11-01',
               '2000-11-08', '2000-11-15', '2000-11-22', '2000-11-29',
               '2000-12-06', '2000-12-13', '2000-12-20', '2000-12-27',
               '2001-01-03', '2001-01-10', '2001-01-17', '2001-01-24',
      

In [27]:
long_df = pd.DataFrame(np.random.standard_normal((100, 4)), index=dates, columns=["Colorado", "Texas", "New York", "Ohio"])
long_df

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,0.508699,-0.255436,-0.908414,-0.590658
2000-01-12,0.268033,-0.404980,0.145762,-0.018338
2000-01-19,-1.998909,-1.297812,-0.048914,1.004660
2000-01-26,-0.234127,0.516572,1.567692,0.607064
2000-02-02,-0.350352,0.546201,0.376394,0.419440
...,...,...,...,...
2001-10-31,-1.034822,0.794559,-0.285735,-1.249776
2001-11-07,0.915790,-1.019521,-0.669813,1.397330
2001-11-14,-0.998763,-0.930168,0.002972,-0.534039
2001-11-21,-1.317440,0.227047,2.241553,-1.238690


In [28]:
long_df.loc["2001-06"]

Unnamed: 0,Colorado,Texas,New York,Ohio
2001-06-06,0.225503,2.702113,0.212293,-1.158209
2001-06-13,-0.732527,1.939059,-0.841269,-0.400712
2001-06-20,0.26138,1.177061,0.451566,1.125236
2001-06-27,-0.623115,0.128024,1.403821,0.000107


Time Series with Duplicate Indices

In [29]:
# Build an index in which 2000-01-02 occurs three times
dates = pd.DatetimeIndex(["2000-01-01"] + ["2000-01-02"] * 3 + ["2000-01-03"])
dup_ts = pd.Series(np.arange(len(dates)), index=dates)
dup_ts

2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int64

In [30]:
# Check if the index is unique
dup_ts.index.is_unique

False

In [31]:
# Indexing this object will either produce a scalar or a slice (if you hit a repeated index)
dup_ts["2000-01-03"]

4

In [32]:
dup_ts["2000-01-02"]

2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int64

In [33]:
# We can aggregate the info from non-unique indices
grouped = dup_ts.groupby(level=0)
grouped.mean()

2000-01-01    0
2000-01-02    2
2000-01-03    4
dtype: int64

In [34]:
grouped.sum()

2000-01-01    0
2000-01-02    6
2000-01-03    4
dtype: int64

### Date Ranges, Frequencies, and Shifting

In [35]:
# Given a time series, we can resample to a fixed frequency.
# For instance, resample every day:
ts

2011-01-02    1.300250
2011-01-05    1.791890
2011-01-07   -0.741286
2011-01-08   -0.445033
2011-01-10    0.279504
2011-01-12   -0.500034
dtype: float64

In [36]:
resampler = ts.resample("D")
resampler

<pandas.core.resample.DatetimeIndexResampler object at 0x7fd511db02e8>

Generating Date Ranges

In [37]:
# date_range builds a DatetimeIndex; given only a start and an end date it produces one entry per calendar day
index = pd.date_range(start="2012-04-01", end="2012-06-01")
index

DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20',
               '2012-04-21', '2012-04-22', '2012-04-23', '2012-04-24',
               '2012-04-25', '2012-04-26', '2012-04-27', '2012-04-28',
               '2012-04-29', '2012-04-30', '2012-05-01', '2012-05-02',
               '2012-05-03', '2012-05-04', '2012-05-05', '2012-05-06',
               '2012-05-07', '2012-05-08', '2012-05-09', '2012-05-10',
               '2012-05-11', '2012-05-12', '2012-05-13', '2012-05-14',
               '2012-05-15', '2012-05-16', '2012-05-17', '2012-05-18',
               '2012-05-19', '2012-05-20', '2012-05-21', '2012-05-22',
               '2012-05-23', '2012-05-24', '2012-05-25', '2012-05-26',
      

In [38]:
# Can also pass just a start (or end) date together with the number of periods to generate
pd.date_range(start="2012-04-01", periods=20)

DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20'],
              dtype='datetime64[ns]', freq='D')

In [39]:
# Can also pass a custom frequency
# Here BMonthEnd is the business end-of-month offset (the last business day of each month).
# The offset object is passed instead of its string alias because the "BM" alias is
# deprecated as of pandas 2.2 (renamed "BME"), while the object form is version-independent.
business_month_ends = pd.date_range("2000-01-01", "2000-12-01", freq=pd.offsets.BMonthEnd())
business_month_ends

DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', '2000-04-28',
               '2000-05-31', '2000-06-30', '2000-07-31', '2000-08-31',
               '2000-09-29', '2000-10-31', '2000-11-30'],
              dtype='datetime64[ns]', freq='BM')

In [40]:
# date_range preserves the start or end timestamp if you pass one
pd.date_range("2012-05-02 12:56:38", periods=5)

DatetimeIndex(['2012-05-02 12:56:38', '2012-05-03 12:56:38',
               '2012-05-04 12:56:38', '2012-05-05 12:56:38',
               '2012-05-06 12:56:38'],
              dtype='datetime64[ns]', freq='D')

In [41]:
# Can also normalize to midnight
pd.date_range("2012-05-02 12:56:31", periods=5, normalize=True)

DatetimeIndex(['2012-05-02', '2012-05-03', '2012-05-04', '2012-05-05',
               '2012-05-06'],
              dtype='datetime64[ns]', freq='D')

Frequencies and Date Offsets

In [42]:
# Frequencies in pandas are composed of a base frequency and a multiplier
from pandas.tseries.offsets import Hour, Minute
# For instance, we can create an hour offset
hour = Hour()
hour

<Hour>

In [43]:
# Or a four hour offset
four_hours = Hour(4)
four_hours

<4 * Hours>

In [45]:
# Typically we don't create these objects though, and instead just pass a frequency string to date_range.
# The lowercase "h" alias is used because the uppercase "H" spelling is deprecated as of pandas 2.2.
four_hourly = pd.date_range("2000-01-01", "2000-01-03 23:59", freq="4h")
four_hourly

DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 04:00:00',
               '2000-01-01 08:00:00', '2000-01-01 12:00:00',
               '2000-01-01 16:00:00', '2000-01-01 20:00:00',
               '2000-01-02 00:00:00', '2000-01-02 04:00:00',
               '2000-01-02 08:00:00', '2000-01-02 12:00:00',
               '2000-01-02 16:00:00', '2000-01-02 20:00:00',
               '2000-01-03 00:00:00', '2000-01-03 04:00:00',
               '2000-01-03 08:00:00', '2000-01-03 12:00:00',
               '2000-01-03 16:00:00', '2000-01-03 20:00:00'],
              dtype='datetime64[ns]', freq='4H')

In [46]:
# Can also combine offsets
Hour(2) + Minute(30)

<150 * Minutes>

In [48]:
# A potentially useful frequency class is "week of month", which enables you to get dates like the third Friday of each month
monthly_dates = pd.date_range("2012-05-20", "2013-01-01", freq="WOM-3FRI")
list(monthly_dates)

[Timestamp('2012-06-15 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-07-20 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-08-17 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-09-21 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-10-19 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-11-16 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-12-21 00:00:00', freq='WOM-3FRI')]

Shifting (Leading and Lagging) Data

In [49]:
# Shifting means moving time-series data backward and forward in time.
# The index is month-end dated; the MonthEnd offset object is passed instead of the "M"
# string alias because that alias is deprecated as of pandas 2.2 (renamed "ME").
ts = pd.Series(np.random.standard_normal(4),
               index=pd.date_range("2000-01-01", periods=4, freq=pd.offsets.MonthEnd()))
ts

2000-01-31    0.522333
2000-02-29   -1.139288
2000-03-31    0.093550
2000-04-30    0.766541
Freq: M, dtype: float64

In [50]:
ts.shift(2)

2000-01-31         NaN
2000-02-29         NaN
2000-03-31    0.522333
2000-04-30   -1.139288
Freq: M, dtype: float64

In [51]:
ts.shift(-2)

2000-01-31    0.093550
2000-02-29    0.766541
2000-03-31         NaN
2000-04-30         NaN
Freq: M, dtype: float64

In [52]:
# The above shifts moved the data and left the index alone, which introduced NaNs.
# Passing a frequency makes shift move the timestamps instead, so no values are lost.
# The MonthEnd offset object replaces the "M" string alias, which is deprecated as of pandas 2.2.
ts.shift(2, freq=pd.offsets.MonthEnd())

2000-03-31    0.522333
2000-04-30   -1.139288
2000-05-31    0.093550
2000-06-30    0.766541
Freq: M, dtype: float64

In [53]:
# Date offsets can also be used with datetime or Timestamp objects
from pandas.tseries.offsets import Day, MonthEnd

now = datetime(2011, 10, 19)
now + 3*Day()

Timestamp('2011-10-22 00:00:00')

In [54]:
# Can also use an anchored offset like MonthEnd
now + MonthEnd()

Timestamp('2011-10-31 00:00:00')

In [55]:
now + MonthEnd(2)

Timestamp('2011-11-30 00:00:00')

In [57]:
# Date offsets can be used with groupby
ts = pd.Series(np.random.standard_normal(20), index=pd.date_range("2000-01-15", periods=20, freq="4D"))
ts

2000-01-15   -1.841918
2000-01-19   -0.314371
2000-01-23    0.802750
2000-01-27    1.501515
2000-01-31   -0.096673
2000-02-04   -2.094723
2000-02-08    0.236500
2000-02-12   -1.698964
2000-02-16   -0.088690
2000-02-20    1.162982
2000-02-24    1.235554
2000-02-28   -1.092219
2000-03-03    1.264980
2000-03-07   -0.035852
2000-03-11    1.064331
2000-03-15    0.412744
2000-03-19    1.493567
2000-03-23   -0.334742
2000-03-27   -1.842264
2000-03-31    0.959897
Freq: 4D, dtype: float64

In [58]:
# Find the mean value for each month
ts.groupby(MonthEnd().rollforward).mean()

2000-01-31    0.010261
2000-02-29   -0.334223
2000-03-31    0.372833
dtype: float64

In [59]:
# Can also use "resample" to achieve this.
# The MonthEnd offset object is passed instead of the "M" string alias, which is deprecated as of pandas 2.2.
ts.resample(pd.offsets.MonthEnd()).mean()

2000-01-31    0.010261
2000-02-29   -0.334223
2000-03-31    0.372833
Freq: M, dtype: float64

### Time Zone Handling

In [61]:
# The pytz library exposes the Olson database, which is a compilation of world time zone information
import pytz
pytz.common_timezones[-5:]

['US/Eastern', 'US/Hawaii', 'US/Mountain', 'US/Pacific', 'UTC']

In [62]:
# Can pull a time zone object from pytz
tz = pytz.timezone("America/New_York")
tz

<DstTzInfo 'America/New_York' LMT-1 day, 19:04:00 STD>

Time Zone Localization and Conversion

In [64]:
# By default, a pandas time series carries no time zone (it is "naive")
dates = pd.date_range(start="2012-03-09 09:30", periods=6)
ts = pd.Series(np.random.standard_normal(dates.size), index=dates)
ts

2012-03-09 09:30:00    0.167948
2012-03-10 09:30:00    0.321374
2012-03-11 09:30:00    0.603697
2012-03-12 09:30:00   -0.391657
2012-03-13 09:30:00   -1.030945
2012-03-14 09:30:00    1.886537
Freq: D, dtype: float64

In [65]:
# Show timezone of the previous object
print(ts.index.tz)

None


In [66]:
# We can re-generate the date ranges with a time zone
pd.date_range("2012-03-09 09:30", periods=10, tz="UTC")

DatetimeIndex(['2012-03-09 09:30:00+00:00', '2012-03-10 09:30:00+00:00',
               '2012-03-11 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00',
               '2012-03-15 09:30:00+00:00', '2012-03-16 09:30:00+00:00',
               '2012-03-17 09:30:00+00:00', '2012-03-18 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

In [67]:
# The tz_localize method attaches a time zone to a naive time series (the clock times are kept as-is, not converted)
ts

2012-03-09 09:30:00    0.167948
2012-03-10 09:30:00    0.321374
2012-03-11 09:30:00    0.603697
2012-03-12 09:30:00   -0.391657
2012-03-13 09:30:00   -1.030945
2012-03-14 09:30:00    1.886537
Freq: D, dtype: float64

In [68]:
ts_utc = ts.tz_localize("UTC")
ts_utc

2012-03-09 09:30:00+00:00    0.167948
2012-03-10 09:30:00+00:00    0.321374
2012-03-11 09:30:00+00:00    0.603697
2012-03-12 09:30:00+00:00   -0.391657
2012-03-13 09:30:00+00:00   -1.030945
2012-03-14 09:30:00+00:00    1.886537
Freq: D, dtype: float64

In [69]:
ts_utc.index

DatetimeIndex(['2012-03-09 09:30:00+00:00', '2012-03-10 09:30:00+00:00',
               '2012-03-11 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

In [70]:
# Once the time series has been localized to a time zone, tz_convert can convert it to a different one
ts_utc.tz_convert("America/New_York")

2012-03-09 04:30:00-05:00    0.167948
2012-03-10 04:30:00-05:00    0.321374
2012-03-11 05:30:00-04:00    0.603697
2012-03-12 05:30:00-04:00   -0.391657
2012-03-13 05:30:00-04:00   -1.030945
2012-03-14 05:30:00-04:00    1.886537
Freq: D, dtype: float64

In [71]:
ts_eastern = ts_utc.tz_convert("America/New_York")
ts_eastern.tz_convert("Europe/Berlin")

2012-03-09 10:30:00+01:00    0.167948
2012-03-10 10:30:00+01:00    0.321374
2012-03-11 10:30:00+01:00    0.603697
2012-03-12 10:30:00+01:00   -0.391657
2012-03-13 10:30:00+01:00   -1.030945
2012-03-14 10:30:00+01:00    1.886537
Freq: D, dtype: float64

Operations with Time Zone-Aware Timestamp Objects

In [72]:
# Timestamp objects can also be localized and converted
stamp = pd.Timestamp("2011-03-12 04:00")
stamp_utc = stamp.tz_localize("utc")
stamp_utc.tz_convert("America/New_York")

Timestamp('2011-03-11 23:00:00-0500', tz='America/New_York')

In [74]:
# Can pass a time zone when creating the object
stamp_moscow = pd.Timestamp("2011-03-12 04:00", tz="Europe/Moscow")
stamp_moscow

Timestamp('2011-03-12 04:00:00+0300', tz='Europe/Moscow')

In [75]:
# FYI - Timestamp objects store the time as UTC nanoseconds since the Unix epoch, so the value is unchanged by time zone conversion
stamp_utc.value

1299902400000000000

In [76]:
stamp_utc.tz_convert("America/New_York").value

1299902400000000000

Operations Between Different Time Zones

In [77]:
# Combining time series with different time zones converts them to UTC
dates = pd.date_range("2012-03-07 09:30", periods=10, freq="B")
ts = pd.Series(np.random.standard_normal(len(dates)), index=dates)
ts

2012-03-07 09:30:00    0.760348
2012-03-08 09:30:00   -1.369373
2012-03-09 09:30:00    0.469974
2012-03-12 09:30:00   -0.360943
2012-03-13 09:30:00   -0.655090
2012-03-14 09:30:00   -0.604645
2012-03-15 09:30:00   -1.945762
2012-03-16 09:30:00   -1.307053
2012-03-19 09:30:00   -0.792652
2012-03-20 09:30:00   -0.672313
Freq: B, dtype: float64

In [78]:
ts1 = ts[:7].tz_localize("Europe/London")
ts2 = ts1[2:].tz_convert("Europe/Moscow")

result = ts1 + ts2
result.index

DatetimeIndex(['2012-03-07 09:30:00+00:00', '2012-03-08 09:30:00+00:00',
               '2012-03-09 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00',
               '2012-03-15 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq=None)