# Time Series

In [1]:
import numpy as np
import pandas as pd

### Date and Time Data Types and Tools

In [2]:
# datetime is a standard-library module for working with dates and times;
# the datetime class imported here represents a combined date and time
from datetime import datetime

# datetime.now() returns the current date and time as a datetime object
now = datetime.now()
now

datetime.datetime(2025, 3, 5, 14, 14, 59, 895235)

In [3]:
# The calendar fields are plain attributes; the cell displays them as a tuple
now.year, now.month, now.day

(2025, 3, 5)

In [4]:
# Subtracting one datetime from another yields a timedelta, which represents
# the temporal difference between the two datetime objects
delta = now - datetime(1993, 8, 30)
delta

datetime.timedelta(11510, 51299, 895235)

In [5]:
# The whole-day component of the timedelta
delta.days

11510

In [6]:
# A datetime shifted by a timedelta is itself a datetime;
# the offset here is expressed as a number of days
from datetime import timedelta
start = datetime(1996, 5, 16)
start + timedelta(days=8417)

datetime.datetime(2019, 6, 2, 0, 0)

#### Converting Between String and Datetime

In [7]:
# datetime objects can be formatted as strings; str() gives the default
# "YYYY-MM-DD HH:MM:SS" rendering
stamp = datetime(2012, 5, 20)
str(stamp)

'2012-05-20 00:00:00'

In [8]:
# strftime formats with an explicit pattern: %m = month, %d = day, %Y = four-digit year
stamp.strftime("%m-%d-%Y")

'05-20-2012'

In [9]:
# strptime is the inverse of strftime: it parses a string into a datetime
# object using the given format pattern
value = "2011-01-03"
datetime.strptime(value, "%Y-%m-%d")

datetime.datetime(2011, 1, 3, 0, 0)

In [10]:
# The pandas function pd.to_datetime is able to parse many different kinds of
# date representations; a list of strings is converted to a DatetimeIndex
datestrs = ["2011-07-06 12:00:00", "2011-08-06 00:01:00"]
pd.to_datetime(datestrs)

DatetimeIndex(['2011-07-06 12:00:00', '2011-08-06 00:01:00'], dtype='datetime64[ns]', freq=None)

In [11]:
# to_datetime also handles missing values: None is converted to NaT
# (the missing-value marker for timestamps)
idx = pd.to_datetime(datestrs + [None])
idx

DatetimeIndex(['2011-07-06 12:00:00', '2011-08-06 00:01:00', 'NaT'], dtype='datetime64[ns]', freq=None)

In [13]:
# A basic time series object is a Series indexed by time stamps
dates = [datetime(2011, 1, 2), datetime(2011, 1, 5),
         datetime(2011, 1, 7), datetime(2011, 1, 8),
         datetime(2011, 1, 10), datetime(2011, 1, 12)]

# Seed NumPy's global random generator so that Restart & Run All reproduces
# the same random values in this and every later cell (the seed value itself
# is arbitrary). Outputs recorded before the seed was added will change on
# the next run.
np.random.seed(12345)
ts = pd.Series(np.random.standard_normal(6), index=dates)
ts

2011-01-02   -0.399796
2011-01-05   -0.195137
2011-01-07    0.566215
2011-01-08   -0.567196
2011-01-10    0.660180
2011-01-12   -1.283986
dtype: float64

In [14]:
# The list of datetime objects passed as the index has been converted to a DatetimeIndex
ts.index

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

In [15]:
# Arithmetic operations align on the dates: ts[::2] keeps every second entry,
# so the dates it lacks come out as NaN in the sum
ts + ts[::2]

2011-01-02   -0.799591
2011-01-05         NaN
2011-01-07    1.132429
2011-01-08         NaN
2011-01-10    1.320360
2011-01-12         NaN
dtype: float64

In [16]:
# Check the data type of the time stamps: they are stored as NumPy datetime64
# values at nanosecond resolution
ts.index.dtype

dtype('<M8[ns]')

In [17]:
# Individual elements taken from a DatetimeIndex are pandas Timestamp objects
stamp = ts.index[0]
stamp

Timestamp('2011-01-02 00:00:00')

#### Indexing, Selection, Subsetting

In [18]:
# Label-based indexing and selection work as for any other Series;
# here the label is a Timestamp taken from the index itself
stamp = ts.index[2]
ts[stamp]

0.5662145157763183

In [19]:
# A string that can be interpreted as a date also works as a label
ts["2011-01-10"]

0.6601800271692849

In [20]:
# A longer series (1000 consecutive days starting 2000-01-01) makes it
# possible to select a whole year, or a year and a month, by string
longer_ts = pd.Series(
    np.random.standard_normal(1000),
    index=pd.date_range("2000-01-01", periods=1000),
)
longer_ts

2000-01-01   -0.174126
2000-01-02    0.352541
2000-01-03    0.113559
2000-01-04   -1.022098
2000-01-05    0.374102
                ...   
2002-09-22    0.028055
2002-09-23    2.372092
2002-09-24    0.941648
2002-09-25    0.826692
2002-09-26   -0.683522
Freq: D, Length: 1000, dtype: float64

In [22]:
# A year string selects every entry that falls in that year
longer_ts["2002"]

2002-01-01   -1.031735
2002-01-02    1.402316
2002-01-03    1.228141
2002-01-04    1.241446
2002-01-05    2.083028
                ...   
2002-09-22    0.028055
2002-09-23    2.372092
2002-09-24    0.941648
2002-09-25    0.826692
2002-09-26   -0.683522
Freq: D, Length: 269, dtype: float64

In [23]:
# A year-month string selects every entry that falls in that month
longer_ts["2002-03"]

2002-03-01   -0.499376
2002-03-02   -0.562038
2002-03-03   -1.149307
2002-03-04   -0.901481
2002-03-05   -0.483383
2002-03-06    0.475497
2002-03-07    0.249676
2002-03-08    0.169210
2002-03-09    1.570134
2002-03-10   -0.672725
2002-03-11   -2.162327
2002-03-12   -0.825350
2002-03-13   -0.510367
2002-03-14    1.491047
2002-03-15   -0.213421
2002-03-16   -1.209746
2002-03-17    0.800968
2002-03-18   -0.019403
2002-03-19   -0.825430
2002-03-20    1.123716
2002-03-21    0.054325
2002-03-22   -0.081192
2002-03-23    1.548375
2002-03-24   -0.065430
2002-03-25   -0.260355
2002-03-26   -1.683083
2002-03-27   -0.483307
2002-03-28   -0.900700
2002-03-29   -1.328814
2002-03-30    1.209376
2002-03-31   -0.644416
Freq: D, dtype: float64

In [24]:
# Can also slice with datetime objects; leaving the end open keeps
# everything from the start date onwards
ts[datetime(2011,1,7):]

2011-01-07    0.566215
2011-01-08   -0.567196
2011-01-10    0.660180
2011-01-12   -1.283986
dtype: float64

In [25]:
# The end bound (2011-08-10) is later than the last timestamp in ts
# (2011-01-12), so the slice simply runs to the end of the series.
# NOTE(review): if 2011-01-10 was intended, the month here is a typo — confirm
ts[datetime(2011,1,7):datetime(2011,8,10)]

2011-01-07    0.566215
2011-01-08   -0.567196
2011-01-10    0.660180
2011-01-12   -1.283986
dtype: float64

In [26]:
# Slice bounds do not have to be present in the index: this acts as a range
# query returning the entries that fall between the two dates
ts["2011-01-06":"2011-01-11"]

2011-01-07    0.566215
2011-01-08   -0.567196
2011-01-10    0.660180
dtype: float64

In [27]:
# truncate slices a Series between two dates; with only `after` given,
# everything later than 2011-01-08 is dropped (that date itself is kept)
ts.truncate(after="2011-01-08")

2011-01-02   -0.399796
2011-01-05   -0.195137
2011-01-07    0.566215
2011-01-08   -0.567196
dtype: float64

In [28]:
# The same selection techniques apply to the rows of a DataFrame.
# First build the row index: 100 weekly dates, each falling on a Wednesday
dates = pd.date_range(
    start="2000-01-01",
    periods=100,
    freq="W-WED",
)
dates

DatetimeIndex(['2000-01-05', '2000-01-12', '2000-01-19', '2000-01-26',
               '2000-02-02', '2000-02-09', '2000-02-16', '2000-02-23',
               '2000-03-01', '2000-03-08', '2000-03-15', '2000-03-22',
               '2000-03-29', '2000-04-05', '2000-04-12', '2000-04-19',
               '2000-04-26', '2000-05-03', '2000-05-10', '2000-05-17',
               '2000-05-24', '2000-05-31', '2000-06-07', '2000-06-14',
               '2000-06-21', '2000-06-28', '2000-07-05', '2000-07-12',
               '2000-07-19', '2000-07-26', '2000-08-02', '2000-08-09',
               '2000-08-16', '2000-08-23', '2000-08-30', '2000-09-06',
               '2000-09-13', '2000-09-20', '2000-09-27', '2000-10-04',
               '2000-10-11', '2000-10-18', '2000-10-25', '2000-11-01',
               '2000-11-08', '2000-11-15', '2000-11-22', '2000-11-29',
               '2000-12-06', '2000-12-13', '2000-12-20', '2000-12-27',
               '2001-01-03', '2001-01-10', '2001-01-17', '2001-01-24',
      

In [30]:
# A 100 x 4 frame of standard-normal values: one row per date in `dates`,
# one column per state
long_df = pd.DataFrame(
    np.random.standard_normal((100, 4)),
    index=dates,
    columns=["Colorado", "Texas", "New York", "Ohio"],
)
long_df

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,0.315524,-0.184355,-0.090671,-0.265871
2000-01-12,0.984907,0.922531,-0.035349,-1.321431
2000-01-19,0.292243,-0.034590,-1.246897,-1.412953
2000-01-26,-0.188758,1.524676,0.975212,1.846135
2000-02-02,0.235048,-0.117621,0.069873,0.231120
...,...,...,...,...
2001-10-31,-1.151944,-1.347425,1.448531,1.639323
2001-11-07,0.311945,0.773118,0.095852,-0.317246
2001-11-14,-2.063158,-0.198380,1.977131,-1.192748
2001-11-21,-0.197626,-0.477004,-0.303231,0.134074


In [31]:
# With .loc, a year-month string selects the rows that fall in that month
long_df.loc["2001-06"]

Unnamed: 0,Colorado,Texas,New York,Ohio
2001-06-06,-0.722725,-1.006398,-1.987438,-0.740961
2001-06-13,-1.306838,2.791703,-0.875507,0.631654
2001-06-20,-0.076538,0.980101,0.062029,2.285252
2001-06-27,-1.181226,-1.373412,0.753736,2.683172


#### Time Series with Duplicate Indices

In [32]:
# Several observations can share a timestamp: here 2000-01-02 occurs three times
dates = pd.DatetimeIndex([
    "2000-01-01",
    "2000-01-02",
    "2000-01-02",
    "2000-01-02",
    "2000-01-03",
])
dup_ts = pd.Series(np.arange(5), index=dates)
dup_ts

2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int64

In [33]:
# Check whether the index is unique (it is not: 2000-01-02 is repeated)
dup_ts.index.is_unique

False

In [34]:
# Indexing this object produces either a scalar or a slice, depending on
# whether the timestamp is repeated; 2000-01-03 occurs once, so this is a scalar
dup_ts["2000-01-03"]

4

In [35]:
# 2000-01-02 is repeated, so the result is a Series holding all matching entries
dup_ts["2000-01-02"]

2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int64

In [39]:
# Entries sharing a timestamp can be aggregated: groupby(level=0) groups on
# the index values, and the aggregation then yields one row per distinct timestamp
grouped = dup_ts.groupby(level=0)
grouped.mean()

2000-01-01    0
2000-01-02    2
2000-01-03    4
dtype: int64

In [40]:
# The same grouping can be reused with a different aggregation
grouped.sum()

2000-01-01    0
2000-01-02    6
2000-01-03    4
dtype: int64

### Date Ranges, Frequencies, and Shifting

In [41]:
# A time series with irregular spacing can be resampled to a fixed frequency
# (for instance, one entry per day). Display the series first: note the gaps
# between its dates.
ts

2011-01-02   -0.399796
2011-01-05   -0.195137
2011-01-07    0.566215
2011-01-08   -0.567196
2011-01-10    0.660180
2011-01-12   -1.283986
dtype: float64

In [42]:
# resample("D") requests daily frequency; the call returns a
# DatetimeIndexResampler object rather than a new Series
resampler = ts.resample("D")
resampler

<pandas.core.resample.DatetimeIndexResampler object at 0x7f95e43b63c8>

#### Generating Date Ranges

In [44]:
# The date_range function generates a DatetimeIndex at a particular frequency;
# given only a start and an end date, it produces one timestamp per day
index = pd.date_range("2012-04-01", "2012-06-01")
index

DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20',
               '2012-04-21', '2012-04-22', '2012-04-23', '2012-04-24',
               '2012-04-25', '2012-04-26', '2012-04-27', '2012-04-28',
               '2012-04-29', '2012-04-30', '2012-05-01', '2012-05-02',
               '2012-05-03', '2012-05-04', '2012-05-05', '2012-05-06',
               '2012-05-07', '2012-05-08', '2012-05-09', '2012-05-10',
               '2012-05-11', '2012-05-12', '2012-05-13', '2012-05-14',
               '2012-05-15', '2012-05-16', '2012-05-17', '2012-05-18',
               '2012-05-19', '2012-05-20', '2012-05-21', '2012-05-22',
               '2012-05-23', '2012-05-24', '2012-05-25', '2012-05-26',
      

In [45]:
# Instead of both endpoints, you can pass a start (or end) date together with
# the number of periods to generate
pd.date_range(start="2012-04-01", periods=20)

DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20'],
              dtype='datetime64[ns]', freq='D')

In [46]:
# Can also pass a custom frequency
# Here BM means business end-of-month: the last business day of each month
# NOTE(review): pandas 2.2+ deprecates the "BM" alias in favor of "BME"; it is
# kept here because the recorded output (freq='BM') comes from an older
# pandas — confirm the target version before changing it
pd.date_range("2000-01-01", "2000-12-01", freq="BM")

DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', '2000-04-28',
               '2000-05-31', '2000-06-30', '2000-07-31', '2000-08-31',
               '2000-09-29', '2000-10-31', '2000-11-30'],
              dtype='datetime64[ns]', freq='BM')

In [48]:
# date_range preserves the time of day of the start or end timestamp if you
# pass one: every generated timestamp carries the same 12:56:38
pd.date_range("2012-05-02 12:56:38", periods=5)

DatetimeIndex(['2012-05-02 12:56:38', '2012-05-03 12:56:38',
               '2012-05-04 12:56:38', '2012-05-05 12:56:38',
               '2012-05-06 12:56:38'],
              dtype='datetime64[ns]', freq='D')

In [49]:
# normalize=True snaps the generated timestamps to midnight, discarding the
# time of day given in the start string
pd.date_range("2012-05-02 12:56:31", periods=5, normalize=True)

DatetimeIndex(['2012-05-02', '2012-05-03', '2012-05-04', '2012-05-05',
               '2012-05-06'],
              dtype='datetime64[ns]', freq='D')