In [3]:
from datetime import datetime
import pandas as pd
import numpy as np

In [5]:
# Six irregularly spaced days in January 2011 act as the index labels.
dates = [datetime(2011, 1, day) for day in (2, 5, 7, 8, 10, 12)]
# The values are standard-normal draws; no seed is set in the visible cells,
# so the numbers differ from run to run.
ts = pd.Series(data=np.random.randn(6), index=dates)
ts

2011-01-02   -1.801201
2011-01-05    0.861576
2011-01-07    0.083232
2011-01-08   -0.913351
2011-01-10    1.390370
2011-01-12   -0.677025
dtype: float64

Under the hood, these datetime objects have been put in a DatetimeIndex:

In [6]:
# Inspect the index that pandas built from the list of datetime objects.
ts.index

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

Like other Series, arithmetic operations between differently indexed time series
automatically align on the dates:

In [7]:
# Recall that ts[::2] selects every second element in ts, so only those dates
# exist in both operands; dates missing from ts[::2] come out as NaN in the sum.
ts + ts[::2]

2011-01-02   -3.602402
2011-01-05         NaN
2011-01-07    0.166464
2011-01-08         NaN
2011-01-10    2.780740
2011-01-12         NaN
dtype: float64

In [8]:
# pandas stores timestamps using NumPy's datetime64 data type at the nanosecond resolution.
# '<M8[ns]' is NumPy's shorthand for little-endian datetime64[ns].
ts.index.dtype

dtype('<M8[ns]')

In [9]:
# Scalar values from a DatetimeIndex are pandas Timestamp objects.
# A Timestamp can be substituted anywhere you would use a datetime object.
# Additionally, it understands how to do time zone conversions and other
# kinds of manipulations. More on this later.
# NOTE: older pandas releases also let a Timestamp carry frequency
# information; Timestamp.freq was deprecated in pandas 1.4 and removed in 2.0.
stamp = ts.index[0]
stamp

Timestamp('2011-01-02 00:00:00')

### Indexing, Selection, Subsetting

In [11]:
# A Timestamp taken from the index can be used directly as a label;
# here the third index entry selects the matching value.
stamp = ts.index[2]
ts[stamp]

0.08323194973278498

In [12]:
# A string that can be interpreted as a date also works as a label
# (month/day/year here, i.e. 10 January 2011).
ts['1/10/2011']

1.3903699612245497

In [13]:
# The same lookup written as a compact YYYYMMDD string.
ts['20110110']

1.3903699612245497

For longer time series, a year or only a year and month can be passed to easily select
slices of data:

In [14]:
# 1,000 consecutive daily timestamps starting on 1 January 2000, paired with
# standard-normal draws (no seed is set in the visible cells).
longer_ts = pd.Series(
    data=np.random.randn(1000),
    index=pd.date_range(start='1/1/2000', periods=1000),
)
longer_ts

2000-01-01   -0.341243
2000-01-02   -1.076505
2000-01-03    0.062173
2000-01-04    0.347057
2000-01-05    0.513411
                ...   
2002-09-22   -0.889498
2002-09-23   -0.640423
2002-09-24    0.405388
2002-09-25   -0.094983
2002-09-26   -0.053961
Freq: D, Length: 1000, dtype: float64

In [15]:
# Here, the string '2001' is interpreted as a year and selects that whole
# time period (365 daily rows).
longer_ts['2001']

2001-01-01   -0.603363
2001-01-02   -0.432898
2001-01-03   -0.020367
2001-01-04   -0.814956
2001-01-05    0.036789
                ...   
2001-12-27    1.057444
2001-12-28    0.557453
2001-12-29   -0.554600
2001-12-30   -0.611728
2001-12-31    1.806160
Freq: D, Length: 365, dtype: float64

In [16]:
# This also works if you specify the month: a year-month string narrows the
# selection to that single month (May 2001 here).
longer_ts['2001-05']

2001-05-01   -0.048873
2001-05-02    0.438102
2001-05-03   -1.549626
2001-05-04   -0.278033
2001-05-05   -0.913094
2001-05-06    1.280267
2001-05-07    0.685106
2001-05-08   -0.638284
2001-05-09   -1.512042
2001-05-10    0.454906
2001-05-11    0.481980
2001-05-12   -0.045652
2001-05-13    1.554120
2001-05-14   -0.012909
2001-05-15   -1.760707
2001-05-16   -0.776283
2001-05-17    1.575077
2001-05-18    1.097042
2001-05-19    0.847960
2001-05-20    0.561346
2001-05-21   -0.381041
2001-05-22   -1.686205
2001-05-23    0.524084
2001-05-24    2.117144
2001-05-25   -0.988429
2001-05-26    1.442961
2001-05-27    0.804714
2001-05-28   -0.204427
2001-05-29   -0.163918
2001-05-30    0.251735
2001-05-31   -0.006198
Freq: D, dtype: float64

In [17]:
# Slicing with datetime objects works as well; the start label itself
# (2011-01-07) is included in the result.
ts[datetime(2011, 1, 7):]

2011-01-07    0.083232
2011-01-08   -0.913351
2011-01-10    1.390370
2011-01-12   -0.677025
dtype: float64

Because most time series data is ordered chronologically, you can slice with
timestamps not contained in a time series to perform a range query:

In [18]:
# Redisplay ts for reference before running the range query.
ts

2011-01-02   -1.801201
2011-01-05    0.861576
2011-01-07    0.083232
2011-01-08   -0.913351
2011-01-10    1.390370
2011-01-12   -0.677025
dtype: float64

In [19]:
# Neither endpoint (6 Jan, 11 Jan) is a label in ts; the slice returns the
# entries whose dates fall between them.
ts['1/6/2011':'1/11/2011']

2011-01-07    0.083232
2011-01-08   -0.913351
2011-01-10    1.390370
dtype: float64

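As before, you can pass either a string date, datetime, or timestamp. Remember that
slicing in this manner produces views on the source time series like slicing NumPy
arrays. This means that no data is copied and modifications on the slice will be
reflected in the original data. (Under pandas's Copy-on-Write mode, which is the
default from pandas 3.0, the slice still shares data initially, but modifying it
triggers a copy and leaves the original unchanged.)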

There is an equivalent instance method, truncate, that slices a Series between two
dates:

In [20]:
# Keep only the entries dated up to 9 January 2011; later rows are dropped.
ts.truncate(after='1/9/2011')

2011-01-02   -1.801201
2011-01-05    0.861576
2011-01-07    0.083232
2011-01-08   -0.913351
dtype: float64

In [21]:
# All of this holds true for DataFrame as well, indexing on its rows.
# Build 100 weekly timestamps anchored on Wednesdays and four columns of
# standard-normal draws (no seed is set in the visible cells).
dates = pd.date_range(start='1/1/2000', freq='W-WED', periods=100)
long_df = pd.DataFrame(
    data=np.random.randn(100, 4),
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
    index=dates,
)
# A month-year string selects every row whose timestamp falls in May 2001.
long_df.loc['5-2001']

Unnamed: 0,Colorado,Texas,New York,Ohio
2001-05-02,0.389302,-0.390235,-0.026952,0.14671
2001-05-09,-0.584412,1.744793,0.81987,-0.314243
2001-05-16,0.701275,-1.198219,-0.556533,0.261219
2001-05-23,1.418391,1.66491,-1.186649,-0.180119
2001-05-30,0.512927,0.300758,0.929713,-0.324876


### Time Series with Duplicate Indices

In [22]:
# Three of the five labels fall on the same day (2 January 2000), so the
# resulting index contains duplicates.
dates = pd.DatetimeIndex(
    ['1/1/2000', '1/2/2000', '1/2/2000', '1/2/2000', '1/3/2000']
)
dup_ts = pd.Series(data=np.arange(5), index=dates)
dup_ts

2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int32

In [23]:
# We can tell that the index is not unique by checking its is_unique property
# (False here, because 2000-01-02 appears three times):
dup_ts.index.is_unique

False

Indexing into this time series will now either produce scalar values or slices
depending on whether a timestamp is duplicated:

In [24]:
dup_ts['1/3/2000'] # not duplicated, so a scalar is returned

4

In [25]:
dup_ts['1/2/2000'] # duplicated, so a Series of all matching rows is returned

2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int32

Suppose you wanted to aggregate the data having non-unique timestamps. One way
to do this is to use groupby and pass level=0:

In [26]:
# level=0 groups on the index itself, so rows sharing a timestamp land in
# the same group; mean() then yields one value per distinct timestamp.
grouped = dup_ts.groupby(level=0)
grouped.mean()

2000-01-01    0.0
2000-01-02    2.0
2000-01-03    4.0
dtype: float64

In [27]:
# Number of observations recorded for each distinct timestamp.
grouped.count()

2000-01-01    1
2000-01-02    3
2000-01-03    1
dtype: int64