In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

In [2]:
# Six irregularly spaced days in January 2011, built as plain datetime objects.
dates = [datetime(2011, 1, day) for day in (2, 5, 7, 8, 10, 12)]
dates

[datetime.datetime(2011, 1, 2, 0, 0),
 datetime.datetime(2011, 1, 5, 0, 0),
 datetime.datetime(2011, 1, 7, 0, 0),
 datetime.datetime(2011, 1, 8, 0, 0),
 datetime.datetime(2011, 1, 10, 0, 0),
 datetime.datetime(2011, 1, 12, 0, 0)]

In [3]:
# Time series of six standard-normal draws labelled by the datetimes in `dates`.
# The global NumPy generator is seeded first so that re-running this cell (or
# the whole notebook) reproduces the same values; the seed itself is arbitrary.
np.random.seed(0)
ts = pd.Series(np.random.randn(6), index=dates)
ts

2011-01-02   -0.636153
2011-01-05   -2.300329
2011-01-07    0.055867
2011-01-08   -0.313571
2011-01-10   -0.682626
2011-01-12   -1.894600
dtype: float64

In [4]:
# The list of datetime objects passed as the index is held as a DatetimeIndex.
ts.index

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

In [5]:
# Like other Series, arithmetic between differently indexed time series
# automatically aligns on the dates; dates missing from either operand give NaN.
ts + ts[::2]
# Recall that ts[::2] selects every second element in ts.

2011-01-02   -1.272306
2011-01-05         NaN
2011-01-07    0.111735
2011-01-08         NaN
2011-01-10   -1.365253
2011-01-12         NaN
dtype: float64

In [6]:
# The index values are stored as NumPy datetime64 at nanosecond resolution.
ts.index.dtype

dtype('<M8[ns]')

In [7]:
# A scalar taken from a DatetimeIndex is a pandas Timestamp object.
stamp = ts.index[0]
stamp

Timestamp('2011-01-02 00:00:00')

### Indexing, Selection, Subsetting

In [8]:
# Show ts again for reference before indexing into it.
ts

2011-01-02   -0.636153
2011-01-05   -2.300329
2011-01-07    0.055867
2011-01-08   -0.313571
2011-01-10   -0.682626
2011-01-12   -1.894600
dtype: float64

In [9]:
# Label-based selection: a Timestamp taken from the index (here the third
# entry) retrieves the value stored at that date.
stamp = ts.index[2]
ts[stamp]

0.05586749540857904

In [10]:
# As a convenience, you can also pass a string that is interpretable as a date.
ts['1/10/2011']

-0.682626329896845

In [11]:
# The compact YYYYMMDD spelling of the same date selects the same value.
ts['20110110']

-0.682626329896845

In [12]:
# A longer daily series: 1000 standard-normal draws starting on 2000-01-01.
# Seeding the global NumPy generator right before the draw makes this cell
# idempotent, so the year/month selections shown below it stay reproducible;
# the seed value itself is arbitrary.
np.random.seed(1)
longer_ts = pd.Series(np.random.randn(1000),
                      index=pd.date_range('1/1/2000', periods=1000))
longer_ts.head()

2000-01-01    1.873378
2000-01-02   -0.641806
2000-01-03    2.980709
2000-01-04    0.719790
2000-01-05   -0.845008
Freq: D, dtype: float64

In [13]:
# For a longer series, passing only a year string selects every observation
# dated in that year.
longer_ts['2001']

2001-01-01    0.818273
2001-01-02    2.331313
2001-01-03   -0.058021
2001-01-04   -0.367350
2001-01-05    0.341210
2001-01-06    0.879576
2001-01-07    0.961913
2001-01-08    0.552947
2001-01-09   -0.386834
2001-01-10   -0.377964
2001-01-11    0.457782
2001-01-12    1.063694
2001-01-13    0.668055
2001-01-14   -0.234558
2001-01-15    0.828045
2001-01-16   -0.515579
2001-01-17    0.085830
2001-01-18   -2.833469
2001-01-19    0.664051
2001-01-20   -1.194400
2001-01-21   -0.155312
2001-01-22    0.371744
2001-01-23    0.452660
2001-01-24    0.986963
2001-01-25    1.612715
2001-01-26   -1.304497
2001-01-27   -1.717970
2001-01-28    2.006967
2001-01-29   -1.285353
2001-01-30   -1.412324
                ...   
2001-12-02    0.744375
2001-12-03   -0.173689
2001-12-04   -0.320782
2001-12-05   -0.178408
2001-12-06   -0.955729
2001-12-07   -0.214993
2001-12-08    0.698850
2001-12-09   -0.936737
2001-12-10    1.524911
2001-12-11    0.470271
2001-12-12   -0.076322
2001-12-13   -0.775157
2001-12-14 

In [14]:
# This also works at month granularity: '2001-05' selects all of May 2001.
longer_ts['2001-05']

2001-05-01    0.663486
2001-05-02    0.012270
2001-05-03   -0.895837
2001-05-04   -2.027207
2001-05-05    1.548834
2001-05-06    1.480829
2001-05-07   -0.166327
2001-05-08   -0.425043
2001-05-09    0.241040
2001-05-10    0.612667
2001-05-11    0.165151
2001-05-12    0.273764
2001-05-13    0.101350
2001-05-14   -1.428506
2001-05-15   -0.960387
2001-05-16   -0.620314
2001-05-17   -0.202825
2001-05-18    0.644530
2001-05-19    1.224208
2001-05-20    1.673453
2001-05-21    1.837587
2001-05-22    0.103364
2001-05-23   -0.895163
2001-05-24    0.136033
2001-05-25   -0.340770
2001-05-26   -1.248025
2001-05-27   -1.056746
2001-05-28    1.140258
2001-05-29    0.817164
2001-05-30    0.758041
2001-05-31   -3.088379
Freq: D, dtype: float64

In [15]:
# Slicing with datetime objects works as well; leaving the stop open keeps
# everything from 2011-01-07 through the end of the series.
ts[datetime(2011, 1, 7):]

2011-01-07    0.055867
2011-01-08   -0.313571
2011-01-10   -0.682626
2011-01-12   -1.894600
dtype: float64

In [16]:
# Range query: show ts again so the date-range slices can be compared with it.
ts

2011-01-02   -0.636153
2011-01-05   -2.300329
2011-01-07    0.055867
2011-01-08   -0.313571
2011-01-10   -0.682626
2011-01-12   -1.894600
dtype: float64

In [17]:
# The slice endpoints need not be labels of the index: neither 1/6/2011 nor
# 1/11/2011 occurs in ts, yet the slice returns the dates lying between them.
ts['1/6/2011':'1/11/2011']

2011-01-07    0.055867
2011-01-08   -0.313571
2011-01-10   -0.682626
dtype: float64

In [18]:
# truncate() is an equivalent way to cut a Series by date; with only `after`
# given, every observation dated later than 1/9/2011 is dropped.
ts.truncate(after='1/9/2011')

2011-01-02   -0.636153
2011-01-05   -2.300329
2011-01-07    0.055867
2011-01-08   -0.313571
dtype: float64

In [19]:
# Indexing on a DataFrame's rows: build a frame of 100 weekly observations
# (freq='W-WED', i.e. one row per Wednesday) for four named columns.
# NOTE: this rebinds `dates`, which earlier held a plain list of datetimes.
# The global NumPy generator is seeded so that re-running the cell reproduces
# the same values; the seed value itself is arbitrary.
np.random.seed(2)
dates = pd.date_range('1/1/2000', periods=100, freq='W-WED')
long_df = pd.DataFrame(np.random.randn(100, 4),
                       index=dates,
                       columns=['Colorado', 'Texas',
                                'New York', 'Ohio'])
long_df.head()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.254711,-1.421973,1.589101,0.484182
2000-01-12,0.46589,-0.181969,0.920234,0.985649
2000-01-19,-0.187776,0.00972,-1.23237,-0.593439
2000-01-26,0.477443,-0.140943,1.27929,-0.14567
2000-02-02,-0.751637,0.763858,-0.122931,0.237598


In [20]:
# Partial-string indexing also selects DataFrame rows through .loc:
# '5-2001' matches every row dated in May 2001.
long_df.loc['5-2001']

Unnamed: 0,Colorado,Texas,New York,Ohio
2001-05-02,0.386389,-1.047686,0.567859,-1.141266
2001-05-09,-0.553261,0.43409,1.307415,-0.642575
2001-05-16,-1.076004,0.163651,-0.067564,-0.832595
2001-05-23,0.249052,-1.996883,0.592398,-1.28528
2001-05-30,1.177293,-0.101726,1.073611,-0.24244


### Time Series with Duplicate Indices

In [21]:
# Build an index in which 2000-01-02 appears three times, between single
# entries for 2000-01-01 and 2000-01-03.
dates = pd.DatetimeIndex(['1/1/2000'] + ['1/2/2000'] * 3 + ['1/3/2000'])

In [22]:
# Series of the integers 0 through 4 labelled by the index that contains
# repeated timestamps.
dup_ts = pd.Series(data=np.arange(5), index=dates)
dup_ts

2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int32

In [23]:
# Confirm that the index is not unique by checking its is_unique property;
# it reports False because 2000-01-02 occurs more than once.
dup_ts.index.is_unique

False

In [24]:
# Indexing with a timestamp that occurs once produces a scalar value.
dup_ts['1/3/2000']  # not duplicated

4

In [25]:
# Indexing with a timestamp that occurs several times produces a Series
# holding every matching observation.
dup_ts['1/2/2000']  # duplicated

2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int32

In [26]:
# Suppose you want to aggregate the observations that share a timestamp.
# One way to do this is to group on the index itself by passing level=0 to
# groupby; `grouped` is kept so further aggregations can be applied to it.
grouped = dup_ts.groupby(level=0)
grouped.mean()

2000-01-01    0
2000-01-02    2
2000-01-03    4
dtype: int32

In [27]:
# Number of observations recorded for each distinct timestamp.
grouped.count()

2000-01-01    1
2000-01-02    3
2000-01-03    1
dtype: int64