# Pandas Time Series

In [44]:
from datetime import datetime
import pandas as pd
import numpy as np

In [2]:
# Capture the current date and time as a datetime object
now = datetime.now()
now

datetime.datetime(2020, 5, 15, 19, 26, 50, 172998)

In [3]:
# Calendar components are available as attributes of the datetime
now.year, now.month, now.day

(2020, 5, 15)

In [126]:
# Time difference: subtracting one datetime from another gives a timedelta.
# The left-hand operand is the earlier date, so the resulting delta is negative.
earlier = datetime(1988, 6, 12)
later = datetime(2020, 5, 15)

delta = earlier - later
delta

datetime.timedelta(days=-11660)

### Timedelta

In [9]:
from datetime import timedelta

In [10]:
# The first positional argument of timedelta is a number of days; name it explicitly.
timedelta(days=12)

datetime.timedelta(days=12)

### Converting between string and datetime

In [18]:
# A date-only datetime: the time-of-day fields default to midnight.
stamp = datetime(year=2012, month=1, day=3)
stamp

datetime.datetime(2012, 1, 3, 0, 0)

In [19]:
# Converting datetime to string: str() renders this stamp as 'YYYY-MM-DD HH:MM:SS'
str(stamp)


'2012-01-03 00:00:00'

In [20]:
# Converting datetime to string with an explicit strftime format
# (%Y = four-digit year, %m = month, %d = day)

stamp.strftime('%Y-%m-%d')

'2012-01-03'

In [21]:
# Converting string to datetime: start from a date string in year-month-day order

value = '2012-01-03'

In [22]:
# Parse the string back into a datetime using the matching format codes
datetime.strptime(value, '%Y-%m-%d')

datetime.datetime(2012, 1, 3, 0, 0)

In [25]:
# Day-first date strings, each parsed with the same explicit format.
datestrs = ['7/6/2011', '8/6/2011']
[datetime.strptime(date_str, '%d/%m/%Y') for date_str in datestrs]

[datetime.datetime(2011, 6, 7, 0, 0), datetime.datetime(2011, 6, 8, 0, 0)]

### Dateutil Library

In [26]:
from dateutil.parser import parse

In [29]:
# Parse a date string without supplying an explicit format
parse('2015-12-23')

datetime.datetime(2015, 12, 23, 0, 0)

In [30]:
# A free-form string with a month name and 12-hour clock time is also understood
parse('Jan 31, 1997 10:45 PM')

datetime.datetime(1997, 1, 31, 22, 45)

In [31]:
# dayfirst=True makes the leading number the day: '6/12/2011' -> 6 December 2011
parse('6/12/2011', dayfirst=True)

datetime.datetime(2011, 12, 6, 0, 0)

`dateutil.parser` is a useful but imperfect tool. Notably, it will recognize
some strings as dates that you might prefer it didn't. For example,
`'42'` will be parsed as the year 2042 with today's calendar date.

### Pandas

In [38]:
# pd.to_datetime converts a whole list of date strings in one call.
# Per the output below, these strings are read month-first ('7/6/2011' -> 2011-07-06).
datestrs = ['7/6/2011', '8/6/2011']
pd.to_datetime(datestrs)

DatetimeIndex(['2011-07-06', '2011-08-06'], dtype='datetime64[ns]', freq=None)

#### Handling Missing Values (None becomes NaT)

In [39]:
# Appending None shows how a missing value is handled: it comes back as NaT
idx = pd.to_datetime(datestrs + [None])
idx

DatetimeIndex(['2011-07-06', '2011-08-06', 'NaT'], dtype='datetime64[ns]', freq=None)

In [40]:
# The third element is the entry that was produced from None
idx[2]

NaT

### Time Series Basics

In [41]:
from datetime import datetime

In [42]:
# Six irregularly spaced dates in January 2011 used as the series index.
dates = [
    datetime(2011, 1, 2), datetime(2011, 1, 5), datetime(2011, 1, 7),
    datetime(2011, 1, 8), datetime(2011, 1, 10), datetime(2011, 1, 12),
]

# Draw the sample values from a locally seeded generator so the series is
# identical on every Restart & Run All, without touching NumPy's global
# random state that other cells draw from.
ts = pd.Series(np.random.default_rng(42).standard_normal(6), index=dates)
ts

In [46]:
# A Series indexed by datetime objects is still a regular pandas Series
type(ts)

pandas.core.series.Series

In [47]:
# The list of datetime objects has been stored as a DatetimeIndex
ts.index

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

In [50]:
# The index values are held as datetime64 at nanosecond resolution
ts.index.dtype

dtype('<M8[ns]')

### Indexing, Selection, and Subsetting

In [55]:
# A longer series: 1000 consecutive daily timestamps starting on 2000-01-01,
# paired with normally distributed sample values.
longer_ts = pd.Series(
    data=np.random.randn(1000),
    index=pd.date_range(start='1/1/2000', periods=1000),
)
longer_ts.head()

2000-01-01    1.502861
2000-01-02    0.861699
2000-01-03   -0.640865
2000-01-04    1.623017
2000-01-05   -0.030240
Freq: D, dtype: float64

In [56]:
# Last five entries of the series
longer_ts.tail()

2002-09-22    1.137652
2002-09-23    1.303313
2002-09-24   -0.300747
2002-09-25    0.746180
2002-09-26   -0.940266
Freq: D, dtype: float64

In [61]:
# Partial string indexing: a year string selects every entry from that year
longer_ts['2001']

2001-01-01    0.014068
2001-01-02   -0.771940
2001-01-03    1.924551
2001-01-04   -0.995645
2001-01-05   -0.523183
2001-01-06    1.743793
2001-01-07   -0.040007
2001-01-08    0.496086
2001-01-09   -0.080951
2001-01-10    0.253613
2001-01-11    1.198349
2001-01-12   -0.219355
2001-01-13    1.419738
2001-01-14    0.087351
2001-01-15    0.659277
2001-01-16   -0.851941
2001-01-17    1.530257
2001-01-18    0.704709
2001-01-19   -0.078616
2001-01-20   -0.292326
2001-01-21    0.094934
2001-01-22   -0.953760
2001-01-23   -0.201170
2001-01-24   -0.914143
2001-01-25    1.710248
2001-01-26    1.548417
2001-01-27   -0.538389
2001-01-28    0.135750
2001-01-29    1.210342
2001-01-30   -2.156647
                ...   
2001-12-02   -2.065144
2001-12-03   -1.475150
2001-12-04   -0.002220
2001-12-05   -1.343527
2001-12-06   -0.120225
2001-12-07    0.263111
2001-12-08    0.219517
2001-12-09   -0.253055
2001-12-10   -0.737699
2001-12-11    1.038662
2001-12-12   -2.139407
2001-12-13   -0.060184
2001-12-14 

In [62]:
# A year-month string selects every entry from that month
longer_ts['2001-05']

2001-05-01   -0.218149
2001-05-02    0.550300
2001-05-03    1.703142
2001-05-04   -0.122861
2001-05-05    0.061349
2001-05-06   -1.452990
2001-05-07   -1.380338
2001-05-08   -0.078140
2001-05-09    0.452233
2001-05-10   -0.219463
2001-05-11   -2.135583
2001-05-12    0.086609
2001-05-13    0.487116
2001-05-14   -0.537233
2001-05-15    1.670814
2001-05-16   -0.390042
2001-05-17   -0.682217
2001-05-18   -0.245536
2001-05-19    0.473359
2001-05-20    1.361060
2001-05-21   -0.910622
2001-05-22   -0.840339
2001-05-23   -1.287164
2001-05-24   -0.705591
2001-05-25    1.512404
2001-05-26   -1.537194
2001-05-27   -3.606412
2001-05-28    0.673170
2001-05-29    0.446054
2001-05-30    0.510814
2001-05-31   -0.417194
Freq: D, dtype: float64

In [84]:
# Slicing with date strings; per the output below, both endpoints are included
longer_ts['2001-06-14': '2001-06-30' ]

2001-06-14    0.390164
2001-06-15    0.519456
2001-06-16   -0.924120
2001-06-17   -0.277019
2001-06-18    1.030446
2001-06-19   -0.542581
2001-06-20   -0.344086
2001-06-21   -1.318941
2001-06-22   -0.098607
2001-06-23    0.546155
2001-06-24   -0.892365
2001-06-25    0.294988
2001-06-26   -1.429935
2001-06-27    2.326353
2001-06-28   -0.657449
2001-06-29   -0.806903
2001-06-30   -0.641330
Freq: D, dtype: float64

### Working with DataFrame

In [88]:
# Weekly index anchored on Wednesdays ('W-WED'): 100 periods from 2000-01-01 onward.
dates = pd.date_range(start='1/1/2000', periods=100, freq='W-WED')

# One column of normally distributed sample values per state.
long_df = pd.DataFrame(
    data=np.random.randn(100, 4),
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
    index=dates,
)
long_df

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.249666,0.396124,1.369759,0.961405
2000-01-12,0.645390,-0.249375,-1.070806,1.601788
2000-01-19,0.039035,0.337726,-1.434774,0.265199
2000-01-26,0.103588,-0.163668,0.611976,1.409763
2000-02-02,1.277341,0.801776,-0.091427,0.251126
2000-02-09,0.692484,0.142783,-0.159717,-1.846478
2000-02-16,1.629656,-0.200355,-0.778043,-0.001977
2000-02-23,1.533660,2.303186,1.750536,1.013524
2000-03-01,0.647928,-0.110431,-0.299634,-0.056911
2000-03-08,-0.872634,-1.780196,-0.194423,-0.346323


### Date Ranges, Frequencies, and Shifting

In [98]:
# Six irregularly spaced days in January 2011, used as the index of a small
# series of normally distributed sample values.
dates = [datetime(2011, 1, day) for day in (2, 5, 7, 8, 10, 12)]

ts = pd.Series(data=np.random.randn(6), index=dates)
ts

2011-01-02    2.006443
2011-01-05   -0.929721
2011-01-07   -0.569228
2011-01-08   -0.057928
2011-01-10    0.092588
2011-01-12    0.183275
dtype: float64

### Generating Date Ranges

In [104]:
# With both a start and an end date, date_range fills in every day between them.
index = pd.date_range(start='4/1/2012', end='6/1/2012')
index

DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20',
               '2012-04-21', '2012-04-22', '2012-04-23', '2012-04-24',
               '2012-04-25', '2012-04-26', '2012-04-27', '2012-04-28',
               '2012-04-29', '2012-04-30', '2012-05-01', '2012-05-02',
               '2012-05-03', '2012-05-04', '2012-05-05', '2012-05-06',
               '2012-05-07', '2012-05-08', '2012-05-09', '2012-05-10',
               '2012-05-11', '2012-05-12', '2012-05-13', '2012-05-14',
               '2012-05-15', '2012-05-16', '2012-05-17', '2012-05-18',
               '2012-05-19', '2012-05-20', '2012-05-21', '2012-05-22',
               '2012-05-23', '2012-05-24', '2012-05-25', '2012-05-26',
      

In [105]:
# A start date plus a number of periods; the output shows a daily ('D') frequency
pd.date_range(start='4/1/2012', periods=20)

DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20'],
              dtype='datetime64[ns]', freq='D')

In [106]:
# An end date plus a number of periods: the range finishes on the end date
pd.date_range(end='6/1/2012', periods=20)

DatetimeIndex(['2012-05-13', '2012-05-14', '2012-05-15', '2012-05-16',
               '2012-05-17', '2012-05-18', '2012-05-19', '2012-05-20',
               '2012-05-21', '2012-05-22', '2012-05-23', '2012-05-24',
               '2012-05-25', '2012-05-26', '2012-05-27', '2012-05-28',
               '2012-05-29', '2012-05-30', '2012-05-31', '2012-06-01'],
              dtype='datetime64[ns]', freq='D')

In [107]:
# Last business day of each month in the range. The offset object is used
# instead of the 'BM' string alias because that alias is deprecated in newer
# pandas releases, while the offset class works across versions.
pd.date_range('1/1/2000', '12/1/2000', freq=pd.offsets.BMonthEnd())

DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', '2000-04-28',
               '2000-05-31', '2000-06-30', '2000-07-31', '2000-08-31',
               '2000-09-29', '2000-10-31', '2000-11-30'],
              dtype='datetime64[ns]', freq='BM')

In [108]:
# 100 consecutive calendar days beginning on 2000-01-01.
rng = pd.date_range(start='1/1/2000', freq='D', periods=100)
rng

DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03', '2000-01-04',
               '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-08',
               '2000-01-09', '2000-01-10', '2000-01-11', '2000-01-12',
               '2000-01-13', '2000-01-14', '2000-01-15', '2000-01-16',
               '2000-01-17', '2000-01-18', '2000-01-19', '2000-01-20',
               '2000-01-21', '2000-01-22', '2000-01-23', '2000-01-24',
               '2000-01-25', '2000-01-26', '2000-01-27', '2000-01-28',
               '2000-01-29', '2000-01-30', '2000-01-31', '2000-02-01',
               '2000-02-02', '2000-02-03', '2000-02-04', '2000-02-05',
               '2000-02-06', '2000-02-07', '2000-02-08', '2000-02-09',
               '2000-02-10', '2000-02-11', '2000-02-12', '2000-02-13',
               '2000-02-14', '2000-02-15', '2000-02-16', '2000-02-17',
               '2000-02-18', '2000-02-19', '2000-02-20', '2000-02-21',
               '2000-02-22', '2000-02-23', '2000-02-24', '2000-02-25',
      

In [110]:
# 36 calendar month-end dates starting from January 2000. The offset object is
# used instead of the 'M' string alias because that alias is deprecated in
# newer pandas releases, while the offset class works across versions.
rng = pd.date_range('1/1/2000', periods=36, freq=pd.offsets.MonthEnd())
rng

DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', '2000-04-30',
               '2000-05-31', '2000-06-30', '2000-07-31', '2000-08-31',
               '2000-09-30', '2000-10-31', '2000-11-30', '2000-12-31',
               '2001-01-31', '2001-02-28', '2001-03-31', '2001-04-30',
               '2001-05-31', '2001-06-30', '2001-07-31', '2001-08-31',
               '2001-09-30', '2001-10-31', '2001-11-30', '2001-12-31',
               '2002-01-31', '2002-02-28', '2002-03-31', '2002-04-30',
               '2002-05-31', '2002-06-30', '2002-07-31', '2002-08-31',
               '2002-09-30', '2002-10-31', '2002-11-30', '2002-12-31'],
              dtype='datetime64[ns]', freq='M')