# Time Series

In [None]:
# Notebook-wide setup: all imports first, then global configuration.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Fixed seed so the random example data is reproducible between runs.
np.random.seed(12345)
# Default size (width, height) for every matplotlib figure.
plt.rc('figure', figsize=(10, 6))
# Truncate the display of pandas objects longer than 20 rows.
# To restore the previous setting later, save it first:
#PREVIOUS_MAX_ROWS = pd.options.display.max_rows
pd.options.display.max_rows = 20
# Print NumPy floats with 4 decimals and without scientific notation.
np.set_printoptions(precision=4, suppress=True)

## Date and Time Data Types and Tools

In [None]:
# Capture the current local date and time as a (timezone-naive) datetime.
from datetime import datetime
now = datetime.now()
now

In [None]:
# Calendar fields of the datetime, shown together as a tuple.
now.year, now.month, now.day

In [None]:
# Subtracting one datetime from another yields a timedelta.
delta = (
    datetime(year=2021, month=1, day=7)
    - datetime(year=2018, month=6, day=24, hour=8, minute=15)
)
delta
# A timedelta stores whole days and the leftover seconds separately:
#delta.days
#delta.seconds

In [None]:
# timedelta objects can be added to or subtracted from a datetime.
from datetime import timedelta
start = datetime(year=2021, month=1, day=7)
# 12 days after the start date:
#start + timedelta(days=12)
# 24 days before the start date:
#start - 2 * timedelta(days=12)

### Converting Between String and Datetime

In [None]:
# Date-to-string conversion.
stamp = datetime(2021, 1, 3)
# Default string form; as a non-final expression its value is not displayed.
str(stamp)
# Explicit format: four-digit year, two-digit month, two-digit day.
stamp.strftime('%Y-%m-%d')

In [None]:
# String-to-date conversion: the format must match the text exactly.
value = '2021-01-03'   
datetime.strptime(value, '%Y-%m-%d')

In [None]:
# Month/day/year strings, each parsed with the same explicit format.
datestrs = ['7/6/2021', '8/6/2021']
[datetime.strptime(text, '%m/%d/%Y') for text in datestrs]

In [None]:
# dateutil infers the format, so no format string is needed.
from dateutil.parser import parse
parse('2021-01-03')

In [None]:
# Month names and a 12-hour clock with AM/PM are handled as well.
parse('Jan 31, 1997 10:45 PM')

In [None]:
# dayfirst=True reads the first number as the day: 6 December 2021.
parse('6/12/2021', dayfirst=True)

In [None]:
# pandas can convert a whole list of date strings at once;
# the result is a DatetimeIndex rather than a list of datetime objects.
datestrs = ['2021-07-06 12:00:00', '2021-08-06 00:00:00']
pd.to_datetime(datestrs)

In [None]:
# Append a missing value to the date strings; it is converted to NaT.
idx = pd.to_datetime([*datestrs, None])
idx
# The missing entry and a mask of missing entries:
#idx[2]
#pd.isnull(idx)

## Time Series Basics

In [None]:
# A small, irregularly spaced time series: six days in January 2021
# indexed by datetime objects, filled with random values.
from datetime import datetime
dates = [datetime(2021, 1, day) for day in (2, 5, 7, 8, 10, 12)]
ts = pd.Series(data=np.random.randn(len(dates)), index=dates)
ts

In [None]:
# Inspect the index object of the series.
ts.index

In [None]:
# Take the first label of the index and display it.
stamp = ts.index[0]
stamp

### Indexing, Selection, Subsetting

In [None]:
# Use the third index label to look up its value in the series.
stamp = ts.index[2]
ts[stamp]

In [None]:
# We can query the data with a date string in most common formats;
# pandas will try to parse it correctly.
ts['1/10/2021']
# The same date written without separators:
#ts['20210110']

In [None]:
# 1000 consecutive daily observations starting on 1 January 2000.
longer_ts = pd.Series(
    data=np.random.randn(1000),
    index=pd.date_range(start='1/1/2000', periods=1000),
)
longer_ts
# Passing just a year selects every row in that year:
#longer_ts['2001']

In [None]:
# A year-month string selects all rows in that month.
longer_ts['2001-05']

In [None]:
# Going back to the small dataset.
ts  

In [None]:
# Slice from the given datetime through the end of the series.
ts[datetime(2021, 1, 7):]

In [None]:
# Slice between two date strings; the endpoints themselves do not have to
# be labels in the index.
ts['1/6/2021':'1/11/2021']

In [None]:
# Drop every observation after the given date.
ts.truncate(after='1/9/2021')

In [None]:
# 100 weekly (Wednesday) timestamps indexing a four-column random DataFrame.
dates = pd.date_range(start='1/1/2000', periods=100, freq='W-WED')
long_df = pd.DataFrame(
    data=np.random.randn(len(dates), 4),
    index=dates,
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
)
long_df

In [None]:
# Partial-string row selection works on DataFrames through .loc:
# all rows in May 2001.
long_df.loc['5-2001']
# Same idea with year-month order:
#long_df.loc['2001-06']

### Time Series with Duplicate Indices

In [None]:
# An index in which 2 January 2000 appears three times.
dates = pd.DatetimeIndex(
    ['1/1/2000'] + ['1/2/2000'] * 3 + ['1/3/2000']
)
dup_ts = pd.Series(data=np.arange(5), index=dates)
dup_ts

In [None]:
# Quick check for duplicate timestamps in the index.
dup_ts.index.is_unique

In [None]:
# Looking up a label that occurs once.
dup_ts['1/3/2000']  # not duplicated
# Looking up the label that occurs three times:
#dup_ts['1/2/2000']  # duplicated

In [None]:
# Group by the index itself (level 0) to aggregate duplicated timestamps.
grouped = dup_ts.groupby(level=0)
grouped.mean()
# Number of observations per timestamp:
#grouped.count()

## Date Ranges, Frequencies, and Shifting

### Generating Date Ranges

In [None]:
# Every calendar day from 1 April 2012 through 1 June 2012 (inclusive).
index = pd.date_range(start='2012-04-01', end='2012-06-01')
index

In [None]:
# Give a start and a number of periods instead of an end date.
pd.date_range(start='2012-04-01', periods=20)
# Or give an end date and count the periods backwards from it:
#pd.date_range(end='2012-06-01', periods=20)

In [None]:
# Last calendar day of each month between the two dates.
# The offset object is used instead of a string alias because the alias
# was renamed from 'M' to 'ME' in pandas 2.2; MonthEnd() works on both.
pd.date_range('2000-01-01', '2000-12-01', freq=pd.offsets.MonthEnd())

In [None]:
# By default the time of day of the start timestamp is kept.
pd.date_range('2012-05-02 12:56:31', periods=5)

In [None]:
# normalize=True snaps the timestamps to midnight.
pd.date_range('2012-05-02 12:56:31', periods=5, normalize=True)

## Time Zone Handling

### Time Zone Localization and Conversion

In [None]:
# Six daily timestamps at 09:30 with no time zone attached,
# indexing a series of random values.
rng = pd.date_range(start='3/9/2012 9:30', periods=6, freq='D')
ts = pd.Series(data=np.random.randn(rng.size), index=rng)
ts

In [None]:
# Show the index's time zone; it is None for a timezone-naive index.
print(ts.index.tz)

In [None]:
# A date range can be created with an explicit time zone.
pd.date_range('3/9/2012 9:30', periods=10, freq='D', tz='UTC')

In [None]:
# Display the series again before localizing it.
ts

In [None]:
# Attach the UTC time zone to the index; the result is a new series.
ts_utc = ts.tz_localize('UTC')
ts_utc

In [None]:
# Inspect the index of the localized series.
ts_utc.index

In [None]:
# Once localized, the series can be converted to another time zone.
ts_utc.tz_convert('America/New_York')

In [None]:
# Interpret the naive timestamps as New York local time, then convert to UTC.
ts_eastern = ts.tz_localize('America/New_York')
ts_eastern.tz_convert('UTC')
# Or convert to another zone:
#ts_eastern.tz_convert('Europe/Berlin')

## Resampling and Frequency Conversion

In [None]:
# 100 consecutive daily observations starting on 1 January 2000.
rng = pd.date_range(start='2000-01-01', periods=100, freq='D')
ts = pd.Series(data=np.random.randn(rng.size), index=rng)
ts

In [None]:
# Downsample to month-end frequency and average each month.
# The offset object replaces the 'M' string alias, which was renamed to
# 'ME' in pandas 2.2; MonthEnd() behaves the same on old and new versions.
ts.resample(pd.offsets.MonthEnd()).mean()


In [None]:
# A bit better: label each month as a "period" of time rather than by its
# last day. The `kind='period'` keyword is deprecated since pandas 2.2, so
# aggregate on timestamps first and then convert the labels to periods.
ts.resample(pd.offsets.MonthEnd()).mean().to_period('M')

### Downsampling

In [None]:
# Twelve one-minute observations holding the values 0..11.
# 'min' is the minute alias accepted by every pandas version; the
# single-letter 'T' alias is deprecated since pandas 2.2.
rng = pd.date_range('2000-01-01', periods=12, freq='min')
ts = pd.Series(np.arange(12), index=rng)
ts

In [None]:
# Sum into 5-minute bins whose left edge is inclusive.
ts.resample('5min', closed='left').sum()

In [None]:
# Same bins, but with the right edge inclusive instead of the left.
ts.resample('5min', closed='right').sum()

In [None]:
# Right-inclusive bins, each labelled with its right edge.
ts.resample('5min', closed='right', label='right').sum()

### Upsampling and Interpolation

In [None]:
# Two weekly (Wednesday) rows of random data, used to demonstrate upsampling.
frame = pd.DataFrame(
    data=np.random.randn(2, 4),
    index=pd.date_range(start='1/1/2000', periods=2, freq='W-WED'),
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
)
frame

In [None]:
# Upsample to daily frequency without aggregating; asfreq() leaves the
# newly created days empty (NaN).
df_daily = frame.resample('D').asfreq()
df_daily

In [None]:
# Upsample to daily frequency, carrying the last known values forward.
frame.resample('D').ffill()

In [None]:
# Forward-fill at most two consecutive new rows; later gaps stay empty.
frame.resample('D').ffill(limit=2)

## Moving Window Functions

In [None]:
# Load the price table, using the first column as the index and
# parsing it as dates.
close_px_all = pd.read_csv(
    'stock_px_2.csv',
    index_col=0,
    parse_dates=True,
)
close_px_all

In [None]:
# Keep only the three columns used in the examples below.
close_px = close_px_all[['AAPL', 'MSFT', 'XOM']]
close_px


In [None]:
# Resample to business-day frequency, forward-filling any missing days.
close_px = close_px.resample('B').ffill()
close_px

In [None]:
# Plot the AAPL column, then overlay its rolling mean.
close_px.AAPL.plot()  
# The "rolling" operator aggregates over a sliding window of 100 rows.
close_px.AAPL.rolling(100).mean().plot()

In [None]:
# Rolling standard deviation over a 250-row window; min_periods=10 lets the
# result start once 10 observations are available instead of waiting for 250.
appl_std250 = close_px.AAPL.rolling(250, min_periods=10).std()
# Without min_periods, a full 250-row window is required:
#appl_std250 = close_px.AAPL.rolling(250).std()
appl_std250.plot()

In [None]:
# Apply a 60-row rolling mean to every column and plot the result
# with a logarithmic y axis.
close_px.rolling(60).mean().plot(logy=True)