In [1]:
from datetime import datetime
from datetime import timedelta
from typing import Iterator, List

import numpy as np
import pandas as pd

# Notebook-wide display settings: show at most 10 rows and 4 digits of precision.
pd.set_option("display.max_rows", 10)
pd.set_option("display.precision", 4)

# Date and Time Data Types and Tools

### Intro

In [None]:
from datetime import datetime
from datetime import timedelta

In [None]:
# Capture the current date and time once; the cells below read fields from this value.
now = datetime.now()
now

In [None]:
# Calendar fields of the captured timestamp.
now.year, now.month, now.day

In [None]:
# Clock fields of the captured timestamp; `now` is not refreshed when this cell is re-run.
now.hour, now.minute, now.second

In [None]:
# Read the clock a single time so that hour, minute and second all describe the
# same instant; three separate datetime.now() calls could straddle a second,
# minute or hour rollover and produce an inconsistent triple.
current_time = datetime.now()
current_time.hour, current_time.minute, current_time.second

In [None]:
# Subtracting one datetime from another produces a timedelta.
delta = datetime(2011, 1, 7) - datetime(2008, 6, 24, 8, 15)
delta

In [None]:
# The stored components of the difference, followed by the whole span expressed in seconds.
delta.days, delta.seconds, delta.microseconds, delta.total_seconds()

In [None]:
# A datetime built from year, month and day only; the time-of-day arguments are omitted.
datetime(2011, 1, 7)

In [None]:
# Print the timestamp, the difference, and the timestamp moved forwards and backwards by that difference.
print(now, '\n', delta, '\n', now + delta, ' // ', now - delta)

In [None]:
# Named `elapsed` rather than `timedelta`: rebinding `timedelta` would hide the
# class imported from datetime and make the timedelta(...) calls in later cells
# fail with "object is not callable".
elapsed = datetime(2011, 1, 7) - datetime(2008, 6, 24, 8, 15)
elapsed

In [None]:
# Reference date for the timedelta arithmetic below.
start = datetime(2011, 1, 7)

In [None]:
# Positional arguments are days, seconds, microseconds. This call relies on the
# name `timedelta` still referring to the class imported from datetime.
timedelta(12,3,2)

In [None]:
# Adding a timedelta to a datetime gives a new, shifted datetime.
start + timedelta(12,3,2)

### Converting Between String and Datetime

In [None]:
# The same datetime shown as an object, via str(), and via two strftime format strings.
stamp = datetime(2011, 1, 3)
stamp, str(stamp), stamp.strftime('%Y-%m-%d'), stamp.strftime('%Y-%m-%d %H:%M:%S')

In [None]:
# A date held as plain text, displayed together with its Python type.
value = '2011-01-03'
value, type(value)

In [None]:
# Parse the text into a datetime using a format string that matches its layout.
datetime.strptime(value, '%Y-%m-%d')

In [None]:
# Two date-time strings to be parsed together in the next cells.
datestrs = ["2011-07-06 12:00:00", "2011-08-06 00:00:00"]
datestrs

In [None]:
# pd.to_datetime converts the whole list in one call.
pd.to_datetime(datestrs)

In [None]:
# Append a None entry to see how a missing value is represented after parsing.
idx = pd.to_datetime(datestrs + [None])
idx

In [None]:
# The third element is the one produced from the appended None.
idx[2]

In [None]:
# %X is the strftime directive for the locale's time representation.
stamp.strftime('%Y-%m-%d %X')

In [None]:
# Shift `stamp` by 12 days and 50000 seconds, then format the result three ways:
# locale time (%X), 12-hour clock with AM/PM (%I:%M%p), and locale date-time (%c).
stamp2 = stamp + timedelta(12,50000)
stamp2.strftime('%Y-%m-%d %X'), stamp2.strftime('%Y-%m-%d %I:%M%p'), stamp2.strftime('%c')

# Time Series Basics

### Intro

In [None]:
# A short Series of random values keyed by six unevenly spaced dates in January 2011.
dates = [datetime(2011, 1, 2), datetime(2011, 1, 5),
            datetime(2011, 1, 7), datetime(2011, 1, 8),
            datetime(2011, 1, 10), datetime(2011, 1, 12)]
ts = pd.Series(np.random.standard_normal(len(dates)), index=dates)
ts

In [None]:
# Add a zeroed copy of every second element (positions 0, 2, 4). The addition aligns
# on the date index, so dates missing from the slice are expected to come out as NaN.
ts + ts[::2]*0

In [None]:
# Same alignment demonstration using the other half of the elements (positions 1, 3, 5).
ts + ts[1::2]*0

In [None]:
# Inspect the index object backing the Series.
ts.index

In [None]:
# The index class, its dtype, and the type of a single index element.
type(ts.index), ts.index.dtype, type(ts.index[0])

In [None]:
# Rebinds `stamp` (previously a datetime) to the first label of the index.
stamp = ts.index[0]
stamp

### Indexing, Selection, Subsetting

In [None]:
# Rebinds `stamp` to the third index label, which is used as a lookup key below.
stamp = ts.index[2]
stamp

In [None]:
# Look up a value using the index label itself.
ts[stamp]

In [None]:
# A date string naming the same day (the third entry of `dates`) is used as the key here.
ts['2011-01-07']

In [None]:
# 1000 random observations on consecutive dates starting at 2000-01-01
# (pd.date_range is called without freq, so its default spacing applies).
longer_ts = pd.Series(np.random.standard_normal(1000),
                        index=pd.date_range('2000-01-01', periods=1000))
longer_ts

In [None]:
# Time span between the first and the last index label.
longer_ts.index[-1] - longer_ts.index[0]

In [None]:
# A year string as the key selects the rows that fall in that year.
longer_ts['2001']

In [None]:
# A year-month string narrows the selection to that month.
longer_ts['2001-05']

In [None]:
# Slice between two date strings.
longer_ts['2001-05-01':'2001-05-10']

In [None]:
# Open-ended slice: everything from 2001-05-01 onward.
longer_ts['2001-05-01':]

In [None]:
# Index labels covered by the string-bounded slice.
longer_ts['2001-05-01':'2001-05-10'].index

In [None]:
# A datetime object can serve as a slice bound as well.
longer_ts[datetime(2001, 5, 1):]

In [None]:
# Repeats the earlier string-bounded slice for 2001-05-01 through 2001-05-10.
longer_ts['2001-05-01':'2001-05-10']

In [None]:
# Rebinds `dates` to 100 weekly timestamps ('W-WED' anchors the weeks on Wednesday)
# and uses them to index a four-column frame of random values.
dates = pd.date_range('1/1/2000', periods=100, freq='W-WED')
long_df = pd.DataFrame(np.random.standard_normal((100, 4)),
                        index=dates,
                        columns=['Colorado', 'Texas', 'New York', 'Ohio'])
long_df

In [None]:
# Rows whose index falls in February 2000.
long_df.loc['2000-02']

### Time Series with Duplicate Indices

In [None]:
# Rebinds `dates` to an index in which 1/2/2000 appears three times, then
# attaches the integers 0..4 to it.
dates = pd.DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000',
                            '1/2/2000', '1/3/2000'])
dup_ts = pd.Series(np.arange(5), index=dates)
dup_ts

In [None]:
# Reports whether every index label is distinct.
dup_ts.index.is_unique

In [None]:
# Look up the label that occurs more than once in the index.
dup_ts['1/2/2000']

In [None]:
# Group on the (single) index level so that rows sharing a timestamp are aggregated together.
grouped = dup_ts.groupby(level=0)
grouped.mean(), grouped.count()

# Date Ranges, Frequencies, and Shifting

### Intro

In [None]:
# Redisplay `ts` before it is resampled in the next cell.
ts

In [None]:
# resample('D') only creates a resampler object here; no aggregation method is called on it.
resampler = ts.resample('D')
resampler

### Generating Date Ranges

In [None]:
# A date range given by its two endpoints.
index = pd.date_range('2012-04-01', '2012-06-01')
index

In [None]:
# 20 periods counted forward from the start date.
pd.date_range(start='2012-04-01', periods=20)

In [None]:
# 20 periods ending on the end date.
pd.date_range(end='2012-06-01', periods=20)

In [None]:
# 'BM' requests the last business day of each month.
# NOTE(review): recent pandas releases warn that 'BM' is deprecated in favour of
# 'BME' — confirm against the installed pandas version.
pd.date_range('2000-01-01', '2001-01-01', freq='BM')

In [None]:
# The start value carries a time of day; normalize=True asks for the generated dates at midnight.
pd.date_range('2012-05-02 12:56:31', periods=5, normalize=True)

### Frequencies and Date Offsets

In [None]:
from pandas.tseries.offsets import Hour, Minute

In [None]:
# An offset object created with no arguments.
hour = Hour()
hour

In [None]:
# Passing an integer gives a multiple of the base offset.
four_hours = Hour(4)
four_hours

In [None]:
# Timestamps every four hours across three days. The hour alias is written in
# lowercase to match the '1h30min' spelling used later in this section; the
# uppercase 'H' form triggers a deprecation warning on recent pandas releases.
pd.date_range('2000-01-01', '2000-01-03 23:59', freq='4h')

In [None]:
# Offset objects can be combined by addition.
Hour(2) + Minute(30)

In [None]:
# A frequency string can combine several units, here one hour plus thirty minutes.
pd.date_range('2000-01-01', periods=10, freq='1h30min')

In [None]:
# 'WOM-3WED' is the week-of-month alias for the third Wednesday of each month.
monthly_dates = pd.date_range('2023-09-01', periods=10, freq='WOM-3WED')
monthly_dates

In [None]:
# Materialise the index as a plain Python list (relies on the builtin `list`
# not having been rebound elsewhere in the session).
list(monthly_dates)

In [None]:
# Show every third element of monthly_dates.
list(monthly_dates[::3])

### Shifting (Leading and Lagging) Data

In [None]:
# Rebinds `ts` to four random values on a monthly ('M') date range starting at 2000-01-01.
# NOTE(review): recent pandas releases warn that 'M' is deprecated for date
# ranges in favour of 'ME' — confirm against the installed pandas version.
ts = pd.Series(np.random.standard_normal(4),
                index=pd.date_range('2000-01-01', periods=4, freq='M'))
ts

In [None]:
# Shift the values two positions later; no freq is given, so the index labels stay as they are.
ts.shift(2)

In [None]:
# With freq supplied, the index labels are advanced by two months instead of moving the data.
ts.shift(2, freq='M')

In [None]:
# A negative count shifts the values earlier.
ts.shift(-2)

In [None]:
# Relative change of each value versus the previous one.
ts / ts.shift(1) - 1

In [None]:
# Move the index labels forward by three days.
ts.shift(3, freq='D')

In [None]:
# Move the index labels forward by ninety minutes.
ts.shift(1, freq='90min')

In [None]:
from pandas.tseries.offsets import Day, MonthEnd

In [None]:
# Rebinds `now` to a fixed date for the offset examples below.
now = datetime(2011, 11, 17)

In [None]:
# An offset multiplied by an integer, added to a datetime.
now + 3 * Day()

In [None]:
# The same three-day offset, with the multiple passed to the constructor instead.
now + Day(3)

In [None]:
# Adding a MonthEnd offset to a mid-month date.
now + MonthEnd()

In [None]:
# The same offset with a multiple of 2.
now + MonthEnd(2)

In [None]:
# Roll the date forward onto the offset.
MonthEnd().rollforward(now)

In [None]:
# Roll the date back onto the offset.
MonthEnd().rollback(now)

# Time Zone Handling

In [None]:
import pytz

In [None]:
# The last five entries of pytz's list of common time zone names.
pytz.common_timezones[-5:]

In [None]:
# Everything from the sixth entry to the end of the list.
# NOTE(review): next to the [-5:] cell this may have been meant as [:5]
# (the first five names) — confirm the intent.
pytz.common_timezones[5:]

In [None]:
# Look up a time zone object by name.
tz = pytz.timezone('America/New_York')
tz

### Time Zone Localization and Conversion

In [None]:
# Rebinds `dates` and `ts`: six daily timestamps at 09:30, created without a tz argument.
dates = pd.date_range('2012-03-09 9:30', periods=6, freq='D')
ts = pd.Series(np.random.randn(len(dates)), index=dates)
ts

In [None]:
# Print the time zone attached to the index (none was supplied when it was built).
print(ts.index.tz)

In [None]:
# Passing tz= creates the range with a time zone already attached.
pd.date_range('2012-03-09 9:30', periods=10, freq='D', tz='UTC')

In [None]:
# Attach UTC to the naive timestamps; the result is stored under a new name.
ts_utc = ts.tz_localize('UTC')
ts_utc

In [None]:
# Inspect the index of the localized series.
ts_utc.index

In [None]:
# Convert the UTC series to New York time; the result is displayed but not assigned.
ts_utc.tz_convert('America/New_York')

In [None]:
# The conversion above was not assigned back, so this prints the tz still attached to ts_utc.
print(ts_utc.index.tz)

In [None]:
# Localize the same naive series to New York time instead of UTC.
ts_eastern = ts.tz_localize('America/New_York')
ts_eastern

In [None]:
# Convert the New York-localized series to UTC.
ts_eastern.tz_convert('UTC')

In [None]:
# Convert the New York-localized series to Berlin time.
ts_eastern.tz_convert('Europe/Berlin')

In [None]:
# tz_localize called directly on the index rather than on the Series.
ts.index.tz_localize('Asia/Shanghai')

### Operations with Time Zone-Aware Timestamp Objects

In [None]:
# Localize a single naive Timestamp to UTC, then view it in New York time.
stamp = pd.Timestamp('2011-03-12 04:00')
stamp_utc = stamp.tz_localize('utc')
stamp_utc.tz_convert('America/New_York')

In [None]:
# A Timestamp created with a time zone from the start by passing tz=.
stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz='Europe/Moscow')
stamp_moscow

### Operations Between Different Time Zones

# Periods and Period Arithmetic

### Intro

In [None]:
# An annual period for 2023; 'A-DEC' is the annual alias anchored on December.
p = pd.Period(2023, freq='A-DEC')
p

In [None]:
# Adding an integer to a period.
p + 5

In [None]:
# Subtracting an integer from a period.
p - 2

In [None]:
# Difference between two periods that share the same frequency (2014 minus 2023).
pd.Period('2014', freq='A-DEC') - p

In [None]:
# Monthly periods between the two given dates.
periods = pd.period_range('2023-07-31', '2023-12-31', freq='M')
periods

In [None]:
# Random values indexed by those monthly periods.
pd.Series(np.random.randn(len(periods)), index=periods)

In [None]:
# Generate the business days from 2022-12-31 to 2023-12-31 and lay them out as
# overlapping windows of `windows_bd` consecutive days, one window per row.
bd = pd.date_range('2022-12-31', '2023-12-31', freq='B')
windows_bd = 10
# Collect the windows under a descriptive name: assigning to `list` would shadow
# the builtin for the rest of the kernel session and break later list(...) calls.
windows = [bd[first:first + windows_bd].tolist()
           for first in range(len(bd) - windows_bd + 1)]
# Build, relabel and display one and the same frame (the previous version
# created one name and then read from a different, undefined one).
rolling_window_df = pd.DataFrame(windows)
# Index each row by the last date of its window.
rolling_window_df.index = rolling_window_df.iloc[:, -1]
rolling_window_df.index.name = 'date'
# Label the columns 1..windows_bd instead of 0..windows_bd-1.
rolling_window_df.columns = range(1, windows_bd + 1)
rolling_window_df

In [None]:
# The previous cell, converted into a function.
def rolling_window_dates(bd, windows_bd):
    """Arrange `bd` as overlapping windows of `windows_bd` consecutive entries.

    Returns a DataFrame with one window per row, columns labelled
    1..windows_bd, indexed by the last entry of each window (index name 'date').

    Note: this name is bound again further down in the same cell by a
    definition that takes (start, end, freq, windows_bd).
    """
    n_windows = len(bd) - windows_bd + 1
    rows = [[*bd[first:first + windows_bd]] for first in range(n_windows)]
    frame = pd.DataFrame(rows)
    # The last column holds the closing date of every window.
    frame.index = frame.iloc[:, -1]
    frame.index.name = 'date'
    frame.columns = range(1, windows_bd + 1)
    return frame

# The `bd` date range, converted into a function taking start, end and freq.
def bd(start, end, freq):
    """Return the pd.date_range from `start` to `end` at frequency `freq`."""
    return pd.date_range(start, end, freq=freq)

# The two functions above, combined into one.
def rolling_window_dates(start:str, end:str, freq:str, windows_bd:int) -> pd.DataFrame:
    """Build overlapping windows of `windows_bd` dates from a date range.

    The range is created with pd.date_range(start, end, freq=freq). The result
    has one window per row, columns labelled 1..windows_bd, and is indexed by
    the last date of each window (index name 'date').

    Note: a function with this same name and signature is defined again in the
    next cell, which rebinds the name.
    """
    calendar = pd.date_range(start, end, freq=freq)
    last_start = len(calendar) - windows_bd
    rows = [[*calendar[first:first + windows_bd]] for first in range(last_start + 1)]
    table = pd.DataFrame(rows)
    # The last column holds the closing date of every window.
    table.index = table.iloc[:, -1]
    table.index.name = 'date'
    table.columns = range(1, windows_bd + 1)
    return table




In [None]:
def rolling_window_dates(start:str, end:str, freq:str, windows_bd:int) -> pd.DataFrame:
    """Lay out a date range as overlapping windows, one window per row.

    Parameters
    ----------
    start, end : str
        Bounds passed to ``pd.date_range``.
    freq : str
        Frequency alias passed to ``pd.date_range`` (for example ``'B'``).
    windows_bd : int
        Number of consecutive dates in each window; must be at least 1.

    Returns
    -------
    pd.DataFrame
        One row per window, columns labelled ``1..windows_bd``, indexed by the
        last date of each window (index name ``'date'``). The frame has zero
        rows when the range holds fewer than ``windows_bd`` dates.

    Raises
    ------
    ValueError
        If ``windows_bd`` is smaller than 1.
    """
    # Reject sizes that previously surfaced as an unrelated IndexError or
    # length-mismatch error deep inside pandas.
    if windows_bd < 1:
        raise ValueError(f"windows_bd must be >= 1, got {windows_bd}")

    dates = pd.date_range(start, end, freq=freq)
    column_labels = range(1, windows_bd + 1)
    n_windows = len(dates) - windows_bd + 1

    # Not enough dates for even one window: return an empty, correctly shaped frame.
    if n_windows <= 0:
        return pd.DataFrame(columns=column_labels,
                            index=pd.DatetimeIndex([], name='date'))

    # `.tolist()` keeps this independent of the builtin `list`, which an earlier
    # cell in this notebook rebinds at module level.
    windows = [dates[first:first + windows_bd].tolist() for first in range(n_windows)]
    frame = pd.DataFrame(windows)
    # Index each row by the last date of its window.
    frame.index = frame.iloc[:, -1]
    frame.index.name = 'date'
    frame.columns = column_labels
    return frame

# 15-date windows over the business-day ('B') range from 2022-12-31 to 2023-12-31.
rolling_window_dates('2022-12-31', '2023-12-31', 'B', 15)

In [None]:
def rolling_window_dates_gen(start:str, end:str, freq:str, windows_len:int) -> Iterator[List[pd.Timestamp]]:
    """Lazily yield overlapping windows of consecutive dates.

    Parameters
    ----------
    start, end : str
        Bounds passed to ``pd.date_range``.
    freq : str
        Frequency alias passed to ``pd.date_range``.
    windows_len : int
        Number of consecutive dates in each yielded window.

    Yields
    ------
    list of pd.Timestamp
        The next window of ``windows_len`` dates; successive windows start one
        date later than the previous one. Nothing is yielded when the range
        holds fewer than ``windows_len`` dates.
    """
    # The return annotation describes a generator of lists: this function
    # contains `yield`, so it never returns a DataFrame.
    date_range = pd.date_range(start, end, freq=freq)
    for first in range(len(date_range) - windows_len + 1):
        yield [day for day in date_range[first:first + windows_len]]
# Create the generator; because the function uses `yield`, no window is produced until next() is called.
rw_gen = rolling_window_dates_gen('2022-12-31', '2023-12-31', 'B', 5)

In [None]:
# Advance the generator by one window and print it; re-running this cell moves on to the next window.
print(next(rw_gen))

### Period Frequency Conversion

In [None]:
# Rebinds `p` to the annual period 2023 with the 'A-DEC' frequency.
p = pd.Period('2023', freq='A-DEC')
p

In [None]:
# Convert the annual period to a monthly one, taking the start of the year.
p.asfreq('M', how='start')

In [None]:
# Convert the annual period to a monthly one, taking the end of the year.
p.asfreq('M', how='end')

In [None]:
# Same conversion with `how` omitted, so the library default applies
# (presumably the end of the period — compare with the two cells above).
p.asfreq('M')

In [None]:
# Rebinds `p` to an annual period anchored on June ('A-JUN') instead of December.
p = pd.Period('2023', freq='A-JUN')
p

In [None]:
# Rebinds `periods` and `ts`: random values indexed by annual periods from 2006 to 2009.
periods = pd.period_range('2006', '2009', freq='A-DEC')
ts = pd.Series(np.random.standard_normal(len(periods)), index=periods)
ts

In [None]:
# Re-express each annual period as a month, taking the start of the year.
ts.asfreq('M', how='start')

In [None]:
# Re-express each annual period as a month, taking the end of the year.
ts.asfreq('M', how='end')

In [None]:
# Re-express each annual period as a business day, taking the end of the year.
ts.asfreq('B', how='end')

### Quarterly Period Frequencies

### Converting Timestamps to Periods (and Back)

### Creating a PeriodIndex from Arrays

# Resampling and Frequency Conversion

### Intro

### Downsampling

### Upsampling and Interpolation

### Resampling with Periods

### Grouped Time Resampling

# Moving Window Functions

### Intro

### Exponentially Weighted Functions

### Binary Moving Window Functions

### User-Defined Moving Window Functions