In [15]:
import pandas as pd
import numpy as np
from typing import Union, List

In [151]:
def lagger(
    dataset:pd.DataFrame, n_lags:int,
    price_columns : Union[str,List[str]]) -> pd.DataFrame:
    """
    Create columns of time lags

    Inputs
    ------
    dataset : dataframe to lag
    n_lags : number of time lags; lags 1..n_lags are created
    price_columns : name, or list-like of names, of the columns to lag.
        Names must be strings because every lagged copy is stored under
        '<column>_<lag>'.

    Returns
    ------
    result : new dataframe holding the original columns plus the lagged
        copies, with every NaN replaced by 0 and the columns sorted
        lexically (so '<column>_10' is placed before '<column>_2').
    """
    # A bare string would otherwise be iterated character by character and
    # dataset[str] would return a Series, which has no .assign().
    if isinstance(price_columns, str):
        lag_columns = [price_columns]
    else:
        lag_columns = list(price_columns)

    # Build every lagged column first and attach them with a single assign
    # instead of copying the growing frame once per lag. Raw values are used
    # so the assignment is positional, independent of index alignment.
    lagged = {
        col + '_' + str(lag): dataset[col].shift(lag).values
        for lag in range(1, n_lags + 1)
        for col in lag_columns
    }
    df = dataset[lag_columns].assign(**lagged)

    # Carry over the columns that were not lagged, then zero-fill the gaps
    # introduced by shifting (and any NaN already present in the input).
    untouched = dataset.drop(lag_columns, axis=1).columns
    result = df.assign(**{col: dataset[col] for col in untouched}).fillna(0)
    return result[sorted(result.columns)]

def complete_months(
    df:pd.DataFrame,
    start_date:str,
    end_date:str,
    )->pd.DataFrame:
    """
    Reindex a date-indexed frame onto every calendar day of a range.

    Inputs
    ------
    df : dataframe with a datetime index; only its first column is filled
    start_date : first day of the range (inclusive)
    end_date : last day of the range (inclusive)

    Returns
    ------
    df : new dataframe indexed by each day from start_date to end_date.
        Missing values of the first column are linearly interpolated and
        anything still missing is set to 0. Other columns are reindexed
        but not filled.
    """

    # TODO: Extend to support more columns

    idx = pd.date_range(start_date,end_date)
    # Reindex the frame that was passed in rather than a notebook-level
    # variable, so the function works for any caller.
    df = df.reindex(idx)
    df.iloc[:,0]=df.iloc[:,0].interpolate(method='linear').fillna(0)

    return df


def process_data(
    df:pd.DataFrame,freq:str,
    start_date:str='1982-01-01',
    end_date:str='2020-07-01',
    )-> pd.DataFrame:
    """
    Turn a raw series into period-over-period changes sampled at quarter starts.

    Inputs
    ------
    df : date-indexed dataframe of raw values
    freq : 'monthly', 'daily' or 'quarterly'
        monthly   - percentage change, 2 lags per column
        daily     - fill every calendar day via complete_months, percentage
                    change, 89 lags; only the first column and its lags
                    are kept
        quarterly - percentage change only, no lags and no resampling
    start_date : first date kept in the result (inclusive)
    end_date : last date kept in the result (inclusive)

    Returns
    ------
    df : transformed dataframe restricted to start_date..end_date. For
        'monthly' and 'daily' the values are rounded to 2 decimals and only
        the rows on the quarterly grid are kept.

    Raises
    ------
    NameError : if freq is not one of the supported values
    """

    if freq == 'monthly':
        n_lags = 2
    elif freq == 'daily':
        n_lags = 89
        column_name = df.columns[0]
        df = complete_months(df,
                            start_date=start_date,
                            end_date=end_date)
    elif freq == 'quarterly':
        # Already on the target frequency: no lagging or resampling needed.
        df = df.pct_change().replace([np.inf, -np.inf, np.nan], 0)
        return df.loc[start_date:end_date,:]

    else:
        # NameError is kept for backward compatibility with existing callers.
        raise NameError('Please choose a frequency (monthly, daily or quarterly)')

    # Undefined changes (first row, division by zero) become 0.
    df = df.pct_change().replace([np.inf, -np.inf, np.nan], 0)
    df = lagger(df,n_lags,df.columns).round(2)

    # Shift the index back one day so that quarter-start dates land on
    # quarter ends, sample on the quarter-end grid, then shift forward again.
    # The net effect is to keep the rows dated on the first day of a quarter.
    df.index = df.index - pd.Timedelta('1 days')
    df = df.resample('Q', convention='start').asfreq()
    df.index = df.index + pd.Timedelta('1 days')


    if freq == 'daily': # Make the order right, otherwise 10 will appear before 2
        column_order = [column_name] + [column_name+'_'+str(idx) for idx in range(1,n_lags+1)]
        df = df[column_order]

    return df.loc[start_date:end_date,:]
    

In [152]:
# Load each raw series (CSV indexed by its parsed 'Date' column) and run it
# through process_data at its own frequency, using the default date window.
daily = pd.read_csv('../data/daily_data.csv',index_col='Date',parse_dates=True)
daily_final = process_data(daily,freq='daily')
monthly = pd.read_csv('../data/monthly_data.csv',index_col='Date',parse_dates=True)
monthly_final = process_data(monthly,freq='monthly')
quarterly = pd.read_csv('../data/quarterly_data.csv',index_col='Date',parse_dates=True)
quarterly_final = process_data(quarterly,freq='quarterly')

In [61]:
# Step-by-step version of the transform inside process_data, with 2 lags.
# NOTE(review): `df` is not assigned at notebook level in any visible cell, so
# this cell fails on a fresh kernel. Presumably it held the monthly frame;
# confirm, or drop the cell in favour of process_data.
df = df.pct_change().replace([np.inf, -np.inf, np.nan], 0)
df = lagger(df,2,df.columns).round(2)
# Shift back one day, sample on the 'Q' grid, shift forward again.
df.index = df.index - pd.Timedelta('1 days')
df = df.resample('Q', convention='start').asfreq()
df.index = df.index + pd.Timedelta('1 days')

In [119]:
# Reload the raw daily CSV, indexed by its parsed 'Date' column.
daily = pd.read_csv('../data/daily_data.csv',index_col='Date',parse_dates=True)

In [120]:
# Reindex onto every calendar day of the range, linearly interpolate the first
# column and set whatever is still missing to 0.
# NOTE(review): the end date here (2020-10-30) differs from process_data's
# default end_date (2020-07-01).
idx = pd.date_range('1982-01-01','2020-10-30')
daily = daily.reindex(idx)
daily.iloc[:,0]=daily.iloc[:,0].interpolate(method='linear').fillna(0)

In [121]:
# Add 89 lags of the first column.
# NOTE(review): unlike process_data, no pct_change is applied before lagging.
column_name = daily.columns[0]
daily_lagged = lagger(daily,89,[column_name])

In [122]:
# Natural lag order: base column, then _1 ... _89 (lagger returns the columns
# sorted lexically, which would put '_10' before '_2').
column_order = [column_name] + [column_name+'_'+str(idx) for idx in range(1,90)]

In [123]:
# Keep the base column and its lags, arranged in column_order.
daily_lagged = daily_lagged[column_order]

In [124]:
# Shift the index back one day so quarter-start dates fall on quarter ends,
# sample on the 'Q' grid, then shift the index forward again.
daily_lagged.index = daily_lagged.index - pd.Timedelta('1 days')
daily_lagged = daily_lagged.resample('Q', convention='start').asfreq()
daily_lagged.index = daily_lagged.index + pd.Timedelta('1 days')

In [130]:
# Keep rows labelled 1982-01-01 through 2020-07-01.
daily_final = daily_lagged.loc['1982-01-01':'2020-07-01']

In [131]:
# Keep rows labelled 1982-01-01 through 2020-07-01.
# NOTE(review): depends on the notebook-level `df`, which no visible cell
# creates - presumably the monthly frame; confirm.
monthly_final = df.loc['1982-01-01':'2020-07-01']

In [132]:
# Keep rows labelled 1982-01-01 through 2020-07-01 of the raw quarterly frame.
# NOTE(review): no pct_change is applied here, unlike process_data's
# 'quarterly' branch - confirm which version is intended.
quarterly_final = quarterly.loc['1982-01-01':'2020-07-01']

In [135]:
# Display the monthly frame as the cell output.
monthly_final

Unnamed: 0_level_0,CPIAUCSL_PC1,CPIAUCSL_PC1_1,CPIAUCSL_PC1_2,FEDFUNDS,FEDFUNDS_1,FEDFUNDS_2,INDPRO,INDPRO_1,INDPRO_2,PAYNSA_PC1,PAYNSA_PC1_1,PAYNSA_PC1_2
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1982-01-01,8.3,8.9,9.6,13.22,12.37,13.31,50.30,51.33,51.89,-0.6,-0.1,0.5
1982-04-01,6.6,6.9,7.6,14.94,14.68,14.78,50.46,50.91,51.30,-1.3,-0.9,-0.6
1982-07-01,6.6,7.2,6.9,12.59,14.15,14.45,49.81,49.97,50.14,-2.2,-1.8,-1.3
1982-10-01,5.0,4.9,6.0,9.71,10.31,10.12,48.79,49.23,49.38,-2.7,-2.4,-2.4
1983-01-01,3.7,3.8,4.5,8.68,8.95,9.20,49.18,48.24,48.59,-1.8,-2.4,-2.6
...,...,...,...,...,...,...,...,...,...,...,...,...
2019-07-01,1.8,1.7,1.8,2.40,2.38,2.39,109.09,109.28,109.23,1.3,1.2,1.2
2019-10-01,1.8,1.7,1.7,1.83,2.04,2.13,109.03,109.47,109.85,1.3,1.3,1.3
2020-01-01,2.5,2.3,2.0,1.55,1.55,1.55,109.18,109.65,110.04,1.5,1.4,1.4
2020-04-01,0.4,1.5,2.3,0.05,0.65,1.58,91.27,104.52,109.30,-13.4,0.5,1.6
