### Import dependencies

In [1]:
import pandas as pd
from functools import reduce
import numpy as np
from collections import deque
import random
from sklearn import preprocessing
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint

### Merge datasets

In [2]:
# Read in data
cpi = pd.read_csv("../resources/cpi_final.csv")
gdp = pd.read_csv("../resources/gdp_final.csv")
gdp_pct = pd.read_csv("../resources/gdp_pct_chg_final.csv")
houst = pd.read_csv("../resources/housing_starts_final.csv")
opg = pd.read_csv("../resources/output_gap_final.csv")
rec_dt = pd.read_csv("../resources/recession_dates_final.csv")
unrate = pd.read_csv("../resources/unemployment_rate_final.csv")

In [3]:
# Combine all data sets into one data frame
dfs = [cpi, gdp, gdp_pct, houst, opg, rec_dt, unrate]
df = reduce(lambda  left,right: pd.merge(left,right,on=['quarter'],how='outer'), dfs)
df.head()

Unnamed: 0,quarter,avg_consumer_price_index,date_x,gdp,date_y,gdp_pct_change,avg_housing_starts,date_x.1,output_gap,date_y.1,target,avg_unemployment_rate
0,1947Q1,21.7,1947-01-01,243.164,,,,,,1947-01-01,0.0,
1,1947Q2,22.01,1947-04-01,245.968,1947-04-01,4.7,,,,1947-04-01,0.0,
2,1947Q3,22.49,1947-07-01,249.585,1947-07-01,6.0,,,,1947-07-01,0.0,
3,1947Q4,23.126667,1947-10-01,259.745,1947-10-01,17.3,,,,1947-10-01,0.0,
4,1948Q1,23.616667,1948-01-01,265.742,1948-01-01,9.6,,,,1948-01-01,0.0,4.4


In [4]:
# Drop date columns
df = df.iloc[:,[0,1,3,5,6,8,10,11]]

In [5]:
# Sort data frame by quarter
df = df.sort_values(by=['quarter'])

In [6]:
# Drop rows with missing values
df = df.dropna()

In [7]:
# Set index to quarter
df = df.set_index('quarter')

In [8]:
# Reorder columns
df = df[['avg_consumer_price_index', 'gdp', 'gdp_pct_change',
       'avg_housing_starts', 'output_gap', 'avg_unemployment_rate','target']]

In [9]:
df.head(10)

Unnamed: 0_level_0,avg_consumer_price_index,gdp,gdp_pct_change,avg_housing_starts,output_gap,avg_unemployment_rate,target
quarter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1959Q1,28.993333,510.33,8.9,1648.0,-0.082061,6.8,0.0
1959Q2,29.043333,522.653,10.0,1530.333333,1.229706,5.166667,0.0
1959Q3,29.193333,525.034,1.8,1505.666667,0.356642,4.9,0.0
1959Q4,29.37,528.6,2.7,1457.333333,-0.303127,5.033333,0.0
1960Q1,29.396667,542.648,11.1,1357.333333,0.957161,5.966667,0.0
1960Q2,29.573333,541.08,-1.2,1269.0,-0.564976,5.266667,1.0
1960Q3,29.59,545.604,3.4,1212.666667,-1.056845,5.133333,1.0
1960Q4,29.78,540.197,-3.9,1185.0,-3.291173,5.666667,1.0
1961Q1,29.84,545.018,3.6,1240.333333,-3.596054,7.833333,1.0
1961Q2,29.83,555.545,8.0,1258.666667,-2.914593,6.966667,0.0


# Create Model

#### Shift Data

In [10]:
# Shift by 1, 2, and 4 quarters (3, 6, 12 months)
df['future_1q'] = df['target'].shift(-1)
df['future_2q'] = df['target'].shift(-2)
df['future_4q'] = df['target'].shift(-4)

In [81]:
df.head(10)

Unnamed: 0_level_0,avg_consumer_price_index,gdp,gdp_pct_change,avg_housing_starts,output_gap,avg_unemployment_rate,target,future_1q,future_2q,future_4q
quarter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1959Q1,28.993333,510.33,8.9,1648.0,-0.082061,6.8,0.0,0.0,0.0,0.0
1959Q2,29.043333,522.653,10.0,1530.333333,1.229706,5.166667,0.0,0.0,0.0,1.0
1959Q3,29.193333,525.034,1.8,1505.666667,0.356642,4.9,0.0,0.0,0.0,1.0
1959Q4,29.37,528.6,2.7,1457.333333,-0.303127,5.033333,0.0,0.0,1.0,1.0
1960Q1,29.396667,542.648,11.1,1357.333333,0.957161,5.966667,0.0,1.0,1.0,1.0
1960Q2,29.573333,541.08,-1.2,1269.0,-0.564976,5.266667,1.0,1.0,1.0,0.0
1960Q3,29.59,545.604,3.4,1212.666667,-1.056845,5.133333,1.0,1.0,1.0,0.0
1960Q4,29.78,540.197,-3.9,1185.0,-3.291173,5.666667,1.0,1.0,0.0,0.0
1961Q1,29.84,545.018,3.6,1240.333333,-3.596054,7.833333,1.0,0.0,0.0,0.0
1961Q2,29.83,555.545,8.0,1258.666667,-2.914593,6.966667,0.0,0.0,0.0,0.0


#### Function to process data

In [40]:
# constants
import time

seq_len = 4
epochs = 100
batch_size = 64
name = f"{seq_len}SeqLen-1QLag-{int(time.time())}"

In [None]:
def preprocess_df(df):
    df = df.drop("target", 1)
    
    for col in df.columns:
        if col not in ['future_1q','future_2q','future_4q']:
            df[col] = preprocessing.scale(df[col].values) # scale each column between 0 and 1
            
    df.dropna(inplace=True) # drop any missing values
    
    seq_data = []
    prev_qtrs = deque(maxlen=seq_len)
    
    for i in df.values:
        prev_qtrs.append([n for n in i[:-3]])
        if len(prev_qtrs) == seq_len:
            seq_data.append([np.array(prev_qtrs), i[-3]]) # sequential data based on future_1q
            
    random.shuffle(seq_data)
    
    return(seq_data)

#### Split Data

In [None]:
# Separate data set into training and testing sets
times = sorted(df.index.values)
last_5pct = times[-int(0.05*len(times))]

test_df = df[df.index >= last_5pct] # Testing data = last 5% of data
train_df = df[df.index < last_5pct] # Training data = everything else

In [None]:
a = preprocess_df(train_df)

In [None]:
print(a[1])

In [57]:
X_train, y_train = preprocess_df(train_df)
X_test, y_test = preprocess_df(test_df)

ValueError: too many values to unpack (expected 2)

In [None]:
print(X_train)

In [None]:
print(y_train)

#### Build Model

In [None]:
model = Sequential()
model.add(LSTM(128, input_shape=(X_train.shape[1:]), return_sequences=True))