In [1]:
import pandas as pd
import numpy as np
from collections import deque
import random
from sklearn import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
# from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
df = pd.read_csv('../resources/all.csv')

In [3]:
# Move 'target' column to the end
df = df[['quarter', 'avg_consumer_price_index', 'gdp', 'gdp_pct_change',
       'avg_housing_starts', 'output_gap', 'avg_unemployment_rate',
       'fed_funds_avg_rate', 'fed_funds_percent_change_prev_quarter',
       'fed_funds_st_dev_rate', '10YT_minus_2YT_avg',
       '10YT_minus_2YT_percent_change_prev_quarter', 'cpi_US_total','target']]

In [4]:
# Rename target column
df = df.rename(columns={'target':'recession_actual'})

In [5]:
# Set index to 'quarter'
df = df.set_index('quarter')

In [6]:
# Shift data one quarter up
df['recession_1q_out'] = df['recession_actual'].shift(-1)

In [7]:
# Delete 'recession_actual' column and drop null values
del df['recession_actual']
df = df.dropna()

In [8]:
# Manually split dataframe into training and testing (while maintaining sequential order)
times = sorted(df.index.values)
last_npct = times[-int(0.2*len(times))]

train_df = df[(df.index < last_npct)]
test_df = df[(df.index >= last_npct)]

In [10]:
# Manually "train test split" for X and y
# X_train = train_df.iloc[:,:12]
# y_train = train_df['recession_1q_out']

# X_test = test_df.iloc[:,:12]
# y_test = test_df['recession_1q_out']

In [11]:
# Create scaler object
# X_scaler = StandardScaler().fit(X_train)

In [12]:
# Scale training and testing data
# X_train_scaled = X_scaler.transform(X_train)
# X_test_scaled = X_scaler.transform(X_test)

# reshape data if necessary at this stage (before sequences)

In [20]:
# Break data into sequences
seq_len = 4

def preprocess_df(df):
    for col in df.columns:
        if col != "recession_1q_out":
            df[col] = preprocessing.scale(df[col].values)
            
    seq_data = []
    prev_yrs = deque(maxlen=seq_len)

    for i in df.values:
        prev_yrs.append([n for n in i[:-1]]) # all data except last column (target)
        if len(prev_yrs) == seq_len:
            seq_data.append([np.array(prev_yrs), i[-1]]) # seq_data = [(tuple_of_features), target]

        random.shuffle(seq_data) # shuffle data sequences (shuffling by batches of 4 quarters)  
        
    X = []
    y = []
    
    for seq, target in seq_data:
        X.append(seq)
        y.append(target)
        
    return np.array(X),y
#     return X,y

In [21]:
# Split - scale - reshape
X_train, y_train = preprocess_df(train_df)
X_test, y_test = preprocess_df(test_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#ind

In [None]:
# NORMALIZE DATA
# X_scaler = StandardScaler().fit(X_train) # Create scaler object
# X_train_scaled = X_scaler.transform(X_train) # Scale training data
# X_test_scaled = X_scaler.transform(X_test) # Scale testing data

In [18]:
# Method to reshape data
# def reshape_data(obj):
#     reshaped_obj = np.reshape(obj, (obj.shape[0], obj.shape[1], 1))
#     return reshaped_obj

In [None]:
# reshaped_X_train = reshape_data(X_train)
# reshaped_X_test = reshape_data(X_test)

In [22]:
model = Sequential()

In [23]:
# Add layers
model.add(LSTM(128, input_shape=(X_train.shape[1:]), return_sequences=True))
model.add(Dropout(0.4))
model.add(BatchNormalization())  # Normalize activation outputs

model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.4))
model.add(BatchNormalization())

model.add(LSTM(128))
model.add(Dropout(0.4))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.4))

model.add(Dense(2, activation='softmax'))

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [24]:
# Compile model
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=['accuracy'])

In [25]:
# Fit the model to the training data
model.fit(X_train, y_train, epochs=100, shuffle=True, verbose=2)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 1/100
133/133 - 8s - loss: 0.8370 - acc: 0.6466
Epoch 2/100
133/133 - 0s - loss: 0.4949 - acc: 0.7444
Epoch 3/100
133/133 - 0s - loss: 0.4370 - acc: 0.8120
Epoch 4/100
133/133 - 0s - loss: 0.3710 - acc: 0.8120
Epoch 5/100
133/133 - 0s - loss: 0.2997 - acc: 0.8797
Epoch 6/100
133/133 - 0s - loss: 0.3112 - acc: 0.8346
Epoch 7/100
133/133 - 0s - loss: 0.2637 - acc: 0.8872
Epoch 8/100
133/133 - 0s - loss: 0.2374 - acc: 0.8947
Epoch 9/100
133/133 - 0s - loss: 0.2618 - acc: 0.9098
Epoch 10/100
133/133 - 0s - loss: 0.2538 - acc: 0.8797
Epoch 11/100
133/133 - 0s - loss: 0.2334 - acc: 0.8947
Epoch 12/100
133/133 - 0s - loss: 0.2436 - acc: 0.8872
Epoch 13/100
133/133 - 0s - loss: 0.2560 - acc: 0.8797
Epoch 14/100
133/133 - 0s - loss: 0.2001 - acc: 0.9323
Epoch 15/100
133/133 - 0s - loss: 0.2333 - acc: 0.9248
Epoch 16/100
133/133 - 0s - loss: 0.2022 - acc: 0.9173
Epoch 17/100
133/133 - 0s - loss: 0

<tensorflow.python.keras.callbacks.History at 0x1a422a3828>

In [26]:
# Validate model using test data
model_loss, model_accuracy = model.evaluate(X_test, y_test, verbose=2)

31/31 - 1s - loss: 0.3263 - acc: 0.9032


In [None]:
# Make predictions using test data
predictions = model.predict_classes(X_test)
predictions