In [2]:
import pandas as pd
import numpy as np
from collections import deque
import random
from sklearn import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
# from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [11]:
# Read in data
df = pd.read_csv('../resources/all.csv')

# Move 'target' column to the end
df = df[['quarter', 'avg_consumer_price_index', 'gdp', 'gdp_pct_change',
       'avg_housing_starts', 'output_gap', 'avg_unemployment_rate',
       'fed_funds_avg_rate', 'fed_funds_percent_change_prev_quarter',
       'fed_funds_st_dev_rate', '10YT_minus_2YT_avg',
       '10YT_minus_2YT_percent_change_prev_quarter', 'cpi_US_total','target']]

# Rename target column
df = df.rename(columns={'target':'recession_actual'})

# Set index to 'quarter'
df = df.set_index('quarter')

# Shift data one quarter up
df['recession_1q_out'] = df['recession_actual'].shift(-1)

# Delete 'recession_actual' column and drop null values
del df['recession_actual']
df = df.dropna()

# Split Data

In [12]:
# Manually split dataframe into training and testing (while maintaining sequential order)
times = sorted(df.index.values)
last_npct = times[-int(0.2*len(times))]

train_df = df[(df.index < last_npct)]
test_df = df[(df.index >= last_npct)]

In [9]:
# Split the training and testing dfs into X's and y's
# X_train = train_df.iloc[:,:12]
# y_train = train_df['recession_1q_out']

# X_test = test_df.iloc[:,:12]
# y_test = test_df['recession_1q_out']


# # Create scaler object
# X_scaler = StandardScaler().fit(X_train)


# # Scale training and testing data
# X_train_scaled = X_scaler.transform(X_train)
# X_test_scaled = X_scaler.transform(X_test)


# # Reshape data
# reshaped_X_train = np.reshape(X_train_scaled, (X_train_scaled.shape[0], X_train_scaled.shape[1], 1))
# reshaped_X_test = np.reshape(X_test_scaled, (X_test_scaled.shape[0], X_test_scaled.shape[1], 1))

In [13]:
# Break data into sequences
seq_len = 4

def preprocess_df(df):
    for col in df.columns:
        if col != "recession_1q_out":
            df[col] = preprocessing.scale(df[col].values)
            
    seq_data = []
    prev_yrs = deque(maxlen=seq_len)

    for i in df.values:
        prev_yrs.append([n for n in i[:-1]]) # all data except last column (target)
        if len(prev_yrs) == seq_len:
            seq_data.append([np.array(prev_yrs), i[-1]]) # seq_data = [(tuple_of_features), target]

        random.shuffle(seq_data) # shuffle data sequences (shuffling by batches of 4 quarters)  
        
    X = []
    y = []
    
    for seq, target in seq_data:
        X.append(seq)
        y.append(target)
        
    return np.array(X),y

In [14]:
X_train, y_train = preprocess_df(train_df)
X_test, y_test = preprocess_df(test_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#ind

# Build Model

In [15]:
model = Sequential()

In [16]:
# Add layers
model.add(LSTM(128, input_shape=(X_train.shape[1:]), return_sequences=True))
model.add(Dropout(0.4))
model.add(BatchNormalization())  # Normalize activation outputs

model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.4))
model.add(BatchNormalization())

model.add(LSTM(128))
model.add(Dropout(0.4))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.4))

model.add(Dense(2, activation='softmax'))

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [17]:
# Compile model
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=['accuracy'])

In [18]:
# Fit the model to the training data
model.fit(X_train, y_train, epochs=100, shuffle=True, verbose=2)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 1/100
133/133 - 12s - loss: 1.0578 - acc: 0.5414
Epoch 2/100
133/133 - 1s - loss: 0.8659 - acc: 0.6692
Epoch 3/100
133/133 - 1s - loss: 0.7289 - acc: 0.7368
Epoch 4/100
133/133 - 1s - loss: 0.4633 - acc: 0.8120
Epoch 5/100
133/133 - 1s - loss: 0.4765 - acc: 0.8045
Epoch 6/100
133/133 - 1s - loss: 0.3992 - acc: 0.8271
Epoch 7/100
133/133 - 1s - loss: 0.5132 - acc: 0.7820
Epoch 8/100
133/133 - 1s - loss: 0.3685 - acc: 0.8120
Epoch 9/100
133/133 - 0s - loss: 0.5171 - acc: 0.7970
Epoch 10/100
133/133 - 0s - loss: 0.2649 - acc: 0.8647
Epoch 11/100
133/133 - 0s - loss: 0.2970 - acc: 0.8571
Epoch 12/100
133/133 - 0s - loss: 0.3722 - acc: 0.8571
Epoch 13/100
133/133 - 0s - loss: 0.3219 - acc: 0.8571
Epoch 14/100
133/133 - 0s - loss: 0.2363 - acc: 0.8797
Epoch 15/100
133/133 - 0s - loss: 0.3001 - acc: 0.8647
Epoch 16/100
133/133 - 0s - loss: 0.3531 - acc: 0.8421
Epoch 17/100
133/133 - 0s - loss: 

<tensorflow.python.keras.callbacks.History at 0x1a47bbd908>

In [19]:
# Validate model using test data
model_loss, model_accuracy = model.evaluate(X_test, y_test, verbose=2)

31/31 - 1s - loss: 0.2372 - acc: 0.8387


In [20]:
# Make predictions using test data
predictions = model.predict_classes(X_test)
predictions

array([0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0])