In [20]:


import pandas as pd
import numpy as np
from model_team14 import select_features

## select imputed & transformed data
X=pd.read_csv('../data/X_data_tr.csv', index_col='date', parse_dates=True)
y=pd.read_csv('../data/y_data_tr.csv', index_col='date', parse_dates=True)
threshold=0.3
criteria=None

df_feature=pd.read_csv('../data/df_feature.csv')

selected_features=list(df_feature[df_feature.select==1]['variable'])

def get_data(y_type, test_year, features):
    X_train=X[features][:-(test_year*12)]
    y_train=y[y_type][:-(test_year*12)]           
    X_test=X[features][-(test_year*12):]   
    y_test=y[y_type][-(test_year*12):]

    return X_train, y_train, X_test, y_test

In [21]:
def create_dataset(X, y, time_steps=1):
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        v = X.iloc[i:(i + time_steps)].values
        Xs.append(v)        
        ys.append(y.iloc[i + time_steps])
    return np.array(Xs), np.array(ys)
# Timesteps will define how many Elements we have
# TIME_STEPS = 5

# X_train, y_train = create_dataset(X_train, y_train, TIME_STEPS)
# X_test, y_test = create_dataset(X_test, y_test, TIME_STEPS)

# print(X_train.shape)

In [22]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout

def grid_search(trainX, trainy, testX, testy, y_type, epochs=[15,20], batch_sizes=[12,64], time_steps=[5]):
    results = []
    best_auc = 0
    best_params = None

    for time_step in time_steps:
        X_train, y_train = create_dataset(trainX, trainy, time_step)
        X_test, y_test = create_dataset(testX, testy, time_step)
        loss_type = 'categorical_crossentropy' if y_type == 'y_agg' else 'binary_crossentropy'

        for num_epochs in epochs:
            for batch_size in batch_sizes:
                # create the model
                model = Sequential()
                model.add(LSTM(100, input_shape=(X_train.shape[1], X_train.shape[2])))
                # model.add(Dropout(0.2))
                model.add(Dense(1, activation='sigmoid'))

                model.compile(loss=loss_type, optimizer='adam', metrics=['AUC'])
                model.fit(X_train, y_train, epochs=num_epochs, batch_size=batch_size)
                print(model.summary())

                # Final evaluation of the model
                scores = model.evaluate(X_test, y_test, verbose=0)
                auc = scores[1]*100
                print("AUC: %.2f%%" % (auc))

                current_result = {
                    'time_steps': time_step,
                    'num_epochs': num_epochs,
                    'batch_size': batch_size,
                    'AUC': "%.2f%%" % (auc)
                }
                results.append(current_result)

                if auc > best_auc:
                    best_auc = auc
                    best_params = current_result

    print("Best AUC: ", best_params)
    return results


In [23]:
y_type = 'y_agg'
X_train, y_train, X_test, y_test = get_data(y_type, 8, selected_features)

print(grid_search(X_train, y_train, X_test, y_test, y_type))

Epoch 1/15


  return dispatch_target(*args, **kwargs)


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Model: "sequential_40"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_40 (LSTM)              (None, 100)               46800     
                                                                 
 dense_40 (Dense)            (None, 1)                 101       
                                                                 
Total params: 46901 (183.21 KB)
Trainable params: 46901 (183.21 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
AUC: 49.62%
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Model: "sequential_41"
________________________________________

In [24]:
y_type = 'y_nber'
X_train, y_train, X_test, y_test = get_data(y_type, 8, selected_features)

print(grid_search(X_train, y_train, X_test, y_test, y_type))

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Model: "sequential_44"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_44 (LSTM)              (None, 100)               46800     
                                                                 
 dense_44 (Dense)            (None, 1)                 101       
                                                                 
Total params: 46901 (183.21 KB)
Trainable params: 46901 (183.21 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
AUC: 87.92%
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Model: "sequential_45"
_____________________________

In [25]:
y_type = 'y_oecd'
X_train, y_train, X_test, y_test = get_data(y_type, 8, selected_features)

print(grid_search(X_train, y_train, X_test, y_test, y_type))

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Model: "sequential_48"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_48 (LSTM)              (None, 100)               46800     
                                                                 
 dense_48 (Dense)            (None, 1)                 101       
                                                                 
Total params: 46901 (183.21 KB)
Trainable params: 46901 (183.21 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
AUC: 84.26%
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Model: "sequential_49"
_____________________________