## Predicting Crashes in Financial Markets - RNN LSTM
- Datasets: S&P500 (USA), Nikkei225 (Japan), SSE (Shanghai/China), HSI (Hong Kong), BSESN (India), SMI (Switzerland), BVSP (Brazil)
- Model: RNN LSTM
- Response variable: Crash within 1 / 3 / 6 months (0: no, 1: yes)
- Crash definition: Drawdown in 99.5% quantile

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
from keras import Sequential
from keras.layers import Dense, Activation, Dropout, LSTM
from sklearn import metrics
from datetime import datetime, timedelta
from pylab import rcParams
import os
import importlib
import sys
import warnings
if not sys.warnoptions:
    warnings.simplefilter('ignore')

In [None]:
# -------------------- data preparation -------------------- #
# Raw index files and the display names used for them in the rest of the
# notebook (the two lists are presumably matched by position).
datasets_original = [
    '^GSPC.csv', '^N225.csv', 'SSE.csv', '^HSI.csv', '^BSESN.csv',
    '^SSMI.csv', '^BVSP.csv',
]
dataset_names = ['S&P 500', 'N225', 'SSE', 'HSI', 'BSESN', 'SMI', 'BVSP']

# Drawdown thresholds that define a crash (determined in exploration.ipynb),
# presumably one per dataset in the order of dataset_names.
crash_thresholds = [-0.091, -0.109, -0.120, -0.144, -0.166, -0.110, -0.233]  # <-- Jacobsson
# Alternative thresholds:
# crash_thresholds = [-0.1053, -0.1495, -0.1706, -0.2334, -0.1563, -0.1492, -0.2264]  # <-- Sornette

# Settings forwarded to DataLoader.get_df_xy and reused by later cells.
months = 3               # <-- predict if crash n months ahead
select_features = False  # <-- if True: 8 time windows for mean price change and vol year
sequence = 5             # <-- number of days lookback as input (only if select_features=False)
additional_feat = False  # <-- if True: add mean price change and volatility for 4 time windows over 252 days
batch_size = 64          # <-- batch size needs to be specified to satisfy stateful=True
# vol = False            # <-- if True: include 10 day volatility for each day in sequence (only in prepare_data_2)

# The import relies on the working directory being the models folder; the
# working directory is then switched to the data folder before DataLoader is
# used. Keep these three statements in this order.
os.chdir('/home/roman/Documents/Projects/Bubbles/models')
from prepare_data_5 import DataLoader
os.chdir('/home/roman/Documents/Projects/Bubbles/data')

data = DataLoader(datasets_original, dataset_names)
df_combined, drawdowns, crashes = data.get_df_combined(crash_thresholds)
dfs_xy = data.get_df_xy(
    months=months,
    sequence=sequence,
    df_combined=df_combined,
    crashes=crashes,
    batch_size=batch_size,
    select_features=select_features,
    additional_feat=additional_feat,
)

In [None]:
# -------------------- RNN LSTM model -------------------- #
model_name = 'RNN LSTM'

# Network size and regularisation.
neurons = 50
dropout = 0
stateful = True
activation = 'linear'

# Training objective.
loss = 'mse'   # 'binary_crossentropy'
optimizer = 'adam'

# Number of time steps fed to the network: the lookback window, plus 8 extra
# inputs when the additional features are switched on.
inp_tsteps = sequence + (8 if additional_feat else 0)
def rnn_lstm(inp_tsteps, inp_dim, neurons, dropout):
    """Build the (uncompiled) two-layer LSTM network.

    The module-level settings ``batch_size``, ``stateful`` and ``activation``
    are read in addition to the arguments.

    Args:
        inp_tsteps: number of time steps per input sample.
        inp_dim: number of features per time step.
        neurons: number of units in each LSTM layer.
        dropout: dropout rate applied after each LSTM layer; a falsy value
            (0 / None) adds no Dropout layer at all.

    Returns:
        A keras ``Sequential`` model ending in a single-unit Dense layer.
    """
    model = Sequential()
    # A fixed batch_input_shape is passed because the layers are built with
    # the module-level `stateful` flag.
    model.add(LSTM(neurons, batch_input_shape=(batch_size, inp_tsteps, inp_dim),
                   stateful=stateful, return_sequences=True))
    if dropout:
        model.add(Dropout(dropout))
    model.add(LSTM(neurons, stateful=stateful, return_sequences=False))
    if dropout:
        model.add(Dropout(dropout))
    model.add(Dense(1, activation=activation))   # 'sigmoid'
    return model
# Build one instance up front to inspect the architecture.
model = rnn_lstm(
    inp_tsteps=inp_tsteps,
    inp_dim=1,
    neurons=neurons,
    dropout=dropout,
)
model.compile(optimizer=optimizer, loss=loss)
model.summary()

In [None]:
# -------------------- Train and test RNN LSTM model -------------------- #
# One model is trained per held-out dataset; weights are written to the
# model_weights directory every fifth epoch.
epochs = 100
y_train_all, y_test_all = [], []
y_pred_tr_all, y_pred_t_all = [], []
os.chdir('/home/roman/Documents/Projects/Bubbles/models/model_weights/')
for test_data in dataset_names:
    # Fresh network for every train/test split.
    model = rnn_lstm(inp_tsteps=inp_tsteps, inp_dim=1, neurons=neurons, dropout=dropout)
    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    print('------------------------------------------------------')
    print('Testdata: ' + str(test_data))
    print('------------------------------------------------------')
    for e in range(epochs):
        np_train_l = data.get_train_stateful(dfs_xy, dataset_names, test_data=test_data)
        for i, np_tr in enumerate(np_train_l):
            print('epoch: ' + str(e + 1) + ' dataset: ' + str(i + 1))
            # Last column is the label; the remaining columns become a
            # (samples, time steps, 1) input tensor.
            x_tr = np.expand_dims(np_tr[:, 0:-1], axis=2)
            y_tr = np_tr[:, -1].astype(int)
            model.fit(x_tr, y_tr, epochs=1, batch_size=batch_size, verbose=1, shuffle=False)
            # Clear the LSTM states before moving on to the next array.
            model.reset_states()
        # Checkpoint once the last training array of every 5th epoch is done.
        if (e + 1) % 5 == 0 and len(np_train_l) > 0:
            model.save_weights('stateful_{0}_{1}.hdf5'.format(test_data, e + 1))

In [None]:
# -------------------- Load weights and predict results -------------------- #
# For every train/test split: load the checkpoint of the chosen epoch and
# collect labels and raw predictions for the training and the test data.
epochs = 2 # <-- change to load model weights from previous epochs
# NOTE(review): the training cell only writes checkpoints every 5th epoch, so
# epochs=2 works only with weight files left over from an earlier run.

# Checkpoints are addressed by full path so that this cell does not depend on
# the training cell having changed the working directory in this session.
weights_dir = '/home/roman/Documents/Projects/Bubbles/models/model_weights/'

y_train_all = []
y_test_all = []
y_pred_tr_all = []
y_pred_t_all = []
for test_data in dataset_names:
    weights_file = os.path.join(weights_dir, 'stateful_{0}_{1}.hdf5'.format(test_data, epochs))
    if not os.path.isfile(weights_file):
        # Fail early with an actionable message instead of an HDF5 error.
        raise FileNotFoundError(
            'No checkpoint {0!r}: pick a value of `epochs` for which weights '
            'have been saved.'.format(weights_file))

    np_train, np_test = data.get_train_test(dfs_xy, dataset_names, test_data=test_data)
    # Last column is the label; the remaining columns become a
    # (samples, time steps, 1) input tensor.
    x_train = np.expand_dims(np_train[:, 0:-1], axis=2)
    y_train = np_train[:, -1].astype(int)
    y_train_all.append(y_train)
    x_test = np.expand_dims(np_test[:, 0:-1], axis=2)
    y_test = np_test[:, -1].astype(int)
    y_test_all.append(y_test)

    model.load_weights(weights_file)
    # The network is stateful: clear the LSTM states before each prediction
    # run so that nothing carries over from training, from the previous split
    # or from the train-set prediction into the test-set prediction.
    model.reset_states()
    y_pred_tr = model.predict(x_train, batch_size=batch_size)
    y_pred_tr_all.append(y_pred_tr)
    model.reset_states()
    y_pred_t = model.predict(x_test, batch_size=batch_size)
    y_pred_t_all.append(y_pred_t)

In [None]:
# -------------------- Find best threshold -------------------- #
# Sweep the decision threshold applied to the raw predictions and record the
# metrics, averaged over all train/test splits, for each candidate value.
beta = 2
thresholds = [0.02, 0.0225, 0.025, 0.0275, 0.03, 0.0325, 0.035, 0.0375, 0.040, 0.0425, 0.045]
precision_tr_all, recall_tr_all, accuracy_tr_all = [], [], []
precision_t_all, recall_t_all, accuracy_t_all = [], [], []
fbeta_tr_all, fbeta_t_all = [], []
for threshold in thresholds:
    precision_tr, recall_tr, accuracy_tr = [], [], []
    precision_t, recall_t, accuracy_t = [], [], []
    y_pred_t_bin_all, y_pred_tr_bin_all = [], []
    score_fbeta_tr, score_fbeta_t = [], []
    for y_train, y_test, y_pred_tr, y_pred_t in zip(
            y_train_all, y_test_all, y_pred_tr_all, y_pred_t_all):
        # Training data: binarise, then score against the true labels.
        y_pred_tr_bin = (y_pred_tr > threshold).astype(int)
        y_pred_tr_bin_all.append(y_pred_tr_bin)
        precision_tr.append(metrics.precision_score(y_train, y_pred_tr_bin))
        recall_tr.append(metrics.recall_score(y_train, y_pred_tr_bin))
        accuracy_tr.append(metrics.accuracy_score(y_train, y_pred_tr_bin))
        score_fbeta_tr.append(metrics.fbeta_score(y_train, y_pred_tr_bin, beta=beta))
        # Test data: same procedure.
        y_pred_t_bin = (y_pred_t > threshold).astype(int)
        y_pred_t_bin_all.append(y_pred_t_bin)
        precision_t.append(metrics.precision_score(y_test, y_pred_t_bin))
        recall_t.append(metrics.recall_score(y_test, y_pred_t_bin))
        accuracy_t.append(metrics.accuracy_score(y_test, y_pred_t_bin))
        score_fbeta_t.append(metrics.fbeta_score(y_test, y_pred_t_bin, beta=beta))
    # Average over the splits for this threshold.
    precision_tr_all.append(np.mean(precision_tr))
    precision_t_all.append(np.mean(precision_t))
    recall_tr_all.append(np.mean(recall_tr))
    recall_t_all.append(np.mean(recall_t))
    accuracy_tr_all.append(np.mean(accuracy_tr))
    accuracy_t_all.append(np.mean(accuracy_t))
    fbeta_tr_all.append(np.mean(score_fbeta_tr))
    fbeta_t_all.append(np.mean(score_fbeta_t))

# One panel per metric: training curve in blue, test curve in red.
rcParams['figure.figsize'] = 14, 4
panels = [
    ('Precision by threshold', 'Precision', precision_tr_all, precision_t_all),
    ('Recall by threshold', 'Recall', recall_tr_all, recall_t_all),
    ('F-beta score by threshold', 'F-beta score', fbeta_tr_all, fbeta_t_all),
]
for panel_no, (panel_title, y_label, train_curve, test_curve) in enumerate(panels, start=1):
    plt.subplot(1, 3, panel_no)
    plt.plot(thresholds, train_curve, color='blue')
    plt.plot(thresholds, test_curve, color='red')
    plt.title(panel_title)
    plt.xlabel('Threshold')
    plt.ylabel(y_label)
    plt.legend(['training set', 'test set'])
    plt.grid()
plt.tight_layout()
plt.show()

In [None]:
# -------------------- Evaluate results -------------------- #
def _random_baseline(pos_actual, pos_pred):
    """Return (precision, recall, accuracy) expected from random guessing.

    The baseline flags a fraction *pos_pred* of all samples as positive,
    independently of the true labels, of which a fraction *pos_actual* is
    positive.
    """
    rnd_TP = pos_pred * pos_actual
    rnd_FP = pos_pred * (1 - pos_actual)
    rnd_TN = (1 - pos_pred) * (1 - pos_actual)
    rnd_FN = (1 - pos_pred) * pos_actual
    return (rnd_TP / (rnd_TP + rnd_FP),
            rnd_TP / (rnd_TP + rnd_FN),
            rnd_TP + rnd_TN)


# Decision threshold applied to the raw model output. `beta` is the value set
# in the threshold-search cell.
threshold = 0.035
precision_tr, recall_tr, accuracy_tr = [], [], []
precision_t, recall_t, accuracy_t = [], [], []
y_pred_t_bin_all, y_pred_tr_bin_all = [], []
score_fbeta_tr, score_fbeta_t = [], []
for y_train, y_test, y_pred_tr, y_pred_t in zip(y_train_all, y_test_all,
                                                y_pred_tr_all, y_pred_t_all):
    # Training data of this split.
    y_pred_tr_bin = (y_pred_tr > threshold).astype(int)
    y_pred_tr_bin_all.append(y_pred_tr_bin)
    precision_tr.append(metrics.precision_score(y_train, y_pred_tr_bin))
    recall_tr.append(metrics.recall_score(y_train, y_pred_tr_bin))
    accuracy_tr.append(metrics.accuracy_score(y_train, y_pred_tr_bin))
    score_fbeta_tr.append(metrics.fbeta_score(y_train, y_pred_tr_bin, beta=beta))
    # Test data of this split.
    y_pred_t_bin = (y_pred_t > threshold).astype(int)
    y_pred_t_bin_all.append(y_pred_t_bin)
    precision_t.append(metrics.precision_score(y_test, y_pred_t_bin))
    recall_t.append(metrics.recall_score(y_test, y_pred_t_bin))
    accuracy_t.append(metrics.accuracy_score(y_test, y_pred_t_bin))
    score_fbeta_t.append(metrics.fbeta_score(y_test, y_pred_t_bin, beta=beta))

# Share of positive predictions per split (kept for interactive inspection).
y_tr_pred_pos = [np.mean(y_pred) for y_pred in y_pred_tr_bin_all]
y_t_pred_pos = [np.mean(y_pred) for y_pred in y_pred_t_bin_all]

# One row per metric, one column per held-out dataset.
d = {'precision_tr': np.round(precision_tr, 2),
     'recall_tr': np.round(recall_tr, 2), 'accuracy_tr': np.round(accuracy_tr, 2),
     'score_fbeta_tr': np.round(score_fbeta_tr, 2),
     'precision_t': np.round(precision_t, 2),
     'recall_t': np.round(recall_t, 2), 'accuracy_t': np.round(accuracy_t, 2),
     'score_fbeta_t': np.round(score_fbeta_t, 2)}
results = pd.DataFrame.from_dict(d, orient='index')
results.columns = dataset_names
print('Results - ' + model_name + ':')
print('\n')
print('Predict crash in:         ' + str(months) + ' months')
print('Number of epochs:         ' + str(epochs))
print('Sequence length:          ' + str(sequence))
print('Number of neurons/layer:  ' + str(neurons))
print('Batch size:               ' + str(batch_size))
print('Optimizer:                ' + str(optimizer))
print('Loss function:            ' + str(loss))
print('\n')
print('Results for each train/test split:')
print(results)
print('\n')

# Pool sample counts and positive counts (actual and predicted) over all
# splits; they parameterise the comparable random model below.
sum_tr = sum_t = 0
pos_tr = pos_t = 0
sum_tr_pred = sum_t_pred = 0
pos_tr_pred = pos_t_pred = 0
for y_tr, y_t, y_tr_pr, y_t_pr in zip(y_train_all, y_test_all, y_pred_tr_bin_all,
                                      y_pred_t_bin_all):
    sum_tr += len(y_tr)
    pos_tr += np.sum(y_tr)
    sum_t += len(y_t)
    pos_t += np.sum(y_t)
    sum_tr_pred += len(y_tr_pr)
    sum_t_pred += len(y_t_pr)
    # np.sum adds up every element whatever the array shape, so the
    # column-vector predictions need no special handling.
    pos_tr_pred += np.sum(y_tr_pr)
    pos_t_pred += np.sum(y_t_pr)

# calculate precision, recall, accuracy for comparable random model
y_train_pos_actual = pos_tr / sum_tr
y_train_pos_pred = pos_tr_pred / sum_tr_pred
rnd_pr_tr, rnd_re_tr, rnd_ac_tr = _random_baseline(y_train_pos_actual, y_train_pos_pred)
y_test_pos_actual = pos_t / sum_t
y_test_pos_pred = pos_t_pred / sum_t_pred
rnd_pr_t, rnd_re_t, rnd_ac_t = _random_baseline(y_test_pos_actual, y_test_pos_pred)

print('Results average over all train/test splits:')
# inp_tsteps is the number of input columns fed to the network; it equals
# `sequence` unless the additional features are switched on.
# NOTE(review): the row count is summed over all splits, so rows that appear
# in several splits are presumably counted more than once - confirm.
print('Number of features: ' + str(inp_tsteps) + '; number of rows: '
      + str(sum_tr + sum_t))
print('Positive train cases actual:        '+ str(round(y_train_pos_actual, 2)))
print('Positive train cases predicted:     '+ str(round(y_train_pos_pred, 2)))
print('Avg precision train (model/random): '+ str(round(np.mean(precision_tr), 2)) +' / '+str(round(rnd_pr_tr, 2)))
print('Avg recall train (model/random):    '+ str(round(np.mean(recall_tr), 2))+' / '+str(round(rnd_re_tr, 2)))
print('Avg accuracy train (model/random):  '+ str(round(np.mean(accuracy_tr), 2))+' / '+str(round(rnd_ac_tr, 2)))
print('Score train fbeta:                  '+ str(round(np.mean(score_fbeta_tr), 2)))
print('Positive test cases actual:         '+ str(round(y_test_pos_actual, 2)))
print('Positive test cases predicted:      '+ str(round(y_test_pos_pred, 2)))
print('Avg precision test (model/random):  '+ str(round(np.mean(precision_t), 2))+' / '+str(round(rnd_pr_t, 2)))
print('Avg recall test (model/random):     '+ str(round(np.mean(recall_t), 2))+' / '+str(round(rnd_re_t, 2)))
print('Avg accuracy test (model/random):   '+ str(round(np.mean(accuracy_t), 2))+' / '+str(round(rnd_ac_t, 2)))
print('Score test fbeta:                   '+ str(round(np.mean(score_fbeta_t), 2)))

In [None]:
# -------------------- Plot results -------------------- #
def _flag_spans(flag):
    """Return (start, end) index pairs for the stretches where *flag* is set.

    *flag* is a 0/1 series. A stretch that is already running at the first
    row, or still running at the last row, is closed at the edge of the
    series so that starts and ends always pair up.
    """
    if len(flag) == 0:
        return []
    step = flag.diff(-1)                  # value at t minus value at t+1
    starts = list(step[step < 0].index)   # next row switches 0 -> 1
    ends = list(step[step > 0].index)     # next row switches 1 -> 0
    if flag.iloc[0] > 0:
        starts.insert(0, flag.index[0])
    if flag.iloc[-1] > 0:
        ends.append(flag.index[-1])
    return list(zip(starts, ends))


def _crash_spans(crash_table, t1, t2):
    """Return the crash periods overlapping [t1, t2], clipped to that window.

    Assumes crash_table['crash_st'] and crash_table['crash_end'] are parallel
    sequences (k-th start belongs to k-th end) - TODO confirm.
    """
    spans = []
    for st, end in zip(crash_table['crash_st'], crash_table['crash_end']):
        if st < t2 and end > t1:
            spans.append((max(st, t1), min(end, t2)))
    return spans


def _shade(spans, alpha):
    """Shade every (start, end) span of *spans* in red on the current axes."""
    for x1, x2 in spans:
        plt.axvspan(x1, x2, alpha=alpha, color='red')


def _plot_window(df, c, test_data, t1, t2):
    """Draw the price / volatility / prediction figure for one time window.

    Args:
        df: result frame of the test dataset; the columns 'price', 'vol',
            'y' and 'y_pred' are read and the index is sliced by date.
        c: crash table of the dataset with 'crash_st' and 'crash_end'.
        test_data: display name of the dataset (used in the title).
        t1, t2: first and last date of the window.
    """
    window = df.loc[t1:t2]
    y_spans = _flag_spans(window['y'])
    crash_spans = _crash_spans(c, t1, t2)
    gs = gridspec.GridSpec(3, 1, height_ratios=[2.5, 1, 1])

    # Top panel: price, normalised to its maximum within the window.
    plt.subplot(gs[0])
    _shade(y_spans, 0.2)
    _shade(crash_spans, 0.5)
    plt.plot(window['price'] / window['price'].max(), color='blue')
    plt.title(model_name + ' Testcase: ' + test_data + ' ' + str(t1.year) + '-'
              + str(t2.year))
    plt.legend(['price', 'downturn / crash'])
    plt.xticks([])
    plt.grid()

    # Middle panel: volatility.
    plt.subplot(gs[1])
    plt.plot(window['vol'])
    _shade(y_spans, 0.2)
    _shade(crash_spans, 0.5)
    plt.legend(['Volatility'])
    plt.grid()
    plt.xticks([])

    # Bottom panel: true label and 10-row rolling mean of the prediction.
    plt.subplot(gs[2])
    plt.plot(window['y'], color='black')
    plt.plot(window['y_pred'].rolling(10).mean(), color='darkred', linewidth=0.8)
    _shade(y_spans, 0.2)
    _shade(crash_spans, 0.5)
    # The label horizon follows the `months` setting used to build the data.
    plt.legend(['crash within ' + str(months) + 'm', 'crash predictor'])
    plt.ylim(0, 1.1)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.grid()
    plt.show()


# Time windows to plot for each test dataset as (start, end) date strings.
# None stands for the first (as start) / last (as end) date in the data.
plot_windows = [
    ('S&P 500', [('1984-01-01', '1991-01-01'), ('1994-01-01', '2002-01-01'),
                 ('2007-01-01', '2010-01-01')]),
    ('N225', [('1984-01-01', '1992-01-01'), ('2005-01-01', '2012-01-01'),
              ('2013-01-01', '2017-01-01')]),
    ('SSE', [(None, '2002-01-01'), ('2004-01-01', '2010-01-01'),
             ('2010-01-01', None)]),
    ('HSI', [(None, '1990-01-01'), ('1995-01-01', '1999-01-01'),
             ('2005-01-01', '2010-01-01')]),
    ('BSESN', [('1995-01-01', '2003-01-01'), ('2005-01-01', '2010-01-01')]),
    ('SMI', [('1994-01-01', '2000-01-01'), ('2000-01-01', '2006-01-01'),
             ('2010-01-01', '2016-01-01')]),
    ('BVSP', [(None, '2000-01-01'), ('2004-01-01', '2010-01-01')]),
]

rcParams['figure.figsize'] = 10, 6
for test_data, windows in plot_windows:
    i = dataset_names.index(test_data)
    dfs_predict = data.split_results(df_combined, dfs_xy, dataset_names, test_data,
                                     y_pred_t_bin_all[i], y_pred_tr_bin_all[i],
                                     y_train_all[i], y_test_all[i])
    df = dfs_predict[i]
    c = crashes[i]
    for start, end in windows:
        t1 = df.index[0] if start is None else datetime.strptime(start, '%Y-%m-%d')
        t2 = df.index[-1] if end is None else datetime.strptime(end, '%Y-%m-%d')
        _plot_window(df, c, test_data, t1, t2)