## Configuration

In [76]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn import linear_model
from random import shuffle
from math import floor
from sklearn.linear_model import LogisticRegression
import pickle

import unos

import matplotlib.pyplot as plt
plt.rcParams['figure.figsize']=(14,8)

global_config = {    
    'n_samples': 5000,
    'n_features': 47,
    'n_experiments': 1,
    
    'train_portion': 0.8,    
    'n_hidden_units_per_layer': 200,
    'batch_size': 128,
    'n_repetitions': 5000, 
    
    'dropout_keep_input': 0.8,
    'dropout_keep_hidden': 0.8,
    'log_level': 50
}

global_config['n_train'] = floor(global_config['n_samples'] * global_config['train_portion'])
global_config['n_test'] = global_config['n_samples'] - global_config['n_train']

## Helper Functions

In [77]:
def entropy(p):
    p = np.array(p)
    return -p * np.log(p) - (1.0-p) * np.log(1.0-p)

def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

def init_weights_xavier(shape):
    initializer = tf.contrib.layers.xavier_initializer_conv2d()
    return tf.Variable(initializer(shape=shape))

def compute_PEHE(TE_true, TE_predict):
    return np.sqrt(np.mean(np.abs(TE_true-TE_predict)**2))

def estimate_propensities(Dataset):
    X_train = Dataset.drop(['Treatment','Response','TE'],axis=1)
    y_train = Dataset['Treatment']
    logmodel     = LogisticRegression()
    logmodel.fit(X_train,y_train)
    PScores      = logmodel.predict_proba(X_train)
    Propensities = np.transpose(PScores)[1,]
    Dataset['Propensity'] = Propensities
    Dataset['Entropy']    = -Propensities*np.log(Propensities)-(1-Propensities)*np.log(1-Propensities)
    return Dataset

def parse_result_list(result_list, exp_name):
    avg_pehe_train = np.mean([np.min(res['pehe_train_vals']) for res in result_list])
    std_pehe_train = np.std([np.min(res['pehe_train_vals']) for res in result_list])
    avg_pehe_test = np.mean([np.min(res['pehe_test_vals']) for res in result_list])
    std_pehe_test = np.std([np.min(res['pehe_test_vals']) for res in result_list])

    print("{}:".format(exp_name))
    print("\t Train: Mean={0:.5f}, Std={1:.5f})\t\tTest: Mean={2:.5f} Std={3:.5f}".format(avg_pehe_train, std_pehe_train, avg_pehe_test, std_pehe_test))

## Define Models

### NN-4 Dropout (with 2 outputs)

In [78]:
def NN_4_multi_4s(X, weights, p_keep_input, p_keep_hidden):
    X = tf.nn.dropout(X, p_keep_input)
    
    # 1st hidden layer (Shared)
    h1 = tf.nn.relu(tf.matmul(X, weights['w_h1']))
    h1 = tf.nn.dropout(h1, p_keep_hidden)
    
    # 2nd hidden layer (Shared)
    h2 = tf.nn.relu(tf.matmul(h1, weights['w_h2']))
    h2 = tf.nn.dropout(h2, p_keep_hidden)
    
    # 3rd hidden layer (Shared)
    h3 = tf.nn.relu(tf.matmul(h2, weights['w_h3']))
    h3 = tf.nn.dropout(h3, p_keep_hidden)
    
    # 4th hidden layer (Shared)
    h4 = tf.nn.relu(tf.matmul(h3, weights['w_h4']))
    h4 = tf.nn.dropout(h4, p_keep_hidden)
    
    # Output
    Y_0_out = tf.matmul(h4, weights['w_out_0']) 
    Y_1_out = tf.matmul(h4, weights['w_out_1'])
    
    return Y_0_out, Y_1_out

def NN_4_multi_2s_2i(X, weights, p_keep_input, p_keep_hidden):
    X = tf.nn.dropout(X, p_keep_input)
    
    # 1st hidden layer (Shared)
    h1 = tf.nn.relu(tf.matmul(X, weights['w_h1']))
    h1 = tf.nn.dropout(h1, p_keep_hidden)
    
    # 2nd hidden layer (Shared)
    h2 = tf.nn.relu(tf.matmul(h1, weights['w_h2']))
    h2 = tf.nn.dropout(h2, p_keep_hidden)
    
    # 3rd hidden layer (idiosyncratic for Y0)
    h3 = tf.nn.relu(tf.matmul(h2, weights['w_h3']))
    h3 = tf.nn.dropout(h3, p_keep_hidden)
    
    # 4th hidden layer (idiosyncratic for Y1)
    h4 = tf.nn.relu(tf.matmul(h2, weights['w_h4']))
    h4 = tf.nn.dropout(h4, p_keep_hidden)
    
    # Output
    Y_0_out = tf.matmul(h3, weights['w_out_0']) 
    Y_1_out = tf.matmul(h4, weights['w_out_1'])
    
    return Y_0_out, Y_1_out

def NN_4_multi_2s_4i(X, weights, p_keep_input, p_keep_hidden):
    X = tf.nn.dropout(X, p_keep_input)
    
    # 1st hidden layer (Shared)
    h1 = tf.nn.relu(tf.matmul(X, weights['w_h1']))
    h1 = tf.nn.dropout(h1, p_keep_hidden)
    
    # 2nd hidden layer (Shared)
    h2 = tf.nn.relu(tf.matmul(h1, weights['w_h2']))
    h2 = tf.nn.dropout(h2, p_keep_hidden)
    
    # 3rd hidden layer  (idiosyncratic for Y0) -- 1
    h3 = tf.nn.relu(tf.matmul(h2, weights['w_h3']))
    h3 = tf.nn.dropout(h3, p_keep_hidden)
    
    # 3rd hidden layer (idiosyncratic for Y0)  -- 2
    h3_2 = tf.nn.relu(tf.matmul(h3, weights['w_h3_2']))
    h3_2 = tf.nn.dropout(h3_2, p_keep_hidden)
    
    # 4th hidden layer (idiosyncratic for Y1) -- 1
    h4 = tf.nn.relu(tf.matmul(h2, weights['w_h4']))
    h4 = tf.nn.dropout(h4, p_keep_hidden)
    
    # 4th hidden layer (idiosyncratic for Y1) -- 2
    h4_2 = tf.nn.relu(tf.matmul(h4, weights['w_h4_2']))
    h4_2 = tf.nn.dropout(h4_2, p_keep_hidden)
    
    
    # Output
    Y_0_out = tf.matmul(h3_2, weights['w_out_0']) 
    Y_1_out = tf.matmul(h4_2, weights['w_out_1'])
    
    return Y_0_out, Y_1_out

In [79]:
def run_nn4_multi(dataset_train, dataset_test, no_dropout=False, show_log=False, architecture='4s'):
    
    # Define Placeholders and Init Weights
    X = tf.placeholder("float", [None, global_config['n_features']])
    Y = tf.placeholder("float", [None, 1])

    p_keep_input = tf.placeholder("float")
    p_keep_hidden = tf.placeholder("float")

    Y_0         = tf.placeholder("float", shape=[None, 1])       # Task 1 output
    Y_1         = tf.placeholder("float", shape=[None, 1])       # Task 2 output

    weights = {
        'w_h1': init_weights_xavier([global_config['n_features'], global_config['n_hidden_units_per_layer']]),
        'w_h2': init_weights_xavier([global_config['n_hidden_units_per_layer'], global_config['n_hidden_units_per_layer']]),
        'w_h3': init_weights_xavier([global_config['n_hidden_units_per_layer'], global_config['n_hidden_units_per_layer']]),
        'w_h3_2': init_weights_xavier([global_config['n_hidden_units_per_layer'], global_config['n_hidden_units_per_layer']]),
        'w_h4': init_weights_xavier([global_config['n_hidden_units_per_layer'], global_config['n_hidden_units_per_layer']]),
        'w_h4_2': init_weights_xavier([global_config['n_hidden_units_per_layer'], global_config['n_hidden_units_per_layer']]),
        'w_out_0': init_weights_xavier([global_config['n_hidden_units_per_layer'], 1]),
        'w_out_1': init_weights_xavier([global_config['n_hidden_units_per_layer'], 1])
    }

    # Parse Shared Layers Flag
    if architecture == '4s':
        pred_Y0, pred_Y1 = NN_4_multi_4s(X, weights, p_keep_input, p_keep_hidden)
    elif architecture == '4s_2i':
        pred_Y0, pred_Y1 = NN_4_multi_2s_2i(X, weights, p_keep_input, p_keep_hidden) 
    elif architecture == '4s_4i':
        pred_Y0, pred_Y1 = NN_4_multi_2s_4i(X, weights, p_keep_input, p_keep_hidden) 
    else: 
        raise Exception('Invalid architecture passed ({})'.format(architecture))

    cost0     = tf.nn.l2_loss(Y_0-pred_Y0)
    cost1     = tf.nn.l2_loss(Y_1-pred_Y1)

    optim0    = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost0)
    optim1    = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost1)
    
    # Prepare Datasets
    X_train = dataset_train.drop(['Response', 'TE', 'Treatment', 'Propensity', 'Entropy'], axis=1).as_matrix() 
    TE_train = np.reshape(dataset_train['TE'].as_matrix(), [global_config['n_train'], 1])

    X_test = dataset_test.drop(['Response', 'TE', 'Treatment', 'Propensity', 'Entropy'], axis=1).as_matrix() 
    TE_test = np.reshape(dataset_test['TE'].as_matrix(), [global_config['n_test'], 1])


    
    Y_train = np.reshape(dataset_train['Response'].as_matrix(), [global_config['n_train'], 1])
    Y_test = np.reshape(dataset_test['Response'].as_matrix(), [global_config['n_test'], 1])
    
    # Parse Dropout Flag
    if no_dropout:
        dropout_keep_input = 1.0
        dropout_keep_hidden = 1.0
    else:
        dropout_keep_input = global_config['dropout_keep_input']
        dropout_keep_hidden = global_config['dropout_keep_hidden']
        
    
    # Start Training
    mses_train_0 = []
    mses_train_1 = []

    pehes_train = []
    pehes_test = []

    Y0_loss = 0
    Y1_loss = 0
    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        for i in range(global_config['n_repetitions']):
            if i%2 == 0:
                _, Y0_loss = sess.run([optim0, cost0], feed_dict= {
                              X  : X_train[dataset_train['Treatment']==0], 
                              Y_0: Y_train[dataset_train['Treatment']==0],
                              p_keep_input: dropout_keep_input,
                              p_keep_hidden: dropout_keep_hidden})
            else:
                _, Y1_loss = sess.run([optim1, cost1], feed_dict= {
                              X  : X_train[dataset_train['Treatment']==1], 
                              Y_1: Y_train[dataset_train['Treatment']==1],
                              p_keep_input: dropout_keep_input,
                              p_keep_hidden: dropout_keep_hidden})

            # Compute Training Error
            mses_train_0.append(Y0_loss)
            mses_train_1.append(Y1_loss)


            # Predict TE
            Y0_predict_train = sess.run(pred_Y0, feed_dict={X: X_train, p_keep_input: 1.0,
                              p_keep_hidden: 1.0})
            Y1_predict_train = sess.run(pred_Y1, feed_dict={X: X_train, p_keep_input: 1.0,
                              p_keep_hidden: 1.0})

            TE_true_train               = TE_train
            TE_predict_train            = np.array(Y1_predict_train)-np.array(Y0_predict_train)

            Y0_predict_test = sess.run(pred_Y0, feed_dict={X: X_test, p_keep_input: 1.0,
                              p_keep_hidden: 1.0})
            Y1_predict_test = sess.run(pred_Y1, feed_dict={X: X_test, p_keep_input: 1.0,
                              p_keep_hidden: 1.0})

            TE_true_test               = TE_test
            TE_predict_test            = np.array(Y1_predict_test)-np.array(Y0_predict_test)

            # Compute PEHE
            pehe_train = compute_PEHE(TE_true_train, TE_predict_train)
            pehes_train.append(pehe_train)

            pehe_test = compute_PEHE(TE_true_test, TE_predict_test)
            pehes_test.append(pehe_test)

            if show_log and i % global_config['log_level'] == 0:
                print('#{}. \tMSE Y0: {} \tMSE Y1: {}\t PEHE: {}'.format(i, Y0_loss, Y1_loss, pehe_train))

        sess.close()

    result_dict = {
        "pehe_train_vals": pehes_train,
        "pehe_test_vals": pehe_test
    }
    
    return result_dict

## Topological Random Search 

In [80]:
nn4_multi_4s_all_results = []      # NN4 2 outcomes 4 shared
nn4_multi_2s_2i_all_results = []   # NN4 2 outcomes 2 shared, 2 idiosyncratic (total)
nn4_multi_2s_4i_all_results = []   # NN4 2 outcomes 2 shared, 2 idiosyncratic (each)

unos_generator = unos.UNOS_data('unos/unos_sample.csv')

for i in range(global_config['n_experiments']):
    print('Running Experiment {}/{}.'.format(i+1, global_config['n_experiments']))
    
    # Draw Data
    dataset_all = unos_generator.draw_sample()
    
    # Use only the desire number of samples
    dataset = dataset_all.loc[1:global_config['n_samples']]
    
    estimate_propensities(dataset)
    
    # Shuffle Dataset
    dataset = dataset.sample(frac=1).reset_index(drop=True)

    # Split Data into test and training set
    dataset_train = dataset[0:global_config['n_train']]
    dataset_test = dataset[global_config['n_train']:]
    
    # NN4 2 outcomes 4 shared layers(Dropout)
    nn4_multi_4s_results = run_nn4_multi(dataset_train, dataset_test, architecture='4s', show_log=True)
    nn4_multi_4s_all_results.append(nn4_multi_4s_results)    
    nn4_multi_4s_min_pehe_train =  np.min(nn4_multi_4s_results['pehe_train_vals'])
    nn4_multi_4s_min_pehe_test =  np.min(nn4_multi_4s_results['pehe_test_vals'])
    
    # NN4 2 outcomes 2 shared layers, 1 idiosyncratic per outcome (Dropout)
    #nn4_multi_2s_2i_results = run_nn4_multi(dataset_train, dataset_test, architecture='4s_2i')
    #nn4_multi_2s_2i_all_results.append(nn4_multi_2s_2i_results)    
    #nn4_multi_2s_2i_min_pehe_train =  np.min(nn4_multi_2s_2i_results['pehe_train_vals'])
    #nn4_multi_2s_2i_min_pehe_test =  np.min(nn4_multi_2s_2i_results['pehe_test_vals'])
    
    # NN4 2 outcomes 2 shared layers, 2 idiosyncratic per outcome (Dropout)
    #nn4_multi_2s_4i_results = run_nn4_multi(dataset_train, dataset_test, architecture='4s_4i')
    #nn4_multi_2s_4i_all_results.append(nn4_multi_2s_4i_results)    
    #nn4_multi_2s_4i_min_pehe_train =  np.min(nn4_multi_2s_4i_results['pehe_train_vals'])
    #nn4_multi_2s_4i_min_pehe_test =  np.min(nn4_multi_2s_4i_results['pehe_test_vals'])
    
    # NN4 2 outcomes (PBD)
    #nn4_pbd_results = run_nn4_pbd(dataset_train, dataset_test)
    #nn4_pbd_all_results.append(nn4_pbd_results)    
    #nn4_pbd_min_pehe_train =  np.min(nn4_pbd_results['pehe_train_vals'])
    #nn4_pbd_min_pehe_test =  np.min(nn4_pbd_results['pehe_test_vals'])
    

    #print('\tMin. PEHE (Train). NN4: {}\t NN4 (no Dropout): {}\t NN4 Dropout: {}\t NN4 PBD: {}'.format(nn4_min_pehe_train, nn4_no_dropout_min_pehe_train, nn4_dropout_min_pehe_train, nn4_pbd_min_pehe_train))
    #print('\tMin. PEHE (Test). NN4: {}\t NN4 (no Dropout): {}\t NN4 Dropout: {}\t NN4 PBD: {}'.format(nn4_min_pehe_test, nn4_no_dropout_min_pehe_test, nn4_dropout_min_pehe_test, nn4_pbd_min_pehe_test))
    
    # Only 2 Outcomes
    print('\tMin. PEHE (Train). NN4 Multi 4s: {}\t NN4 Multi 2s 2i: {}\t NN4 Multi 2s 4i: {}'.format(nn4_multi_4s_min_pehe_train, nn4_multi_2s_2i_min_pehe_train,nn4_multi_2s_4i_min_pehe_train ))
    print('\tMin. PEHE (Test). NN4 Multi 4s: {}\t NN4 Multi 2s 2i: {}\t NN4 Multi 2s 4i: {}'.format(nn4_multi_4s_min_pehe_test, nn4_multi_2s_2i_min_pehe_test,nn4_multi_2s_4i_min_pehe_test ))
        

Running Experiment 1/1.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


#0. 	MSE Y0: 239979040.0 	MSE Y1: 0	 PEHE: 619.3606611225365
#50. 	MSE Y0: 223232000.0 	MSE Y1: 4947439.5	 PEHE: 597.2304286808493
#100. 	MSE Y0: 220099424.0 	MSE Y1: 3534865.5	 PEHE: 594.8369720221921
#150. 	MSE Y0: 219919184.0 	MSE Y1: 2935722.0	 PEHE: 591.0437950816522
#200. 	MSE Y0: 218839456.0 	MSE Y1: 2342159.25	 PEHE: 578.0364037425782
#250. 	MSE Y0: 201000592.0 	MSE Y1: 1771882.125	 PEHE: 536.271800523079
#300. 	MSE Y0: 167628048.0 	MSE Y1: 1536571.875	 PEHE: 496.7190337420423
#350. 	MSE Y0: 202233408.0 	MSE Y1: 1378588.625	 PEHE: 444.63547321927575
#400. 	MSE Y0: 65786340.0 	MSE Y1: 1487918.875	 PEHE: 388.64657727139246
#450. 	MSE Y0: 26467518.0 	MSE Y1: 1271527.625	 PEHE: 321.09173268570964
#500. 	MSE Y0: 20482666.0 	MSE Y1: 942609.3125	 PEHE: 358.54115250068145
#550. 	MSE Y0: 90210784.0 	MSE Y1: 1030732.75	 PEHE: 342.1905229932621
#600. 	MSE Y0: 23108088.0 	MSE Y1: 1011266.875	 PEHE: 379.0913909145026
#650. 	MSE Y0: 30512728.0 	MSE Y1: 1117144.25	 PEHE: 392.0800096562538
#70

KeyboardInterrupt: 

### Report Result Summary

### Pickle results into file (Optional)

In [None]:
# Combine results
results_combined = {
    'NN4' : nn4_all_results,
    'NN4 No Dropout': nn4_no_dropout_all_results,
    'NN4 2 Outcomes': nn4_dropout_all_results,
    'NN4 PBD': nn4_pbd_all_results
}

filename = '16-06-17-results.dat'
with open(filename, 'wb') as handle:
    pickle.dump(results_combined, handle, protocol=pickle.HIGHEST_PROTOCOL)


### Print Results

In [11]:
print("PEHE TRAIN")
print(['%.2f' % np.min(res['pehe_train_vals']) for res in nn4_all_results])
print(['%.2f' % np.min(res['pehe_train_vals']) for res in nn4_no_dropout_all_results])
print(['%.2f' % np.min(res['pehe_train_vals']) for res in nn4_dropout_all_results])
print(['%.2f' % np.min(res['pehe_train_vals']) for res in nn4_pbd_all_results])

print("\nPEHE TEST")
print(['%.2f' % np.min(res['pehe_test_vals']) for res in nn4_all_results])
print(['%.2f' % np.min(res['pehe_test_vals']) for res in nn4_no_dropout_all_results])
print(['%.2f' % np.min(res['pehe_test_vals']) for res in nn4_dropout_all_results])
print(['%.2f' % np.min(res['pehe_test_vals']) for res in nn4_pbd_all_results])

PEHE TRAIN
[]
[]


NameError: name 'nn4_dropout_all_results' is not defined

## Box Plot

In [12]:
# Training
min_pehes_nn4 = [np.min(res['pehe_train_vals']) for res in nn4_all_results]
min_pehes_nn4_no_dropout = [np.min(res['pehe_train_vals']) for res in nn4_no_dropout_all_results]
min_pehes_nn4_dropout = [np.min(res['pehe_train_vals']) for res in nn4_dropout_all_results]
min_pehes_nn4_pbd = [np.min(res['pehe_train_vals']) for res in nn4_pbd_all_results]

data = [min_pehes_nn4, min_pehes_nn4_no_dropout, min_pehes_nn4_dropout, min_pehes_nn4_pbd]
labels = ['NN4 (with Dropout)', 'NN4 (no Dropout)', 'NN4 2Out (with Dropout)', 'NN4 2Out (PBD)']

plt.boxplot(data, labels=labels)
plt.title('PEHE Values (Train)')
plt.ylabel('PEHE')
plt.show()


# Test
min_pehes_nn4 = [np.min(res['pehe_test_vals']) for res in nn4_all_results]
min_pehes_nn4_no_dropout = [np.min(res['pehe_test_vals']) for res in nn4_no_dropout_all_results]
min_pehes_nn4_dropout = [np.min(res['pehe_test_vals']) for res in nn4_dropout_all_results]
min_pehes_nn4_pbd = [np.min(res['pehe_test_vals']) for res in nn4_pbd_all_results]

data = [min_pehes_nn4, min_pehes_nn4_no_dropout, min_pehes_nn4_dropout, min_pehes_nn4_pbd]
labels = ['NN4 (with Dropout)', 'NN4 (no Dropout)', 'NN4 2Out (with Dropout)', 'NN4 2Out (PBD)']

plt.boxplot(data, labels=labels)
plt.title('PEHE Values (TEST)')
plt.ylabel('PEHE')
plt.show()

NameError: name 'nn4_dropout_all_results' is not defined

## PEHE Values (over epochs)

In [13]:
plt.title('PEHE: NN-4 (Treatment as Feature) vs. NN-4 Dropout (2 outcomes) vs. NN-4 PBD')
plt.plot(nn4_all_results[0]['pehe_train_vals'], label='NN-4 (Train)',  color='r')
plt.plot(nn4_all_results[0]['pehe_test_vals'], label='NN-4 (Test)',  color='r', linestyle=':')

plt.plot(nn4_no_dropout_all_results[0]['pehe_train_vals'], label='NN-4 (Train)',  color='b')
plt.plot(nn4_no_dropout_all_results[0]['pehe_test_vals'], label='NN-4 (Test)',  color='b', linestyle=':')
#plt.plot(nn4_all_pehe_train[4], label='NN-4 (Train) 2',  color='b')
#plt.plot(nn4_all_pehe_test[4], label='NN-4 (Test) 2',  color='b', linestyle=':')

#plt.plot(nn4_dropout_pehe_train, label='NN-4 Dropout (Train)',  color='b')
#plt.plot(nn4_dropout_pehe_test, label='NN-4 Dropout (Test)',  color='b', linestyle=':')
#plt.plot(nn4_pbd_pehe_train, label='NN-4 PBD (Train)',  color='g')
#plt.plot(nn4_pbd_pehe_test, label='NN-4 PBD (Test)',  color='g', linestyle=':')

plt.ylabel('PEHE')
plt.xlabel('# Iteration')
plt.legend(loc='upper right')
plt.show()

IndexError: list index out of range

## Learning Curves

In [None]:
#plt.title('NN-4 (Treatment as Feature)')
#plt.plot(nn4_mses_train, label='MSE (Train)')
#plt.ylabel('MSE')
#plt.xlabel('# Iteration')
#plt.legend(loc='upper right')
#plt.show()

#plt.title('NN-4 Dropout (2 outcomes)')
#plt.plot(nn4_dropout_mses_train_0, label='MSE Y0')
#plt.plot(nn4_dropout_mses_train_1, label='MSE Y1')
#plt.ylabel('MSE')
#plt.xlabel('# Iteration')
#plt.legend(loc='upper right')
#plt.show()

#plt.title('NN-4 PBD')
#plt.plot(nn4_pbd_mses_train_0, label='MSE Y0')
#plt.plot(nn4_pbd_mses_train_1, label='MSE Y1')
#plt.ylabel('MSE')
#plt.xlabel('# Iteration')
#plt.legend(loc='upper right')
#plt.show()