In [16]:
import pandas as pd
import numpy as np
import os


import keras
from keras.layers import Input, Dense, BatchNormalization, Activation, Dropout
from keras.optimizers import Adam
from keras.models import Model

from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard

In [17]:
# Init params and helpers

In [18]:
model_tag = '01'
data_dir = '../data/'
cleaned_csv_file_name = 'cleaned.csv'
models_dir = './models'
logs_dir = './logs/'

num_k = 10

In [19]:
kfold_data_dir = '../data/k-fold/'
file_list = os.listdir(kfold_data_dir)
print(len(file_list))

num_k = int(len(file_list) / 2)
print(f'Number of K is {num_k}')

file_train_start = 'train'
file_test_start = 'test'


20
Number of K is 10


In [20]:
# define features and classes

In [21]:
predict_class_right = ['PostOp-SNR',  'PostOp-CNR',  'PostOp-BWR', 'PostOp-BHR',
                       'PostOp-IMFR',  'PostOp-AreRW', 'PostOp-AreRH']

predict_class_left = [ 'PostOp-SNL',  'PostOp-CNL', 'PostOp-BWL',
                      'PostOp-BHL',  'PostOp-IMFL','PostOp-AreLW', 'PostOp-AreLH']

predict_post_id = ['PostOp-ID']

feature_left = ['PreOp-SNL', 'PreOp-CNL', 'PreOp-BWL', 'PreOp-BHL', 'PreOp-IMFL', 'PreOp-AreLW',
                'PreOp-AreLH', 'PreOp-ID', 'ImplantLeft']

feature_right = ['PreOp-SNR', 'PreOp-CNR', 'PreOp-BWR', 'PreOp-BHR', 'PreOp-IMFR', 'PreOp-AreRW',
                'PreOp-AreRH', 'PreOp-ID', 'ImplantRight']

In [22]:
# Check diractory

In [23]:
if not os.path.exists(os.path.join(models_dir, model_tag)):
    os.mkdir(os.path.join(models_dir, model_tag))
    
if not os.path.exists(os.path.join(logs_dir, model_tag)):
    os.mkdir(os.path.join(logs_dir, model_tag))
    
for predict_class in predict_class_right+predict_class_left+predict_post_id:
    if not os.path.exists(os.path.join(models_dir, model_tag, predict_class)):
        os.mkdir(os.path.join(models_dir, model_tag, predict_class))
        
for predict_class in predict_class_right+predict_class_left+predict_post_id:
    if not os.path.exists(os.path.join(logs_dir, model_tag, predict_class)):
        os.mkdir(os.path.join(logs_dir, model_tag, predict_class))

In [24]:
def get_x_and_y(df: pd.DataFrame, x_head=[], y_head=[]):
    x, y = [], []
    
    for index, row in df.iterrows():
        x_temp = []
        for x_h in x_head:
            x_temp.append(row[x_h])
        x.append(x_temp)
        y.append(row[y_head[0]])
    
    return x, y

In [30]:
def create_model(feature_len):
    inp = Input(shape=(feature_len,))
    x = Dropout(0.25)(inp)
    x = Dense(512)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dense(256)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    output = Dense(1, activation='linear')(x)
    model = Model(inp, output)
    
    loss = 'mse'
    
    model.compile(optimizer=Adam(1e-4),
                  loss=loss)
    return model

In [31]:
def get_callbacks(model_path, log_path):
    
#     er = EarlyStopping(monitor="val_loss", mode="min", patience=20, verbose=1)
    checkpoint = ModelCheckpoint(model_path, monitor='val_loss', verbose=0, save_best_only=True,  mode='min')
    tb = TensorBoard(log_path)
    return [checkpoint, tb]

In [32]:
def get_accuracy(model, x, y):
    result = model.predict(x)
    sum = 0
    for r, y in zip(result, y):
        sum += abs(r - y)
    return (sum / len(x))[0]

In [35]:
accuracies = []

prediction_tag = 'PostOp-SNL'
for k in range(num_k):
    train_df = pd.read_csv(os.path.join(kfold_data_dir, f'{file_train_start}_{k}.csv',)).dropna()
    test_df = pd.read_csv(os.path.join(kfold_data_dir, f'{file_test_start}_{k}.csv')).dropna()
    
    # Create right side model
    
    x_train, y_train = get_x_and_y(train_df, feature_left, [prediction_tag])
    x_test, y_test = get_x_and_y(test_df, feature_left, [prediction_tag])
    
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    
    x_test = np.array(x_test)
    y_test = np.array(y_test)
    
    model_path = os.path.join(os.path.join(models_dir, model_tag, prediction_tag, 'model_' + str(k) + '.h5'))
    
    if not os.path.exists(os.path.join(logs_dir, model_tag, prediction_tag, str(k))):
        os.mkdir(os.path.join(logs_dir, model_tag, prediction_tag, str(k)))
        
    log_path = os.path.join(os.path.join(logs_dir, model_tag, prediction_tag, str(k)))
    
    
    model = create_model(len(x_train[0]))
    history = model.fit(x_train, y_train,
                        shuffle=True,
                        batch_size=4,
                        epochs=10000,
                        verbose=0,
                        callbacks=get_callbacks(model_path, log_path),
                        validation_data=[x_test, y_test])

    model.fit(x_train, y_train)
    model.load_weights(model_path)
    accuracies.append(get_accuracy(model, x_test, y_test))

print(sum(accuracies)/len(accuracies))

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
1.3316785514354705


In [36]:
accuracies = []

prediction_tag = 'PostOp-SNR'
for k in range(num_k):
    train_df = pd.read_csv(os.path.join(kfold_data_dir, f'{file_train_start}_{k}.csv',)).dropna()
    test_df = pd.read_csv(os.path.join(kfold_data_dir, f'{file_test_start}_{k}.csv')).dropna()
    
    # Create right side model
    
    x_train, y_train = get_x_and_y(train_df, feature_right, [prediction_tag])
    x_test, y_test = get_x_and_y(test_df, feature_right, [prediction_tag])
    
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    
    x_test = np.array(x_test)
    y_test = np.array(y_test)
    
    model_path = os.path.join(os.path.join(models_dir, model_tag, prediction_tag, 'model_' + str(k) + '.h5'))
    
    if not os.path.exists(os.path.join(logs_dir, model_tag, prediction_tag, str(k))):
        os.mkdir(os.path.join(logs_dir, model_tag, prediction_tag, str(k)))
        
    log_path = os.path.join(os.path.join(logs_dir, model_tag, prediction_tag, str(k)))
    
    
    model = create_model(len(x_train[0]))
    history = model.fit(x_train, y_train,
                        shuffle=True,
                        batch_size=4,
                        epochs=10000,
                        verbose=0,
                        callbacks=get_callbacks(model_path, log_path),
                        validation_data=[x_test, y_test])

    model.fit(x_train, y_train)
    model.load_weights(model_path)
    accuracies.append(get_accuracy(model, x_test, y_test))

print(sum(accuracies)/len(accuracies))

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
1.3363540112972259


In [37]:
accuracies = []

prediction_tag = 'PostOp-CNR'
for k in range(num_k):
    train_df = pd.read_csv(os.path.join(kfold_data_dir, f'{file_train_start}_{k}.csv',)).dropna()
    test_df = pd.read_csv(os.path.join(kfold_data_dir, f'{file_test_start}_{k}.csv')).dropna()
    
    # Create right side model
    
    x_train, y_train = get_x_and_y(train_df, feature_right, [prediction_tag])
    x_test, y_test = get_x_and_y(test_df, feature_right, [prediction_tag])
    
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    
    x_test = np.array(x_test)
    y_test = np.array(y_test)
    
    model_path = os.path.join(os.path.join(models_dir, model_tag, prediction_tag, 'model_' + str(k) + '.h5'))
    
    if not os.path.exists(os.path.join(logs_dir, model_tag, prediction_tag, str(k))):
        os.mkdir(os.path.join(logs_dir, model_tag, prediction_tag, str(k)))
        
    log_path = os.path.join(os.path.join(logs_dir, model_tag, prediction_tag, str(k)))
    
    
    model = create_model(len(x_train[0]))
    history = model.fit(x_train, y_train,
                        shuffle=True,
                        batch_size=4,
                        epochs=10000,
                        verbose=0,
                        callbacks=get_callbacks(model_path, log_path),
                        validation_data=[x_test, y_test])

    model.fit(x_train, y_train)
    model.load_weights(model_path)
    accuracies.append(get_accuracy(model, x_test, y_test))

print(sum(accuracies)/len(accuracies))

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
1.4427192032337188


In [38]:
accuracies = []

prediction_tag = 'PostOp-CNL'
for k in range(num_k):
    train_df = pd.read_csv(os.path.join(kfold_data_dir, f'{file_train_start}_{k}.csv',)).dropna()
    test_df = pd.read_csv(os.path.join(kfold_data_dir, f'{file_test_start}_{k}.csv')).dropna()
    
    # Create right side model
    
    x_train, y_train = get_x_and_y(train_df, feature_left, [prediction_tag])
    x_test, y_test = get_x_and_y(test_df, feature_left, [prediction_tag])
    
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    
    x_test = np.array(x_test)
    y_test = np.array(y_test)
    
    model_path = os.path.join(os.path.join(models_dir, model_tag, prediction_tag, 'model_' + str(k) + '.h5'))
    
    if not os.path.exists(os.path.join(logs_dir, model_tag, prediction_tag, str(k))):
        os.mkdir(os.path.join(logs_dir, model_tag, prediction_tag, str(k)))
        
    log_path = os.path.join(os.path.join(logs_dir, model_tag, prediction_tag, str(k)))
    
    
    model = create_model(len(x_train[0]))
    history = model.fit(x_train, y_train,
                        shuffle=True,
                        batch_size=4,
                        epochs=10000,
                        verbose=0,
                        callbacks=get_callbacks(model_path, log_path),
                        validation_data=[x_test, y_test])

    model.fit(x_train, y_train)
    model.load_weights(model_path)
    accuracies.append(get_accuracy(model, x_test, y_test))

print(sum(accuracies)/len(accuracies))

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
1.3541861057281495


In [41]:
accuracies = []

prediction_tag = 'PostOp-BWL'
for k in range(num_k):
    train_df = pd.read_csv(os.path.join(kfold_data_dir, f'{file_train_start}_{k}.csv',)).dropna()
    test_df = pd.read_csv(os.path.join(kfold_data_dir, f'{file_test_start}_{k}.csv')).dropna()
    
    # Create right side model
    
    x_train, y_train = get_x_and_y(train_df, feature_left, [prediction_tag])
    x_test, y_test = get_x_and_y(test_df, feature_left, [prediction_tag])
    
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    
    x_test = np.array(x_test)
    y_test = np.array(y_test)
    
    model_path = os.path.join(os.path.join(models_dir, model_tag, prediction_tag, 'model_' + str(k) + '.h5'))
    
    if not os.path.exists(os.path.join(logs_dir, model_tag, prediction_tag, str(k))):
        os.mkdir(os.path.join(logs_dir, model_tag, prediction_tag, str(k)))
        
    log_path = os.path.join(os.path.join(logs_dir, model_tag, prediction_tag, str(k)))
    
    
    model = create_model(len(x_train[0]))
    history = model.fit(x_train, y_train,
                        shuffle=True,
                        batch_size=4,
                        epochs=10000,
                        verbose=0,
                        callbacks=get_callbacks(model_path, log_path),
                        validation_data=[x_test, y_test])

    model.fit(x_train, y_train)
    model.load_weights(model_path)
    accuracies.append(get_accuracy(model, x_test, y_test))

print(sum(accuracies)/len(accuracies))

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
0.6958091497421265


In [42]:
accuracies = []

prediction_tag = 'PostOp-BWR'
for k in range(num_k):
    train_df = pd.read_csv(os.path.join(kfold_data_dir, f'{file_train_start}_{k}.csv',)).dropna()
    test_df = pd.read_csv(os.path.join(kfold_data_dir, f'{file_test_start}_{k}.csv')).dropna()
    
    # Create right side model
    
    x_train, y_train = get_x_and_y(train_df, feature_right, [prediction_tag])
    x_test, y_test = get_x_and_y(test_df, feature_right, [prediction_tag])
    
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    
    x_test = np.array(x_test)
    y_test = np.array(y_test)
    
    model_path = os.path.join(os.path.join(models_dir, model_tag, prediction_tag, 'model_' + str(k) + '.h5'))
    
    if not os.path.exists(os.path.join(logs_dir, model_tag, prediction_tag, str(k))):
        os.mkdir(os.path.join(logs_dir, model_tag, prediction_tag, str(k)))
        
    log_path = os.path.join(os.path.join(logs_dir, model_tag, prediction_tag, str(k)))
    
    
    model = create_model(len(x_train[0]))
    history = model.fit(x_train, y_train,
                        shuffle=True,
                        batch_size=4,
                        epochs=10000,
                        verbose=0,
                        callbacks=get_callbacks(model_path, log_path),
                        validation_data=[x_test, y_test])

    model.fit(x_train, y_train)
    model.load_weights(model_path)
    accuracies.append(get_accuracy(model, x_test, y_test))

print(sum(accuracies)/len(accuracies))

KeyboardInterrupt: 

In [43]:
print(accuracies)

[]


In [None]:
accuracies = []

prediction_tag = 'PostOp-IMFR'
for k in range(num_k):
    train_df = pd.read_csv(os.path.join(kfold_data_dir, f'{file_train_start}_{k}.csv',)).dropna()
    test_df = pd.read_csv(os.path.join(kfold_data_dir, f'{file_test_start}_{k}.csv')).dropna()
    
    # Create right side model
    x_train, y_train = get_x_and_y(train_df, feature_right, [prediction_tag])
    x_test, y_test = get_x_and_y(test_df, feature_right, [prediction_tag])
    
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    
    x_test = np.array(x_test)
    y_test = np.array(y_test)
    
    model_path = os.path.join(os.path.join(models_dir, model_tag, prediction_tag, 'model_' + str(k) + '.h5'))
    
    if not os.path.exists(os.path.join(logs_dir, model_tag, prediction_tag, str(k))):
        os.mkdir(os.path.join(logs_dir, model_tag, prediction_tag, str(k)))
        
    log_path = os.path.join(os.path.join(logs_dir, model_tag, prediction_tag, str(k)))
    
    
    model = create_model(len(x_train[0]))
    history = model.fit(x_train, y_train,
                        shuffle=True,
                        batch_size=4,
                        epochs=10000,
                        verbose=0,
                        callbacks=get_callbacks(model_path, log_path),
                        validation_data=[x_test, y_test])

    model.fit(x_train, y_train)
    model.load_weights(model_path)
    accuracies.append(get_accuracy(model, x_test, y_test))

print(sum(accuracies)/len(accuracies))

In [None]:
accuracies = []

prediction_tag = 'PostOp-IMFL'
for k in range(num_k):
    train_df = pd.read_csv(os.path.join(kfold_data_dir, f'{file_train_start}_{k}.csv',)).dropna()
    test_df = pd.read_csv(os.path.join(kfold_data_dir, f'{file_test_start}_{k}.csv')).dropna()
    
    # Create right side model
    x_train, y_train = get_x_and_y(train_df, feature_left, [prediction_tag])
    x_test, y_test = get_x_and_y(test_df, feature_left, [prediction_tag])
    
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    
    x_test = np.array(x_test)
    y_test = np.array(y_test)
    
    model_path = os.path.join(os.path.join(models_dir, model_tag, prediction_tag, 'model_' + str(k) + '.h5'))
    
    if not os.path.exists(os.path.join(logs_dir, model_tag, prediction_tag, str(k))):
        os.mkdir(os.path.join(logs_dir, model_tag, prediction_tag, str(k)))
        
    log_path = os.path.join(os.path.join(logs_dir, model_tag, prediction_tag, str(k)))
    
    
    model = create_model(len(x_train[0]))
    history = model.fit(x_train, y_train,
                        shuffle=True,
                        batch_size=4,
                        epochs=10000,
                        verbose=0,
                        callbacks=get_callbacks(model_path, log_path),
                        validation_data=[x_test, y_test])

    model.fit(x_train, y_train)
    model.load_weights(model_path)
    accuracies.append(get_accuracy(model, x_test, y_test))

print(sum(accuracies)/len(accuracies))

In [None]:
accuracies = []

prediction_tag = 'PostOp-AreLH'
for k in range(num_k):
    train_df = pd.read_csv(os.path.join(kfold_data_dir, f'{file_train_start}_{k}.csv',)).dropna()
    test_df = pd.read_csv(os.path.join(kfold_data_dir, f'{file_test_start}_{k}.csv')).dropna()
    
    # Create right side model
    x_train, y_train = get_x_and_y(train_df, feature_left, [prediction_tag])
    x_test, y_test = get_x_and_y(test_df, feature_left, [prediction_tag])
    
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    
    x_test = np.array(x_test)
    y_test = np.array(y_test)
    
    model_path = os.path.join(os.path.join(models_dir, model_tag, prediction_tag, 'model_' + str(k) + '.h5'))
    
    if not os.path.exists(os.path.join(logs_dir, model_tag, prediction_tag, str(k))):
        os.mkdir(os.path.join(logs_dir, model_tag, prediction_tag, str(k)))
        
    log_path = os.path.join(os.path.join(logs_dir, model_tag, prediction_tag, str(k)))
    
    
    model = create_model(len(x_train[0]))
    history = model.fit(x_train, y_train,
                        shuffle=True,
                        batch_size=4,
                        epochs=10000,
                        verbose=0,
                        callbacks=get_callbacks(model_path, log_path),
                        validation_data=[x_test, y_test])

    model.fit(x_train, y_train)
    model.load_weights(model_path)
    accuracies.append(get_accuracy(model, x_test, y_test))

print(sum(accuracies)/len(accuracies))

In [None]:
accuracies = []

prediction_tag = 'PostOp-AreRH'
for k in range(num_k):
    train_df = pd.read_csv(os.path.join(kfold_data_dir, f'{file_train_start}_{k}.csv',)).dropna()
    test_df = pd.read_csv(os.path.join(kfold_data_dir, f'{file_test_start}_{k}.csv')).dropna()
    
    # Create right side model
    x_train, y_train = get_x_and_y(train_df, feature_right, [prediction_tag])
    x_test, y_test = get_x_and_y(test_df, feature_right, [prediction_tag])
    
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    
    x_test = np.array(x_test)
    y_test = np.array(y_test)
    
    model_path = os.path.join(os.path.join(models_dir, model_tag, prediction_tag, 'model_' + str(k) + '.h5'))
    
    if not os.path.exists(os.path.join(logs_dir, model_tag, prediction_tag, str(k))):
        os.mkdir(os.path.join(logs_dir, model_tag, prediction_tag, str(k)))
        
    log_path = os.path.join(os.path.join(logs_dir, model_tag, prediction_tag, str(k)))
    
    
    model = create_model(len(x_train[0]))
    history = model.fit(x_train, y_train,
                        shuffle=True,
                        batch_size=4,
                        epochs=10000,
                        verbose=0,
                        callbacks=get_callbacks(model_path, log_path),
                        validation_data=[x_test, y_test])

    model.fit(x_train, y_train)
    model.load_weights(model_path)
    accuracies.append(get_accuracy(model, x_test, y_test))

print(sum(accuracies)/len(accuracies))

In [None]:
accuracies = []

prediction_tag = 'PostOp-AreRW'
for k in range(num_k):
    train_df = pd.read_csv(os.path.join(kfold_data_dir, f'{file_train_start}_{k}.csv',)).dropna()
    test_df = pd.read_csv(os.path.join(kfold_data_dir, f'{file_test_start}_{k}.csv')).dropna()
    
    # Create right side model
    x_train, y_train = get_x_and_y(train_df, feature_right, [prediction_tag])
    x_test, y_test = get_x_and_y(test_df, feature_right, [prediction_tag])
    
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    
    x_test = np.array(x_test)
    y_test = np.array(y_test)
    
    model_path = os.path.join(os.path.join(models_dir, model_tag, prediction_tag, 'model_' + str(k) + '.h5'))
    
    if not os.path.exists(os.path.join(logs_dir, model_tag, prediction_tag, str(k))):
        os.mkdir(os.path.join(logs_dir, model_tag, prediction_tag, str(k)))
        
    log_path = os.path.join(os.path.join(logs_dir, model_tag, prediction_tag, str(k)))
    
    
    model = create_model(len(x_train[0]))
    history = model.fit(x_train, y_train,
                        shuffle=True,
                        batch_size=4,
                        epochs=10000,
                        verbose=0,
                        callbacks=get_callbacks(model_path, log_path),
                        validation_data=[x_test, y_test])

    model.fit(x_train, y_train)
    model.load_weights(model_path)
    accuracies.append(get_accuracy(model, x_test, y_test))

print(sum(accuracies)/len(accuracies))

In [None]:
accuracies = []

prediction_tag = 'PostOp-AreLW'
for k in range(num_k):
    train_df = pd.read_csv(os.path.join(kfold_data_dir, f'{file_train_start}_{k}.csv',)).dropna()
    test_df = pd.read_csv(os.path.join(kfold_data_dir, f'{file_test_start}_{k}.csv')).dropna()
    
    # Create right side model
    x_train, y_train = get_x_and_y(train_df, feature_left, [prediction_tag])
    x_test, y_test = get_x_and_y(test_df, feature_left, [prediction_tag])
    
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    
    x_test = np.array(x_test)
    y_test = np.array(y_test)
    
    model_path = os.path.join(os.path.join(models_dir, model_tag, prediction_tag, 'model_' + str(k) + '.h5'))
    
    if not os.path.exists(os.path.join(logs_dir, model_tag, prediction_tag, str(k))):
        os.mkdir(os.path.join(logs_dir, model_tag, prediction_tag, str(k)))
        
    log_path = os.path.join(os.path.join(logs_dir, model_tag, prediction_tag, str(k)))
    
    
    model = create_model(len(x_train[0]))
    history = model.fit(x_train, y_train,
                        shuffle=True,
                        batch_size=4,
                        epochs=10000,
                        verbose=0,
                        callbacks=get_callbacks(model_path, log_path),
                        validation_data=[x_test, y_test])

    model.fit(x_train, y_train)
    model.load_weights(model_path)
    accuracies.append(get_accuracy(model, x_test, y_test))

print(sum(accuracies)/len(accuracies))