In [8]:
from utils.data_preprocess import load_data, load_single_leakage_model_data
from utils.module import model_eval, hyper_model, model_comparison, linear_regression, numpy_to_tensor, benchmark_linear_model
import itertools
import pandas as pd 
import yaml
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Model
from keras.layers import Dense, Input
import keras_tuner as kt
from tensorflow import keras
from keras import layers
import seaborn as sns
import matplotlib.pyplot as plt
from kerastuner import HyperModel, Hyperband


In [9]:
model_path = 'saved_model/Multi_leak/2_loss/Mid_structure_anchor/6 Output/exp2'

In [10]:
with open("config_multi.yml", "r") as ymlfile:
    cfg = yaml.full_load(ymlfile)


single_leakage, two_leakage = load_data()
two_leakage["leak_1"] = 1
two_leakage["leak_2"] = 1

single_leakage["leak_1"] = 1
single_leakage["leak_2"] = 0

data = pd.concat([single_leakage, two_leakage], axis=0)
data['x2'] = data['x2'].replace(np.nan, 8024)
data['y2'] = data['y2'].replace(np.nan, 2616.5)

In [11]:

data = data.drop(columns=['mfc6_residual',
       'mfc7_residual', 'mfc8_residual', 'mfc9_residual', 'mfc10_residual',
       'mfc1_residual', 'mfc2_residual', 'mfc3_residual', 'mfc4_residual',
       'mfc5_residual', 'total flow rate'
       ])

y = data[['x1', 'y1', 'x2', 'y2', 'leak_1', 'leak_2']]
x = data.drop(['x1', 'y1', 'x2', 'y2', 'leak_1', 'leak_2'], axis=1)

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.15, random_state=1)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=1) 

y1_train = y_train[['x1', 'y1', 'x2', 'y2']]
y2_train = y_train[['leak_1', 'leak_2']]
y1_test = y_test[['x1', 'y1', 'x2', 'y2']]
y2_test = y_test[['leak_1', 'leak_2']]
y1_val = y_val[['x1', 'y1', 'x2', 'y2']]
y2_val = y_val[['leak_1', 'leak_2']]

y1_columns = y1_train.columns
y2_columns = y2_train.columns
X_columns = X_train.columns

scaler_coords1 = StandardScaler()
y1_train = scaler_coords1.fit_transform(y1_train)
y1_test = scaler_coords1.transform(y1_test)
y1_val = scaler_coords1.transform(y1_val)

y1_train = pd.DataFrame(y1_train, columns=y1_columns)
y1_test = pd.DataFrame(y1_test, columns=y1_columns)
y1_val = pd.DataFrame(y1_val, columns=y1_columns)

# y1_train['x2'] = y1_train['x2'].replace(np.nan, -5)
# y1_train['y2'] = y1_train['y2'].replace(np.nan, -5)

# y1_test['x2'] = y1_test['x2'].replace(np.nan, -5)
# y1_test['y2'] = y1_test['y2'].replace(np.nan, -5)

# y1_val['x2'] = y1_val['x2'].replace(np.nan, -5)
# y1_val['y2'] = y1_val['y2'].replace(np.nan, -5)
# Not sure if 0 is good enough or try generating a random number

# scaler_coords2 = StandardScaler()
# y2_train = scaler_coords2.fit_transform(y2_train)
# y2_test = scaler_coords2.fit_transform(y2_test)
# y2_val = scaler_coords2.transform(y2_val)

# y2_train = pd.DataFrame(y2_train, columns=y2_columns)
# y2_test = pd.DataFrame(y2_test, columns=y2_columns)
# y2_val = pd.DataFrame(y2_val, columns=y2_columns)

y2_train = y2_train.reset_index().drop(columns='sample_number')
y2_val = y2_val.reset_index().drop(columns='sample_number')
y2_test = y2_test.reset_index().drop(columns='sample_number')

# y_train = pd.concat([y1_train, y2_train], axis=1)
# y_test = pd.concat([y1_test, y2_test], axis=1)
# y_val = pd.concat([y1_val, y2_val], axis=1)

# scaler_flows = StandardScaler()
# X_train = scaler_flows.fit_transform(X_train)
# X_test = scaler_flows.transform(X_test)
# X_val = scaler_flows.transform(X_val)

In [12]:
X_train_np, y1_train_np, y2_train_np = X_train.values, y1_train.values, y2_train.values
X_val_np, y1_val_np, y2_val_np = X_val.values, y1_val.values, y2_val.values
X_test_np, y1_test_np, y2_test_np = X_test.values, y1_test.values, y2_test.values

# Create TensorFlow datasets from NumPy arrays.
batch_size = 32
buffer_size = len(X_train)  # Set the buffer size to the number of training examples for full shuffling.

train_dataset = tf.data.Dataset.from_tensor_slices((X_train_np, y1_train_np, y2_train_np ))
val_dataset = tf.data.Dataset.from_tensor_slices((X_val_np, y1_val_np, y2_val_np))
test_dataset = tf.data.Dataset.from_tensor_slices((X_test_np, y1_test_np, y2_test_np))

# Shuffle, batch, and prefetch the training dataset.
train_dataset = train_dataset.shuffle(buffer_size).batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Batch the validation and test datasets.
val_dataset = val_dataset.batch(batch_size)
test_dataset = test_dataset.batch(batch_size)


In [13]:
# y_train = {
#     "y1" : y1_train,
#     "y2" : y2_train
# }

# y_val = {
#     "y1" : y1_val,
#     "y2" : y2_val
# }

# y_test = {
#     "y1" : y1_test,
#     "y2" : y2_test
# }

losses = {
	"y1": "mse",
	"y2": 'binary_crossentropy'
    # "y2" : 'mse'
    }

metrics = {
    "y1": 'mae',
    "y2": 'mae'
    }



In [14]:
# def base_model(inputs):
#     # add the sequential layers here
#     x= Dense(128, activation='relu', kernel_initializer='he_uniform')(inputs)
#     # x= Dense(128, activation='relu', kernel_initializer='he_uniform')(x)
#     return x

# def final_model(inputs):
#     x = base_model(inputs)
#     y1 = Dense(units=4, name='y1')(x)
#     y2 = Dense(units =2, name = 'y2')(x)
#     model = Model(inputs=inputs, outputs = [y1, y2])
    
#     return model

# inputs = tf.keras.layers.Input(shape=(10,))
# model = final_model(inputs)

# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
#             loss=losses,
#             metrics = metrics)

# history = model.fit(train_dataset.map(lambda x, y1, y2: (x, {'y1': y1, 'y2': y2})), 
#                     validation_data = val_dataset.map(lambda x, y1, y2: (x, {'y1': y1, 'y2': y2})), 
#                     verbose = 1, epochs=100, shuffle = True)
# # model_evaluate, y_pred = model_eval(model, X_test, y_test, X_train, y_train, X_val, y_val)
# # coords = np.concatenate([y_pred[0], y_test['y1']], axis=1)
# # presence = np.concatenate([y_pred[1], y_test['y2']], axis=1)

In [15]:
# Experiment 2 with Nadam

EPOCHS = 1000
# Define the multi-task HyperModel
class MultiTaskHyperModel(HyperModel):

    def build(self, hp):
        inputs = keras.Input(shape=(10,))
        shared_layer = inputs
        for i in range(hp.Int('num_layers', 1, 15)):
            shared_layer = layers.Dense(
                units=hp.Int("units_" + str(i), min_value=32, max_value=512, step=32),
                # activation=hp.Choice("activation", ["relu"]),
                activation = 'relu',
                # add elu
                kernel_initializer='he_uniform'
            )(shared_layer)


        task_layer1 = shared_layer
        for j in range(hp.Int(f'task_{i}_num_layers', 0, 10)):
            task_layer1 = layers.Dense(units=hp.Choice(f'task_{i}_layer_{j}_neurons', values=[4, 8, 16]), 
                                       activation='relu',
                                       kernel_initializer='he_uniform')(task_layer1)
        y1 = layers.Dense(4, name='y1', activation = 'linear', kernel_initializer='he_uniform')(task_layer1)

        task_layer2 = shared_layer
        for j in range(hp.Int(f'task_{i}_num_layers', 0, 10)):
            task_layer2 = layers.Dense(units=hp.Choice(f'task_{i}_layer_{j}_neurons', values=[4, 8, 16]),
                                        activation='relu',
                                        kernel_initializer='he_uniform')(task_layer2)
        y2 = layers.Dense(2, name='y2', activation = 'sigmoid', kernel_initializer='he_uniform')(task_layer2)

        outputs = [y1, y2]

        # loss1_weight = hp.Float("loss1_weight", min_value=1e-4, max_value=1, sampling="log")
        # loss1_weight = hp.Float("loss2_weight", min_value=1e-4, max_value=1, sampling="log")
        
        loss1_weight = hp.Choice('loss1_weight', values=[0.2, 0.4, 0.6, 0.8])
        # loss2_weight = hp.Choice('loss2_weight', values=[0.00005, 0.2, 0.4, 0.6, 0.8, 1.0])
        # to stop the model from reducing both these weightage to very low and hence reducing the total loss
        loss2_weight = 1 - loss1_weight

        lossWeights = {"y1": loss1_weight, "y2": loss2_weight}

        model = keras.Model(inputs=inputs, outputs=outputs)
        learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-1, sampling="log")

        # model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        #             loss="mse",  metrics='mae')
        
        model.compile(optimizer=tf.keras.optimizers.Nadam(learning_rate=learning_rate),
                    loss=losses, loss_weights=lossWeights,
                    metrics = metrics)
        return model

# Create the multi-task HyperModel
multi_task_hypermodel = MultiTaskHyperModel()

# Define the Hyperband tuner
tuner = Hyperband(
    multi_task_hypermodel,
    objective = kt.Objective("val_y1_mae", direction="min"),
    max_epochs=EPOCHS,
    factor=2,
    directory="../../tensorflow_log_files/studienarbeit/",
    project_name='multi_task_tuning_NAdam_obj_val_y1_mae'
)

# Perform hyperparameter search
tuner.search(train_dataset.map(lambda x, y1, y2: (x, {'y1': y1, 'y2': y2})),
            #  X_train, y_train, 
            #  validation_data = (X_val, y_val), 
            validation_data = val_dataset.map(lambda x, y1, y2: (x, {'y1': y1, 'y2': y2})), 
             verbose = 1, epochs=EPOCHS, shuffle = True)

best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print("Best Hyperparameters:", best_hps)

# Build the best model with the best hyperparameters
best_model = tuner.hypermodel.build(best_hps)

# Train the best model on the full dataset
best_model.fit(train_dataset.map(lambda x, y1, y2: (x, {'y1': y1, 'y2': y2})),
            #  X_train, y_train, 
            #  validation_data = (X_val, y_val), 
            validation_data = val_dataset.map(lambda x, y1, y2: (x, {'y1': y1, 'y2': y2})), 
             verbose = 1, epochs=EPOCHS, shuffle = True)

INFO:tensorflow:Reloading Tuner from ../../tensorflow_log_files/studienarbeit/multi_task_tuning_NAdam_obj_val_y1_mae/tuner0.json
INFO:tensorflow:Oracle triggered exit
Best Hyperparameters: <keras_tuner.engine.hyperparameters.hyperparameters.HyperParameters object at 0x7ff7205256a0>
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/100

<keras.callbacks.History at 0x7ff78c3d81c0>

In [16]:
# best_model.save(model_path)

In [17]:
best_model = tf.keras.models.load_model(model_path)
best_model.summary()
y_predictions = best_model.predict(test_dataset.map(lambda x, y1, y2: (x, {'y1': y1, 'y2': y2})))
results_train = best_model.evaluate(train_dataset.map(lambda x, y1, y2: (x, {'y1': y1, 'y2': y2})), verbose=1)
results_val = best_model.evaluate(val_dataset.map(lambda x, y1, y2: (x, {'y1': y1, 'y2': y2})), verbose=1)
results_test = best_model.evaluate(test_dataset.map(lambda x, y1, y2: (x, {'y1': y1, 'y2': y2})), verbose=1)

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 10)]         0           []                               
                                                                                                  
 dense_19 (Dense)               (None, 128)          1408        ['input_2[0][0]']                
                                                                                                  
 dense_20 (Dense)               (None, 32)           4128        ['dense_19[0][0]']               
                                                                                                  
 dense_21 (Dense)               (None, 192)          6336        ['dense_20[0][0]']               
                                                                                            

In [18]:
y_pred = np.concatenate((y_predictions[0],y_predictions[1]), axis=1)
y_pred[:,-2][np.abs(y_pred[:,-2]) < 0.5] = 0
y_pred[:,-2][np.abs(y_pred[:,-2]) > 0.5] = 1
y_pred[:,-1][np.abs(y_pred[:,-1]) < 0.5] = 0
y_pred[:,-1][np.abs(y_pred[:,-1]) > 0.5] = 1
y_pred = pd.DataFrame(y_pred, columns=y_train.columns)
y1_pred_inverse = scaler_coords1.inverse_transform(y_pred[['x1', 'y1','x2', 'y2']])
y_pred[['x1', 'y1','x2', 'y2']] = pd.DataFrame(y1_pred_inverse,columns=['x1', 'y1','x2', 'y2'])
pd.concat([y_pred, y_test.reset_index().drop(columns='sample_number')], axis=1).to_csv(model_path+'predictions.csv')

In [19]:
from sklearn.metrics import mean_squared_error

mse_x1 = mean_squared_error(y_test['x1'], y_pred['x1'], squared=True)
mse_x2 = mean_squared_error(y_test['x2'], y_pred['x2'], squared=True)

mse_y1 = mean_squared_error(y_test['y1'], y_pred['y1'], squared=True)
mse_y2 = mean_squared_error(y_test['y2'], y_pred['y2'], squared=True)

mse_leak1 = mean_squared_error(y_test['leak_1'], y_pred['leak_1'], squared=True)
mse_leak2 = mean_squared_error(y_test['leak_2'], y_pred['leak_1'], squared=True)

mse = [mse_x1, mse_x2, mse_y1, mse_y2, mse_leak1, mse_leak2]

In [20]:
mse

[2799174.841773452,
 1854270.0378119028,
 263964.3058309706,
 155616.49934978667,
 0.0,
 0.39316239316239315]

In [21]:


# idea to explore - 
# 1. Just use 1 loss function for the entire output
# 2. Give 2 loss function for each leakageness and coordinates (2a. Try the same without sigmoid function)
# 3.
# create a model first with 4 coordinate - 2 for each. 
# Then use that model as base model, add additional layers for leakageness and do transfer learning to do the 6 output case

# 4. 
# use masking feature of tensorflow (low priority) - # Use the idea of masking for the case where when we have to only 1 leakage 

# do i actually need objectness 1. since it is always present.i just need 1 objectness right ?
# coordinate are more important right - hence give more wightage to the coordinate loss function
# try auxiliary task and joint model
# remove bad quality data