In [1]:
%%capture

from sz_utils import data_handler
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import getpass

# check if gpu is available
import tensorflow as tf
tf.config.list_physical_devices('GPU')

# collect the data
preictal, interictal = data_handler.make_patient_windows("chb01")

# make the labels
X = np.concatenate((preictal, interictal), axis=0)
y = np.concatenate((np.ones((preictal.shape[0], 1)), np.zeros((interictal.shape[0], 1))), axis=0)

# Shuffle the data
shuffle_indices = np.random.permutation(np.arange(X.shape[0]))
X = X[shuffle_indices]
y = y[shuffle_indices]

# Split the data into train and test
train_size = int(X.shape[0] * 0.8)
X_train = X[:train_size]
y_train = y[:train_size]
X_test = X[train_size:]
y_test = y[train_size:]

# Split test data into validation and test
val_size = int(X_test.shape[0] * 0.5)
X_val = X_test[:val_size]
y_val = y_test[:val_size]
X_test = X_test[val_size:]
y_test = y_test[val_size:]

2023-03-29 19:23:17.051228: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-29 19:23:18.994410: I tensorflow/c/logging.cc:34] Successfully opened dynamic library libdirectml.d6f03b303ac3c4f2eeb8ca631688c9757b361310.so
2023-03-29 19:23:18.994587: I tensorflow/c/logging.cc:34] Successfully opened dynamic library libdxcore.so
2023-03-29 19:23:19.003806: I tensorflow/c/logging.cc:34] Successfully opened dynamic library libd3d12.so
2023-03-29 19:23:21.693095: I tensorflow/c/logging.cc:34] DirectML device enumeration: found 1 compatible adapters.


In [2]:
import numpy as np
import mlflow.tensorflow
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import mlflow

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D,LSTM, Conv1D, Activation, MaxPooling1D
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import confusion_matrix
from tensorflow.keras.metrics import AUC
import keras

In [3]:
# shapes
print("X_train shape:", X_train.shape, "y_train shape:", y_train.shape, "X_val shape:", X_val.shape, "y_val shape:", y_val.shape, "X_test shape:", X_test.shape, "y_test shape:", y_test.shape)

X_train shape: (672, 1280, 22) y_train shape: (672, 1) X_val shape: (84, 1280, 22) y_val shape: (84, 1) X_test shape: (84, 1280, 22) y_test shape: (84, 1)


In [4]:
num_classes = 2 
input_shape_dataset = (X_train.shape[1], X_train.shape[2])
input_shape_dataset

def create_model_cnn_basic_1_layer(
    input_shape_dataset: tuple = input_shape_dataset,
    num_classes: int = num_classes,
    debug: bool = False,
    filters: int = 256,
    kernel_size: int = 3,
    pool_size: int = 2,
    dropout: float = 0.1,
    dense_size: int = 64,
    loss: str = "binary_crossentropy",
    optimizer: str = "adam",
    metrics: list = ["accuracy"],

) -> tf.keras.Model:

    """This function creates a basic convolutional neural network model with 2 convolutional layers, 2 dense layers and a softmax layer

    :param input_shape_dataset: shape of the input data
    :type input_shape_dataset: tuple
    :param num_classes: number of classes
    :type num_classes: int
    :return: return a model
    :rtype: tf.keras.Model
    """

    if debug:
        print("------------model summary---------------")
        print("input_shape_dataset", input_shape_dataset)
        print("num_classes", num_classes)

    input_shape_dataset: tuple
    
    model = Sequential()

    model.add(Conv1D(filters, kernel_size, input_shape=(input_shape_dataset)))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size=pool_size))
    model.add(Flatten())
    model.add(Dense(dense_size))
    model.add(Dropout(dropout))

    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

    return model

In [5]:
from dataclasses import dataclass
import tensorflow as tf
import mlflow
from sklearn.model_selection import KFold

class Experiment:
    experiment_name: str
    model_name: str
    model: tf.keras.Model
    dataset: tuple
    hyperparameters: dict
    metrics: dict


    def __init__(self, experiment_name, model_name, model, dataset, hyperparameters, metrics):
        self.experiment_name = experiment_name
        self.model_name = model_name
        self.model = model
        self.dataset = dataset
        self.hyperparameters = hyperparameters
        self.metrics = metrics      

    # TODO: add kfolds
    def fit(self):
        # add k-fold cross validation


        mlflow.tensorflow.autolog()
        history = self.model.fit(self.dataset[0], self.dataset[1],
                                    validation_data=(self.dataset[2], self.dataset[3]),
                                    epochs=self.hyperparameters["epochs"],
                                    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)])
        # TODO log artifacts
        # mlflow.log_artifacts("graphs", self._log_graphs(history))
        return history

    def set_experiment(self):
        mlflow.set_experiment(self.experiment_name)

    def log_params(self):
        mlflow.log_param("model_name", self.model_name)
        for key, value in self.hyperparameters.items():
            mlflow.log_param(key, value)

    
    def log_metrics(self, history):
        for metric_name, metric_values in history.history.items():
            for epoch, value in enumerate(metric_values):
                mlflow.log_metric(f"{metric_name}", value, step=epoch)
    
    # def _log_graphs(self, history):
        # TODO make a function to log graphs
    
        # Save training and validation loss and accuracy plots as artifact
        # import matplotlib.pyplot as plt
        # fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(8, 8), dpi=100, sharex=True)
        # ax[0].plot(history.history["loss"], label="train_loss")
        # ax[0].plot(history.history["val_loss"], label="val_loss")
        # ax[0].set_title("Training/Validation Loss")
        # ax[0].set_xlabel("Epoch")
        # ax[0].set_ylabel("Loss")
        # # ax[0].set_grid(True)
        # ax[0].legend()
        
        # ax[1].plot(history.history["accuracy"], label="train_acc")
        # ax[1].plot(history.history["val_accuracy"], label="val_acc")
        # ax[1].set_title("Training/Validation Accuracy")
        # ax[1].set_xlabel("Epoch")
        # ax[1].set_ylabel("Accuracy")
        # # ax[1].set_grid(True)
        # ax[1].legend()
        
        # fig.tight_layout()
        # fig.savefig("graphs.png")
        # plt.close(fig)
        # return "graphs.png"
        
    def log_artifacts(self):
        
        pass

# TODO: Analyze if it is necessary to use gc.collect()
# import gc
# gc.collect()
def registrar_experiment(experiment: Experiment):
    with mlflow.start_run(nested=True):
        experiment.set_experiment()
        experiment.log_params()
        experiment.log_metrics(experiment.fit())
        # experiment._log_graphs(experiment.fit())
        experiment.log_artifacts()


experiment_1 = Experiment(
    experiment_name = "CNN_autolog",
    model_name = "CNN_basic_1_layer",
    model = create_model_cnn_basic_1_layer(),
    dataset = (X_train, y_train, X_val, y_val),
    hyperparameters = {
        "epochs": 11,
        "filters": 256,
        "kernel_size": 3,
        "pool_size": 2,
        "dropout": 0.1,
        "dense_size": 64,
        "loss": "binary_crossentropy",
        "optimizer": "adam",
        
    },metrics = ["accuracy"],
)

experiment_2 = Experiment(
    experiment_name = "CNN_autolog",
    model_name = "CNN_basic_1_layer",
    model = create_model_cnn_basic_1_layer(),
    dataset = (X_train, y_train, X_val, y_val),
    hyperparameters = {
        "epochs": 17,
        "filters": 256,
        "kernel_size": 3,
        "pool_size": 2,
        "dropout": 0.1,
        "dense_size": 32,
        "loss": "binary_crossentropy",
        "optimizer": "adam",
        
    },metrics = ["accuracy"],
)

2023-03-29 19:23:42.526319: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-29 19:23:42.528822: I tensorflow/c/logging.cc:34] DirectML: creating device on adapter 0 (NVIDIA GeForce GTX 1650)
2023-03-29 19:23:44.227891: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-03-29 19:23:44.228043: W tensorflow/core/common_runtime/pluggable_device/pluggable_device_bfc_allocator.cc:28] Overriding allow_growth setting because force_memory_growth was requested by the device.
2023-03-29 19:23:44.228434: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_f

In [6]:
%%capture
experiments = [experiment_1, experiment_2]

for experiment in experiments:
    registrar_experiment(experiment)
    

2023-03-29 19:23:46.322759: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-03-29 19:23:51.329031: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-03-29 19:23:51.363020: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-03-29 19:23:51.363106: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10171 MB memory) -> physical PluggableDevice (device: 0, name: DML, pci bus id: <undefined>)
2023-03-29 19:23:54.562414: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-03-29 19:24:00.

## test artifacts

In [7]:
from dataclasses import dataclass
import tensorflow as tf
import mlflow
from sklearn.model_selection import KFold

class Experiment:
    experiment_name: str
    model_name: str
    model: tf.keras.Model
    dataset: tuple
    hyperparameters: dict
    metrics: dict


    def __init__(self, experiment_name, model_name, model, dataset, hyperparameters, metrics):
        self.experiment_name = experiment_name
        self.model_name = model_name
        self.model = model
        self.dataset = dataset
        self.hyperparameters = hyperparameters
        self.metrics = metrics      

    # TODO: add kfolds
    def fit(self):
        # add k-fold cross validation


        mlflow.tensorflow.autolog()
        history = self.model.fit(self.dataset[0], self.dataset[1],
                                    validation_data=(self.dataset[2], self.dataset[3]),
                                    epochs=self.hyperparameters["epochs"],
                                    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)])
        # TODO log artifacts
        mlflow.log_artifact(self._log_graphs(history), artifact_path='Artifacts')
        return history

    def set_experiment(self):
        mlflow.set_experiment(self.experiment_name)

    def log_params(self):
        mlflow.log_param("model_name", self.model_name)
        for key, value in self.hyperparameters.items():
            mlflow.log_param(key, value)

    
    def log_metrics(self, history):
        for metric_name, metric_values in history.history.items():
            for epoch, value in enumerate(metric_values):
                mlflow.log_metric(f"{metric_name}", value, step=epoch)
        
    import matplotlib.pyplot as plt

    def _log_graphs(self, history):
        fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))
        
        # Accuracy plot
        ax[0].plot(history.history['accuracy'], label='train')
        ax[0].plot(history.history['val_accuracy'], label='val')
        ax[0].set_title('Accuracy')
        ax[0].set_xlabel('Epoch')
        ax[0].set_ylabel('Accuracy')
        ax[0].legend()
    
        # Loss plot
        ax[1].plot(history.history['loss'], label='train')
        ax[1].plot(history.history['val_loss'], label='val')
        ax[1].set_title('Loss')
        ax[1].set_xlabel('Epoch')
        ax[1].set_ylabel('Loss')
        ax[1].legend()
        
        # Save the plots to a file
        plt.tight_layout()
        plt.savefig('graphs.png')
        return 'graphs.png'
        
    def log_artifacts(self):
        
        pass

# TODO: Analyze if it is necessary to use gc.collect()
# import gc
# gc.collect()

def registrar_experiment(experiment: Experiment):
    with mlflow.start_run(nested=True):
        experiment.set_experiment()
        experiment.log_params()
        history = experiment.fit()
        experiment.log_metrics(history)
        graph_path = experiment._log_graphs(history)
        mlflow.log_artifact(graph_path, artifact_path='Artifacts')
        experiment.log_artifacts()

experiment_1 = Experiment(
    experiment_name = "CNN_autolog",
    model_name = "CNN_basic_1_layer",
    model = create_model_cnn_basic_1_layer(),
    dataset = (X_train, y_train, X_val, y_val),
    hyperparameters = {
        "epochs": 11,
        "filters": 256,
        "kernel_size": 3,
        "pool_size": 1,
        "dropout": 0.1,
        "dense_size": 32,
        "loss": "binary_crossentropy",
        "optimizer": "adam",
        
    },metrics = ["accuracy"],
)

experiment_2 = Experiment(
    experiment_name = "CNN_autolog",
    model_name = "CNN_basic_1_layer",
    model = create_model_cnn_basic_1_layer(),
    dataset = (X_train, y_train, X_val, y_val),
    hyperparameters = {
        "epochs": 17,
        "filters": 128,
        "kernel_size": 3,
        "pool_size": 2,
        "dropout": 0.1,
        "dense_size": 16,
        "loss": "binary_crossentropy",
        "optimizer": "adam",
        
    },metrics = ["accuracy"],
)

In [8]:
%%capture
experiments = [experiment_1, experiment_2]

for experiment in experiments:
    registrar_experiment(experiment)
    

2023-03-29 19:24:12.701402: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-03-29 19:24:13.658487: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-03-29 19:24:13.685964: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-03-29 19:24:13.686046: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10171 MB memory) -> physical PluggableDevice (device: 0, name: DML, pci bus id: <undefined>)
2023-03-29 19:24:19.766869: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-03-29 19:24:26.

## Test k-fold cross validation

In [9]:
%%capture
import numpy as np
import mlflow.tensorflow
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import mlflow

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D,LSTM, Conv1D, Activation, MaxPooling1D
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import confusion_matrix
from tensorflow.keras.metrics import AUC
import keras

from sklearn.model_selection import KFold

from sz_utils import data_handler
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# check if gpu is available
import tensorflow as tf
tf.config.list_physical_devices('GPU')

In [10]:
%%capture
# collect the data
preictal, interictal = data_handler.make_patient_windows("chb01")

# make the labels
X = np.concatenate((preictal, interictal), axis=0)
y = np.concatenate((np.ones((preictal.shape[0], 1)), np.zeros((interictal.shape[0], 1))), axis=0)

# Shuffle the data
shuffle_indices = np.random.permutation(np.arange(X.shape[0]))
X = X[shuffle_indices]
y = y[shuffle_indices]

# Split the data into train and test
train_size = int(X.shape[0] * 0.8)
X_train = X[:train_size]
y_train = y[:train_size]
X_test = X[train_size:]
y_test = y[train_size:]

# Split test data into validation and test
val_size = int(X_test.shape[0] * 0.5)
X_val = X_test[:val_size]
y_val = y_test[:val_size]
X_test = X_test[val_size:]
y_test = y_test[val_size:]

In [11]:
num_classes = 2 
input_shape_dataset = (X_train.shape[1], X_train.shape[2])
input_shape_dataset

def create_model_cnn_basic_1_layer(
    input_shape_dataset: tuple = input_shape_dataset,
    num_classes: int = num_classes,
    debug: bool = False,
    filters: int = 256,
    kernel_size: int = 3,
    pool_size: int = 2,
    dropout: float = 0.1,
    dense_size: int = 64,
    loss: str = "binary_crossentropy",
    optimizer: str = "adam",
    metrics: list = ["accuracy"],

) -> tf.keras.Model:

    """This function creates a basic convolutional neural network model with 2 convolutional layers, 2 dense layers and a softmax layer

    :param input_shape_dataset: shape of the input data
    :type input_shape_dataset: tuple
    :param num_classes: number of classes
    :type num_classes: int
    :return: return a model
    :rtype: tf.keras.Model
    """

    if debug:
        print("------------model summary---------------")
        print("input_shape_dataset", input_shape_dataset)
        print("num_classes", num_classes)

    input_shape_dataset: tuple
    
    model = Sequential()

    model.add(Conv1D(filters, kernel_size, input_shape=(input_shape_dataset)))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size=pool_size))
    model.add(Flatten())
    model.add(Dense(dense_size))
    model.add(Dropout(dropout))

    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

    return model

In [12]:
num_folds = 5
fold_no = 1
# Define per-fold score containers <-- these are new
acc_per_fold = []
loss_per_fold = []

inputs = X
targets = y

kfold = KFold(n_splits=num_folds, shuffle=True)

for train, test in kfold.split(inputs, targets):
    # create model
    model = Sequential()
    model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(inputs.shape[1], inputs.shape[2])))
    model.add(Conv1D(filters=32, kernel_size=3, activation='relu'))
    model.add(Dropout(0.5))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
     # Generate a print
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')
    # Fit the model
    model.fit(inputs[train], targets[train], epochs=10, verbose=0)
    # evaluate the model
    # Generate generalization metrics
    scores = model.evaluate(inputs[test], targets[test], verbose=0)
    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
    acc_per_fold.append(scores[1] * 100)
    loss_per_fold.append(scores[0])

    # Increase fold number
    fold_no = fold_no + 1



2023/03/29 19:25:16 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '1475518e7886440c9716229eb7d33116', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


------------------------------------------------------------------------
Training for fold 1 ...


2023-03-29 19:25:17.423315: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-03-29 19:25:22.612340: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-03-29 19:25:29.158503: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-03-29 19:25:29.194532: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-03-29 19:25:29.194673: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10171 MB memory) -> physical PluggableDevice (device: 0, name: DML, pci bus id: <undefined>)
2023/03/29 19:25:29 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '9a01b01bfebe4775ba07b26942318dcb', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Score for fold 1: loss of 0.11172610521316528; accuracy of 98.21428656578064%
------------------------------------------------------------------------
Training for fold 2 ...


2023-03-29 19:25:29.794975: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-03-29 19:25:32.560999: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-03-29 19:25:37.394789: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-03-29 19:25:37.431962: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-03-29 19:25:37.432050: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10171 MB memory) -> physical PluggableDevice (device: 0, name: DML, pci bus id: <undefined>)
2023/03/29 19:25:37 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '1a7ddc010fff412b8eadbf4e7ac0caf8', which will track hyperparameters, performan

Score for fold 2: loss of 0.6943904161453247; accuracy of 44.64285671710968%
------------------------------------------------------------------------
Training for fold 3 ...


2023-03-29 19:25:37.955999: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-03-29 19:25:40.720879: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-03-29 19:25:46.043422: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-03-29 19:25:46.077254: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-03-29 19:25:46.077350: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10171 MB memory) -> physical PluggableDevice (device: 0, name: DML, pci bus id: <undefined>)
2023/03/29 19:25:46 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '51b8a9d5bb1f4b68aa5b50ab0c788352', which will track hyperparameters, performan

Score for fold 3: loss of 0.38884949684143066; accuracy of 92.26190447807312%
------------------------------------------------------------------------
Training for fold 4 ...


2023-03-29 19:25:46.644247: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-03-29 19:25:49.405485: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-03-29 19:25:54.276806: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-03-29 19:25:54.310506: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-03-29 19:25:54.310564: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10171 MB memory) -> physical PluggableDevice (device: 0, name: DML, pci bus id: <undefined>)
2023/03/29 19:25:54 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '5bcb8fff354242b6be9b1bef8cdf9d58', which will track hyperparameters, performan

Score for fold 4: loss of 0.25302520394325256; accuracy of 95.23809552192688%
------------------------------------------------------------------------
Training for fold 5 ...


2023-03-29 19:25:54.984944: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-03-29 19:25:57.803636: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Score for fold 5: loss of 0.6940484046936035; accuracy of 41.66666865348816%


2023-03-29 19:26:03.127543: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-03-29 19:26:03.168825: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-03-29 19:26:03.169023: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10171 MB memory) -> physical PluggableDevice (device: 0, name: DML, pci bus id: <undefined>)


In [13]:
dataset_ns = (X, y)
dataset_ns[0].shape 

(840, 1280, 22)

In [14]:
from dataclasses import dataclass
import tensorflow as tf
import mlflow
from sklearn.model_selection import KFold

dataset_ns = (X, y)

import warnings

warnings.filterwarnings("ignore", category=UserWarning, module="mlflow.tensorflow")


class Experiment_kfolds:
    def __init__(self, experiment_name, model_name, model, dataset_ns, hyperparameters, metrics, num_folds=5):
        self.experiment_name = experiment_name
        self.model_name = model_name
        self.model = model
        self.dataset_ns = dataset_ns
        self.hyperparameters = hyperparameters
        self.metrics = metrics
        self.num_folds = num_folds
        self.fold_no = 1
        self.acc_per_fold = []
        self.loss_per_fold = []
        self.kfold = KFold(n_splits=self.num_folds, shuffle=True)
        self.train = None
        self.test = None

    def fit(self):
        try:
            
            # mlflow.tensorflow.autolog(log_models=False)
            mlflow.tensorflow.autolog()
            history = self.model.fit(self.dataset_ns[0][train], self.dataset_ns[1][train], epochs=self.hyperparameters["epochs"], verbose=0)
            return history

        except Exception as e:
            print("El erro se pruduce en el fit, el error es: ", e)

    def evaluate(self, history):

        try:
                
            scores = model.evaluate(self.dataset_ns[0][test], self.dataset_ns[1][test], verbose=0)
            print(f'Score for fold {self.fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
            self.acc_per_fold.append(scores[1] * 100)
            self.loss_per_fold.append(scores[0])
            self.log_metrics(history)
        except Exception as e:
            print("El erro se pruduce en el evaluate, el error es: ", e)

    def set_experiment(self):
        mlflow.set_experiment(self.experiment_name)

    def log_params(self):
        mlflow.log_param("model_name", self.model_name)
        for key, value in self.hyperparameters.items():
            mlflow.log_param(key, value)


    def log_metrics(self, history):
        for metric_name, metric_values in history.history.items():
            for epoch, value in enumerate(metric_values):
                mlflow.log_metric(f"{metric_name}", value, step=epoch)

    def log_artifacts(self):
        pass

    def run(self):
        self.set_experiment()
        self.log_params()
        for train, test in self.kfold.split(self.dataset_ns[0], self.dataset_ns[1]):
            self.train = train
            self.test = test
            history = self.fit()
            self.evaluate(history)  # pass history object returned by fit()
            self.fold_no = self.fold_no + 1


In [15]:
import mlflow

class Experiment_kfolds:
    def __init__(self, experiment_name, model_name, model, dataset_ns, hyperparameters, metrics, num_folds=5):
        self.experiment_name = experiment_name
        self.model_name = model_name
        self.model = model
        self.dataset_ns = dataset_ns
        self.hyperparameters = hyperparameters
        self.metrics = metrics
        self.num_folds = num_folds
        self.fold_no = 1
        self.acc_per_fold = []
        self.loss_per_fold = []
        self.kfold = KFold(n_splits=self.num_folds, shuffle=True)
        self.train = None
        self.test = None

    def fit_evaluate(self):
        try:
            mlflow.tensorflow.autolog()
            for i, (train, test) in enumerate(self.kfold.split(self.dataset_ns[0], self.dataset_ns[1])):
                with mlflow.start_run(run_name=f"fold_{i}", nested=True):
                    self.train = train
                    self.test = test
                    history = self.model.fit(self.dataset_ns[0][train], self.dataset_ns[1][train], epochs=self.hyperparameters["epochs"], verbose=1)
                    scores = model.evaluate(self.dataset_ns[0][test], self.dataset_ns[1][test], verbose=0)
                    print(f'Score for fold {self.fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
                    self.acc_per_fold.append(scores[1] * 100)
                    mlflow.log_param("acc_per_fold", self.acc_per_fold)
                    self.loss_per_fold.append(scores[0])
                    self.log_metrics(history)
                    self.fold_no = self.fold_no + 1
                # mlflow.end_run() # This line is optional
            return history
        except Exception as e:
            print("El error se produce en el fit, el error es: ", e)


    def set_experiment(self):
        mlflow.set_experiment(self.experiment_name)

    def log_params(self):
        mlflow.log_param("model_name", self.model_name)
        for key, value in self.hyperparameters.items():
            mlflow.log_param(key, value)


    def log_metrics(self, history):

        for metric_name, metric_values in history.history.items():
            for epoch, value in enumerate(metric_values):
                mlflow.log_metric(f"{metric_name}", value, step=epoch)

    def log_artifacts(self):
        pass

    def run(self):
        self.set_experiment()
        self.log_params()
        self.fit_evaluate()


def run_experiment(experiment_name, model_name, model, dataset_ns, hyperparameters, metrics, num_folds=5):
    with mlflow.start_run():
        experiment = Experiment_kfolds(experiment_name, model_name, model, dataset_ns, hyperparameters, metrics, num_folds)
        experiment.run()

In [16]:
experiment_1_kfolds = Experiment_kfolds(
    experiment_name = "CNN_autolog_kfolds",
    # experiment_description = "CNN con autolog y kfold",
    model_name = "CNN_basic_1_layer",
    model = create_model_cnn_basic_1_layer(),
    dataset_ns = dataset_ns,
    hyperparameters = {
        "epochs": 12,
        "filters": 256,
        "kernel_size": 3,
        "pool_size": 2,
        "dropout": 0.1,
        "dense_size": 64,
        "loss": "binary_crossentropy",
        "optimizer": "adam",

    },metrics = ["accuracy"],
    num_folds = 5
)

run_experiment( experiment_1_kfolds.experiment_name, experiment_1_kfolds.model_name, experiment_1_kfolds.model, experiment_1_kfolds.dataset_ns, experiment_1_kfolds.hyperparameters, experiment_1_kfolds.metrics, experiment_1_kfolds.num_folds)



2023/03/29 19:26:03 INFO mlflow.tracking.fluent: Experiment with name 'CNN_autolog_kfolds' does not exist. Creating a new experiment.


Epoch 1/12


2023-03-29 19:26:04.445988: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


2023-03-29 19:26:11.346315: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Score for fold 1: loss of 0.6930347084999084; accuracy of 51.1904776096344%
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12




Score for fold 2: loss of 0.6891950368881226; accuracy of 48.80952537059784%
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12




Score for fold 3: loss of 0.6930980086326599; accuracy of 50.59524178504944%
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12




Score for fold 4: loss of 0.6932880282402039; accuracy of 48.80952537059784%
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12




Score for fold 5: loss of 0.6930980086326599; accuracy of 50.59524178504944%


In [17]:
# clear ram
import gc
gc.collect()

22620