In [1]:
%%capture

# standard libraries
import math
import os
import tempfile
import json
from pathlib import Path
import pickle

# standard scientific libraries
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from numpy import asarray, save, load
import pandas as pd
import seaborn as sns

# scikit-learn
import sklearn
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight

# tensorflow
import tensorflow as tf
import tensorflow_addons as tfa
import keras
from keras.models import Sequential
from keras.layers import Dense,Conv1D, Conv2D, MaxPooling2D, Dropout, Flatten, Input, MaxPooling1D
from keras.optimizers import RMSprop
from keras.utils import to_categorical, set_random_seed

# Make TensorFlow ops deterministic so that runs with the same seed are reproducible on the same hardware. Note that this can noticeably reduce performance.
tf.config.experimental.enable_op_determinism()

2024-11-22 12:01:25.676498: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
def train(DATA_DIR, RESULTS_DIR, PREFIX = "", SUFFIX="", MODEL_TYPE = "convolutional", NUM_CLASSES = 0, SEED=42, EPOCHS = 100, BATCH_SIZE = 16, LEARNING_RATE = 1e-3, PATIENCE = 10):

    # 1. Data

    # 1.1. Specify directories
    DATA_DIR = Path(DATA_DIR)
    SET_NAME = str(DATA_DIR).split("/")[-1]
    RESULTS_FILE = RESULTS_DIR / (PREFIX + SET_NAME + SUFFIX)
    print(f"\nTraining {SET_NAME}, results will be saved into {RESULTS_FILE}\n")
    
    # 1.2. Load datasets
    x_train_dict = np.load(DATA_DIR/"train_matrices.npz", allow_pickle=True)
    y_train_arr = np.load(DATA_DIR/"train_labels.npy", allow_pickle=True)
    x_dev_dict = np.load(DATA_DIR/"dev_matrices.npz", allow_pickle=True)
    y_dev_arr = np.load(DATA_DIR/"dev_labels.npy", allow_pickle=True)

    # 1.3. Define NUM_CLASSES
    if not NUM_CLASSES: NUM_CLASSES = len(set(y_train_arr))
    print(f"Number of classes: {NUM_CLASSES}")
    
    # 1.4. Convert discontinuous label values to continuous values
    # This is required for the folds datasets, which have missing numbers in the middle
    #! Don't forget to take this into account in the results!
    conversion_dict = dict(zip(set(y_train_arr), range(NUM_CLASSES)))
    y_train_arr = [conversion_dict[i] for i in y_train_arr]
    y_dev_arr = [conversion_dict[i] for i in y_dev_arr]
    
    # 1.5. Make X and Y TRAIN and DEV sets
    X_TRAIN = np.stack(x_train_dict['arr_0'], axis=0)
    X_DEV = np.stack(x_dev_dict['arr_0'], axis=0)
    Y_TRAIN = to_categorical(y_train_arr, num_classes=NUM_CLASSES)
    Y_DEV = to_categorical(y_dev_arr, num_classes=NUM_CLASSES)
    print(f"Training features shape: {X_TRAIN.shape} labels shape: {Y_TRAIN.shape}")
    print(f"Validation features shape: {X_DEV.shape} labels shape: {Y_DEV.shape}")

    # 1.6. Define INPUT_SHAPE
    INPUT_SHAPE = X_TRAIN.shape[1:]
    INPUT_LENGTH = INPUT_SHAPE[0]
    INPUT_SIZE = INPUT_SHAPE[0] * INPUT_SHAPE[1]
    print(f"Input shape: {INPUT_SHAPE}")
    
    # 1.7. Calculate class weights
    CLASS_WEIGHTS = np.round(class_weight.compute_class_weight(class_weight='balanced', classes=np.unique(y_train_arr), y=y_train_arr), 2)
    CLASS_WEIGHTS = dict(enumerate(CLASS_WEIGHTS))

    # 2. Model

    # 2.1. Define metrics
    METRICS = [
        keras.metrics.TruePositives(name = 'tp'),
        keras.metrics.FalsePositives(name = 'fp'),
        keras.metrics.TrueNegatives(name = 'tn'),
        keras.metrics.FalseNegatives(name = 'fn'),
        #// keras.metrics.BinaryAccuracy(name = 'accuracy'), # TODO remove this?
        keras.metrics.CategoricalAccuracy(name='accuracy'),
        keras.metrics.Precision(name = 'precision'),
        keras.metrics.Recall(name = 'recall'),
        keras.metrics.AUC(name = 'auc', curve='roc'),
        keras.metrics.AUC(name = 'prc', curve = 'PR'),
        tfa.metrics.F1Score(name = 'f1', num_classes = NUM_CLASSES),
        tfa.metrics.MatthewsCorrelationCoefficient(name = 'mcc', num_classes = NUM_CLASSES)
    ]
    
    # 2.2. Set seed
    set_random_seed(SEED)
    
    # 2.3. Construct model
    model = Sequential()
    match MODEL_TYPE:
        case "benchmark":
            # This is just a single-layer dense network.
            model.add(Flatten())
            model.add(Dense(INPUT_SIZE, activation = "relu", kernel_initializer='he_normal'))
            model.add(Dense(NUM_CLASSES, activation='softmax'))
        case "convolutional":
            # NOTE: kernel = n*n matrix, filter = collection of (convolved) feature maps from all input channels (depth). Output size is therefore H'*w'*filters
            # kernel_intializer: he_normal used for ReLU layers, otherwise standard (glorot_uniform)
            # The following architecture is a pretty standard image classification architecture, with extra dropouts in the middle
            model.add(Conv1D(
                16, 3,
                activation='relu',
                padding='same',
                input_shape = INPUT_SHAPE,
                kernel_initializer = 'he_normal'))
            model.add(MaxPooling1D(pool_size = 2, strides = 2))
            model.add(Dropout(0.2))
            model.add(Conv1D(
                32, 3,
                activation = 'relu',
                padding = 'same',
                kernel_initializer = 'he_normal'))
            model.add(MaxPooling1D(pool_size = 2, strides = 2))
            model.add(Dropout(0.2))
            model.add(Flatten())
            model.add(Dense(
                INPUT_LENGTH * 16,
                activation = 'relu',
                kernel_initializer='he_normal'))
            model.add(Dropout(0.2))
            model.add(Dense(
                NUM_CLASSES,
                activation = 'softmax'))
        case "convolutional_simple":
            # NOTE: kernel = n*n matrix, filter = collection of (convolved) feature maps from all input channels (depth). Output size is therefore H'*w'*filters
            # kernel_intializer: he_normal used for ReLU layers, otherwise standard (glorot_uniform)
            # The following architecture is a pretty standard image classification architecture, with extra dropouts in the middle
            model.add(Conv1D(
                16, 3,
                activation='relu',
                padding='same',
                input_shape = INPUT_SHAPE,
                kernel_initializer = 'he_normal'))
            model.add(MaxPooling1D(pool_size = 2, strides = 2))
            model.add(Conv1D(
                32, 3,
                activation = 'relu',
                padding = 'same',
                kernel_initializer = 'he_normal'))
            model.add(MaxPooling1D(pool_size = 2, strides = 2))
            model.add(Flatten())
            model.add(Dense(
                32,
                activation = 'relu',
                kernel_initializer='he_normal'))
            model.add(Dense(
                NUM_CLASSES,
                activation = 'softmax'))
    model.summary()

    # 2.4. Compile model
    model.compile(
        optimizer = keras.optimizers.Adam(learning_rate = LEARNING_RATE),  # optimizer=RMSprop(lr=0.001),
        loss = keras.losses.CategoricalCrossentropy(),
        metrics = METRICS)

    # 2.5. Define early stopping
    EARLY_STOPPING = keras.callbacks.EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)
    
    # 2.6. Train
    history = model.fit(
        X_TRAIN,
        Y_TRAIN,
        validation_data = (X_DEV, Y_DEV),
        batch_size = BATCH_SIZE,
        epochs = EPOCHS,
        callbacks=[EARLY_STOPPING],
        class_weight = CLASS_WEIGHTS
    )
    
    # 2.7. Save model
    model.save(RESULTS_FILE, overwrite=True)
    
    # 2.8. Save model history
    with open(RESULTS_FILE/"history", "wb") as file:
        pickle.dump(history.history, file)

In [3]:
DATA_PATH = Path("../data_generation/training_data/622/")
RESULTS_DIR = Path("results/622/")
SEEDS = [1, 2, 3]

# Train every dataset once per seed.
# Only real, non-hidden sub-directories are treated as datasets (stray files or
# folders such as ".ipynb_checkpoints" would otherwise make train() fail), and
# they are visited in sorted order so the run order is reproducible.
dataset_names = sorted(
    entry.name
    for entry in DATA_PATH.iterdir()
    if entry.is_dir() and not entry.name.startswith(".")
)
for DATA_DIR in dataset_names:
    for seed in SEEDS:
        train(DATA_PATH/DATA_DIR, RESULTS_DIR, SUFFIX=f"_{seed}", SEED=seed)


Training uncg_18, results will be saved into results/622/uncg_18_1

Number of classes: 2
Training features shape: (334, 18, 6) labels shape: (334, 2)
Validation features shape: (113, 18, 6) labels shape: (113, 2)
Input shape: (18, 6)


2024-11-22 12:02:18.115018: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 38551 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-40GB, pci bus id: 0000:e3:00.0, compute capability: 8.0


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 18, 16)            304       
                                                                 
 max_pooling1d (MaxPooling1D  (None, 9, 16)            0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 9, 16)             0         
                                                                 
 conv1d_1 (Conv1D)           (None, 9, 32)             1568      
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 4, 32)            0         
 1D)                                                             
                                                                 
 dropout_1 (Dropout)         (None, 4, 32)             0

2024-11-22 12:02:21.633520: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8401
2024-11-22 12:02:22.501516: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:630] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100


INFO:tensorflow:Assets written to: results/622/uncg_18_1/assets



Training uncg_18, results will be saved into results/622/uncg_18_2

Number of classes: 2
Training features shape: (334, 18, 6) labels shape: (334, 2)
Validation features shape: (113, 18, 6) labels shape: (113, 2)
Input shape: (18, 6)
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_2 (Conv1D)           (None, 18, 16)            304       
                                                                 
 max_pooling1d_2 (MaxPooling  (None, 9, 16)            0         
 1D)                                                             
                                                                 
 dropout_3 (Dropout)         (None, 9, 16)             0         
                                                                 
 conv1d_3 (Conv1D)           (None, 9, 32)             1568      
                                                                 
 max_pooling1d_3 

INFO:tensorflow:Assets written to: results/622/uncg_18_2/assets



Training uncg_18, results will be saved into results/622/uncg_18_3

Number of classes: 2
Training features shape: (334, 18, 6) labels shape: (334, 2)
Validation features shape: (113, 18, 6) labels shape: (113, 2)
Input shape: (18, 6)
Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_4 (Conv1D)           (None, 18, 16)            304       
                                                                 
 max_pooling1d_4 (MaxPooling  (None, 9, 16)            0         
 1D)                                                             
                                                                 
 dropout_6 (Dropout)         (None, 9, 16)             0         
                                                                 
 conv1d_5 (Conv1D)           (None, 9, 32)             1568      
                                                                 
 max_pooling1d_5 

INFO:tensorflow:Assets written to: results/622/uncg_18_3/assets
