In [1]:
%%capture

# standard libraries
import math
import os
import tempfile
import json
from pathlib import Path
import pickle

# standard scientific libraries
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from numpy import asarray, save, load
import pandas as pd
import seaborn as sns

# scikit-learn
import sklearn
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight

# tensorflow
import tensorflow as tf
import tensorflow_addons as tfa
import keras
from keras.models import Sequential
from keras.layers import Dense,Conv1D, Conv2D, MaxPooling2D, Dropout, Flatten, Input, MaxPooling1D, Reshape
from keras.optimizers import RMSprop
from keras.utils import to_categorical

2024-08-15 13:31:57.209258: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [None]:
DATA_DIR = Path("../data_generation/training_data/sample_data/multi_48")


def _load_split(split):
    """Load the feature and label arrays of one data split from DATA_DIR.

    Reads ``<split>_matrices.npz`` and ``<split>_labels.npz`` and stacks the
    entries stored under the ``arr_0`` key along a new leading axis.

    Parameters
    ----------
    split : str
        File-name prefix of the split, e.g. ``"train"`` or ``"dev"``.

    Returns
    -------
    tuple of np.ndarray
        ``(features, labels)``, both stacked along axis 0.
    """
    # NOTE(security): allow_pickle=True unpickles arbitrary Python objects;
    # only load archives that come from a trusted source.
    # The archives are opened as context managers so the underlying file
    # handles are closed as soon as the arrays have been read.
    with np.load(DATA_DIR/f"{split}_matrices.npz", allow_pickle=True) as archive:
        features = np.stack(archive['arr_0'], axis=0)
    with np.load(DATA_DIR/f"{split}_labels.npz", allow_pickle=True) as archive:
        # Every stored label object is densified through its .toarray() method
        # (presumably SciPy sparse matrices -- TODO confirm against the generator).
        labels = np.stack([a.toarray() for a in archive['arr_0']], axis=0)
    return features, labels


x_train, y_train = _load_split("train")

print("Training features shape:", x_train.shape)
print("Training labels shape:", y_train.shape)

x_dev, y_dev = _load_split("dev")

print("Validation features shape:", x_dev.shape)
print("Validation labels shape:", y_dev.shape)

print("\nInput shape:", x_train.shape[1:])

In [None]:
# Per-sample shapes (leading sample axis dropped) and their flattened sizes.
# The sizes are the product over *all* dimensions so they stay consistent with
# Flatten/Reshape whatever the rank of the data is.
INPUT_SHAPE = x_train.shape[1:]
INPUT_SIZE = int(np.prod(INPUT_SHAPE))
OUTPUT_SHAPE = y_train.shape[1:]
OUTPUT_SIZE = int(np.prod(OUTPUT_SHAPE))

# Metrics reported during training and validation.
METRICS = [ 
    keras.metrics.TruePositives(name = 'tp'),
    keras.metrics.FalsePositives(name = 'fp'),
    keras.metrics.TrueNegatives(name = 'tn'),
    keras.metrics.FalseNegatives(name = 'fn'),
    keras.metrics.CategoricalAccuracy(name='accuracy'),
    keras.metrics.Precision(name = 'precision'),
    keras.metrics.Recall(name = 'recall'),
    keras.metrics.AUC(name = 'auc', curve='roc'),
    keras.metrics.AUC(name = 'prc', curve = 'PR'),
    # Disabled: NUM_CLASSES is not defined in the visible cells.
    # tfa.metrics.F1Score(name = 'f1', num_classes = NUM_CLASSES),
    # tfa.metrics.MatthewsCorrelationCoefficient(name = 'mcc', num_classes = NUM_CLASSES)
]

# Fully connected network: flatten the input, two hidden ReLU layers with
# dropout, then a linear projection that is reshaped to the label shape.
model = Sequential([
    Flatten(input_shape = INPUT_SHAPE),
    Dense(INPUT_SIZE, activation="relu"),
    Dropout(0.2),
    Dense(INPUT_SIZE, activation="relu"),
    Dropout(0.2),
    Dense(OUTPUT_SIZE),
    Reshape(OUTPUT_SHAPE),
    # Softmax is applied AFTER the reshape, along the last axis, because
    # CategoricalCrossentropy and CategoricalAccuracy both operate on the last
    # axis. Applying it to the flattened vector normalised the probabilities
    # over the whole output grid, so the model outputs (and every
    # threshold-based metric above) were not proper per-row distributions.
    keras.layers.Softmax(axis=-1),
])

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate = 1e-3),
    loss=keras.losses.CategoricalCrossentropy(), 
    metrics=METRICS)

model.summary()

In [None]:
# Training hyper-parameters.
BATCH_SIZE = 16
EPOCHS = 20

# Optional class weighting, currently disabled:
# y_integers = np.argmax(y_train, axis=1)
# CLASS_WEIGHTS = np.round(class_weight.compute_class_weight(class_weight='balanced', classes=np.unique(y_integers), y=y_integers), 2)
# CLASS_WEIGHTS = dict(enumerate(CLASS_WEIGHTS))

# Fit on the training split and evaluate on the dev split after every epoch;
# the object returned by fit() is kept in `history`.
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(x_dev, y_dev),
    # class_weight=CLASS_WEIGHTS,
)

In [None]:
# Destination the trained model is written to.
# NOTE(review): the path ends in "24" while the data is loaded from "multi_48";
# confirm this is intended, since overwrite=True replaces anything already there.
RESULTS_FILE = Path("results/ANN/multi/24")

model.save(filepath=RESULTS_FILE, overwrite=True)