## Supervised models
This notebook gives an introduction to the supervised ML models that can be used for Covid detection.

For this notebook to find the modules created for this project, we need to add the project root directory to the Python module search path.

In [1]:
# Auto reload modules: with mode 2, IPython re-imports modified modules before
# executing each cell, so edits to the project's source files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys

# Add the parent directory to the module search path so the project's `src`
# package can be imported from this notebook. The membership check keeps the
# cell idempotent: re-running it does not pile up duplicate entries.
if "../" not in sys.path:
    sys.path.append("../")

<img src="../images/Supervised_Models.png" width="800"/>

## Loading packages and dependencies

In [None]:
import numpy as np

from src.features.extract_features import load_extracted_features
from sklearn.model_selection import train_test_split
from src.models.build_model import train_advanced_supervised_model, evaluate_model


# Relative path to the raw image data; the feature-loading cell appends a
# '{}/images' template suffix to it before passing it to the loader.
raw_data_dir = '../data/raw/dataset/images'
IMG_SIZE = 299  # Images are resized to IMG_SIZE x IMG_SIZE pixels

## Extracting features from images

In [4]:
# Healthy class (dataset label 0): the NORMAL category.
# The loader returns a 3-tuple; its first element is not needed here and is
# discarded, the other two are kept as the labels and the image data.
_, y_healthy, X_healthy = load_extracted_features(
    images_dir=raw_data_dir + '{}/images',
    category='NORMAL',
    dataset_label=0,
    image_size=IMG_SIZE,
    image_resized=True,
    augmentor=True,
    samples=781,
    random_seed=42,
)

# Sick class (dataset label 1): COVID, Viral Pneumonia and Lung_Opacity
# categories loaded together under a single label.
_, y_sick, X_sick = load_extracted_features(
    images_dir=raw_data_dir + '{}/images',
    category=['COVID', 'Viral Pneumonia', 'Lung_Opacity'],
    dataset_label=1,
    image_size=IMG_SIZE,
    image_resized=True,
)


Loaded images for NORMAL: 10973 resized images, 10973 features, and 10973 labels.
Loaded images for ['COVID', 'Viral Pneumonia', 'Lung_Opacity']: 10973 resized images, 10973 features, and 10973 labels.


## Preparing the train/test data

In [5]:
# Merge the two classes into one dataset: image data in X, labels in y.
X = np.vstack([X_healthy, X_sick])
y = np.concatenate([y_healthy, y_sick])

# Hold out 20% of the samples for testing. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

X_train shape: (17556, 299, 299), y_train shape: (17556,)
X_test shape: (4390, 299, 299), y_test shape: (4390,)


In [6]:
# Give every image an explicit trailing channel axis of size 1, producing
# arrays of shape (n_samples, IMG_SIZE, IMG_SIZE, 1) for the CNN.
cnn_input_shape = (-1, IMG_SIZE, IMG_SIZE, 1)
X_train = np.reshape(X_train, cnn_input_shape)
X_test = np.reshape(X_test, cnn_input_shape)

print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")

X_train shape: (17556, 299, 299, 1)
X_test shape: (4390, 299, 299, 1)


## Training and evaluating models

### Convolutional Neural Networks (CNN)

✅ Strengths:
* Highly accurate for image tasks.
* Learns complex patterns automatically.
* Works well with large image datasets.

❌ Weaknesses:
* Computationally expensive (needs GPUs).
* Requires large labeled datasets.
* Not easily interpretable.

In [7]:
# Train the default model variant on the training split; the last positional
# argument (50) shows up as the epoch count in the training log.
model, history = train_advanced_supervised_model(
    X_train,
    y_train,
    IMG_SIZE,
    50,
)

2025-03-02 14:59:08.705040: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Max
2025-03-02 14:59:08.705064: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 128.00 GB
2025-03-02 14:59:08.705069: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 48.00 GB
2025-03-02 14:59:08.705089: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-03-02 14:59:08.705104: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch 1/50


2025-03-02 14:59:11.046675: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m439/439[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 60ms/step - accuracy: 0.5024 - loss: 0.6944 - val_accuracy: 0.4801 - val_loss: 0.6934 - learning_rate: 0.0010
Epoch 2/50
[1m439/439[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 58ms/step - accuracy: 0.4987 - loss: 0.6932 - val_accuracy: 0.4801 - val_loss: 0.6938 - learning_rate: 0.0010
Epoch 3/50
[1m439/439[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 59ms/step - accuracy: 0.5049 - loss: 0.6932 - val_accuracy: 0.4801 - val_loss: 0.6935 - learning_rate: 0.0010
Epoch 4/50
[1m439/439[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 59ms/step - accuracy: 0.5079 - loss: 0.6931 - val_accuracy: 0.4801 - val_loss: 0.6933 - learning_rate: 0.0010
Epoch 5/50
[1m439/439[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 59ms/step - accuracy: 0.4982 - loss: 0.6932 - val_accuracy: 0.4801 - val_loss: 0.6938 - learning_rate: 0.0010
Epoch 6/50
[1m439/439[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2

In [None]:
# Training metrics taken from the last entry recorded in the training history.
train_loss, train_acc = history.history['loss'][-1], history.history['accuracy'][-1]
print(f"Train Accuracy: {train_acc:.4f}, Train Loss: {train_loss:.4f}")

# Evaluate on the held-out split. The labels argument must be `y_test`:
# `_` only holds the discarded first return value of the feature loader and
# does not correspond to the test samples.
test_loss, test_acc = evaluate_model(
    "Binary classification [Normal, Others] for images without masks",
    model,
    X_test,
    y_test,
    model_type="CNN",
    classification_type="binary",
)
print(f"Test Accuracy: {test_acc:.4f}, Test Loss: {test_loss:.4f}")


Train Accuracy: 0.5050, Train Loss: 0.6931
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.5044 - loss: 0.6931


2025/03/02 15:03:08 INFO mlflow.tracking.fluent: Experiment with name 'Advanced Supervised Models' does not exist. Creating a new experiment.


[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step


Successfully registered model 'tensorflow-CNN-2025-03-02'.
2025/03/02 15:03:18 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tensorflow-CNN-2025-03-02, version 1


🏃 View run CNN-2025-03-02 15:03:08.137755 at: http://localhost:8080/#/experiments/877999877627272198/runs/0316bf79033a4b0b8eee394eab47b4d8
🧪 View experiment at: http://localhost:8080/#/experiments/877999877627272198
Test Accuracy: 0.5000, Test Loss: 0.6932


Created version '1' of model 'tensorflow-CNN-2025-03-02'.


### Transfer learning

✅ Strengths
* Transfer learning reduces training time while maintaining high accuracy.
* Fine-tuning improves performance when sufficient data is available.
* Combining deep features with statistical features can enhance results.

In [9]:
# Train the "Transfer Learning" model variant on the same training split.
# Note that this rebinds `model` and `history` from the previous training cell.
model, history = train_advanced_supervised_model(
    X_train,
    y_train,
    IMG_SIZE,
    50,
    model_type="Transfer Learning",
)

Epoch 1/50
[1m439/439[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 110ms/step - accuracy: 0.5086 - loss: 0.7028 - val_accuracy: 0.4801 - val_loss: 0.6947 - learning_rate: 1.0000e-04
Epoch 2/50
[1m439/439[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 107ms/step - accuracy: 0.4934 - loss: 0.7014 - val_accuracy: 0.4801 - val_loss: 0.6939 - learning_rate: 1.0000e-04
Epoch 3/50
[1m439/439[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 106ms/step - accuracy: 0.5006 - loss: 0.7015 - val_accuracy: 0.4801 - val_loss: 0.6936 - learning_rate: 1.0000e-04
Epoch 4/50
[1m439/439[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 105ms/step - accuracy: 0.5005 - loss: 0.6999 - val_accuracy: 0.5199 - val_loss: 0.6925 - learning_rate: 1.0000e-04
Epoch 5/50
[1m439/439[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 107ms/step - accuracy: 0.4948 - loss: 0.7010 - val_accuracy: 0.4801 - val_loss: 0.6936 - learning_rate: 1.0000e-04
Epoch 6/50
[1m439/439[0m [32m━━━

In [None]:
# Training metrics taken from the last entry recorded in the training history.
train_loss, train_acc = history.history['loss'][-1], history.history['accuracy'][-1]
print(f"Train Accuracy: {train_acc:.4f}, Train Loss: {train_loss:.4f}")

# Evaluate on the held-out split. The labels argument must be `y_test`:
# `_` only holds the discarded first return value of the feature loader and
# does not correspond to the test samples.
test_loss, test_acc = evaluate_model(
    "Binary classification [Normal, Others] for images without masks",
    model,
    X_test,
    y_test,
    model_type="Transfer Learning",
    classification_type="binary",
)
print(f"Test Accuracy: {test_acc:.4f}, Test Loss: {test_loss:.4f}")

Train Accuracy: 0.4963, Train Loss: 0.7009
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 88ms/step - accuracy: 0.4956 - loss: 0.6935
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 93ms/step


Successfully registered model 'tensorflow-Transfer Learning-2025-03-02'.
2025/03/02 15:11:09 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tensorflow-Transfer Learning-2025-03-02, version 1


🏃 View run Transfer Learning-2025-03-02 15:10:47.343155 at: http://localhost:8080/#/experiments/877999877627272198/runs/a2b291c113ae499186312af3fa129c12
🧪 View experiment at: http://localhost:8080/#/experiments/877999877627272198
Test Accuracy: 0.5000, Test Loss: 0.6934


Created version '1' of model 'tensorflow-Transfer Learning-2025-03-02'.
