## Supervised models
This notebook gives an introduction to the supervised ML models that can be used for Covid detection.

For this notebook to find the modules created for this project (under `src/`), the project root directory must be added to the Python module search path.

In [1]:
# Enable IPython's autoreload extension in mode 2 so that imported modules
# (including the project's own code under src/) are reloaded automatically
# before code is executed, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
from pathlib import Path

# Make the project root (the parent of the notebook's working directory)
# importable so that `src.*` modules resolve.
# The path is resolved to an absolute location so it keeps working if the
# working directory changes later, and it is only added once so re-running
# this cell does not pile up duplicate entries in sys.path.
PROJECT_ROOT = str(Path("..").resolve())
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

<img src="../images/Supervised_Models.png" width="800"/>

## Loading packages and dependencies

In [3]:
import numpy as np

from src.features.extract_features import load_extracted_features
from sklearn.model_selection import train_test_split
from src.models.build_model import train_advanced_supervised_model, evaluate_model


# Side length, in pixels, that images are resized to (IMG_SIZE x IMG_SIZE).
IMG_SIZE = 299
# Root folder of the raw radiography data set; each class lives in its own
# sub-folder below it.
raw_data_dir = '../data/raw/COVID-19_Radiography_Dataset/'

## Extracting features from images

In [4]:
# Every class is read from '<raw_data_dir>/<category>/images' with the same
# preprocessing options; only the category, label and sampling arguments vary.
images_dir_template = raw_data_dir + '{}/images'
common_options = dict(image_size=IMG_SIZE, image_resized=True, augmentor=True)

# Labels: 0 = NORMAL, 1 = COVID, 2 = Viral Pneumonia, 3 = Lung_Opacity.
# NOTE(review): the recorded output reports 10192 items for every class, so the
# `samples` values presumably top each smaller class up to the NORMAL count --
# TODO confirm against load_extracted_features.
_, y_normal, X_normal = load_extracted_features(
    images_dir=images_dir_template, category='NORMAL', dataset_label=0,
    **common_options)
_, y_covid, X_covid = load_extracted_features(
    images_dir=images_dir_template, category='COVID', dataset_label=1,
    random_seed=42, samples=6576, **common_options)
_, y_pneumonia, X_pneumonia = load_extracted_features(
    images_dir=images_dir_template, category='Viral Pneumonia', dataset_label=2,
    random_seed=42, samples=8847, **common_options)
_, y_opacity, X_opacity = load_extracted_features(
    images_dir=images_dir_template, category='Lung_Opacity', dataset_label=3,
    random_seed=42, samples=4180, **common_options)

Loaded images for NORMAL: 10192 resized images, 10192 features, and 10192 labels.
Loaded images for COVID: 10192 resized images, 10192 features, and 10192 labels.
Loaded images for Viral Pneumonia: 10192 resized images, 10192 features, and 10192 labels.
Loaded images for Lung_Opacity: 10192 resized images, 10192 features, and 10192 labels.


## Combining datasets and splitting into train and test sets

In [5]:
# Stack the per-class arrays (in label order 0..3) into a single feature
# array and a single label vector.
per_class_features = [X_normal, X_covid, X_pneumonia, X_opacity]
per_class_labels = [y_normal, y_covid, y_pneumonia, y_opacity]
X = np.vstack(per_class_features)
y = np.concatenate(per_class_labels)

# Hold out 20% of the samples for testing; stratifying on y keeps the class
# proportions the same in both partitions, and the fixed seed makes the split
# repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = split_parts

print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

X_train shape: (32614, 299, 299), y_train shape: (32614,)
X_test shape: (8154, 299, 299), y_test shape: (8154,)


In [6]:
# Append a trailing single-channel axis so each image has the
# (height, width, channels) layout: (N, IMG_SIZE, IMG_SIZE) -> (N, IMG_SIZE, IMG_SIZE, 1).
# Reshaping with an explicit target shape keeps this cell safe to re-run.
cnn_input_shape = (-1, IMG_SIZE, IMG_SIZE, 1)
X_train = np.reshape(X_train, cnn_input_shape)
X_test = np.reshape(X_test, cnn_input_shape)

print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")

X_train shape: (32614, 299, 299, 1)
X_test shape: (8154, 299, 299, 1)


## Training and evaluating models

### Convolutional Neural Networks (CNN)

✅ Strengths:
* Highly accurate for image tasks.
* Learns complex patterns automatically.
* Works well with large image datasets.

❌ Weaknesses:
* Computationally expensive (needs GPUs).
* Requires large labeled datasets.
* Not easily interpretable.

In [7]:
# Train the multi-class CNN on the training split.
# The fourth positional argument is the epoch budget (the training log reports
# "Epoch 1/50").
# NOTE(review): the recorded run shows loss 0.0 and ~25% accuracy, i.e. chance
# level for four balanced classes -- the model does not appear to be learning.
# Possibly a loss / output-layer mismatch inside build_model; TODO confirm.
n_epochs = 50
model, history = train_advanced_supervised_model(
    X_train, y_train, IMG_SIZE, n_epochs, model_type='CNN_Multi'
)

2025-03-08 17:04:26.455327: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Max
2025-03-08 17:04:26.455366: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 128.00 GB
2025-03-08 17:04:26.455389: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 48.00 GB
2025-03-08 17:04:26.455407: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-03-08 17:04:26.455423: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch 1/50


  return self.fn(y_true, y_pred, **self._fn_kwargs)
2025-03-08 17:04:44.823792: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m816/816[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 62ms/step - accuracy: 0.2505 - loss: 0.0000e+00 - val_accuracy: 0.2470 - val_loss: 0.0000e+00 - learning_rate: 0.0010
Epoch 2/50
[1m816/816[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 61ms/step - accuracy: 0.2523 - loss: 0.0000e+00 - val_accuracy: 0.2470 - val_loss: 0.0000e+00 - learning_rate: 0.0010
Epoch 3/50
[1m816/816[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 61ms/step - accuracy: 0.2502 - loss: 0.0000e+00 - val_accuracy: 0.2470 - val_loss: 0.0000e+00 - learning_rate: 0.0010
Epoch 4/50
[1m816/816[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - accuracy: 0.2480 - loss: 0.0000e+00
Epoch 4: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
[1m816/816[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 61ms/step - accuracy: 0.2480 - loss: 0.0000e+00 - val_accuracy: 0.2470 - val_loss: 0.0000e+00 - learning_rate: 0.0010
Epoch 5/50
[1m816/816[0m [32m━━━━━

In [8]:
# Report the training metrics recorded for the last completed epoch.
train_loss, train_acc = history.history['loss'][-1], history.history['accuracy'][-1]
print(f"Train Accuracy: {train_acc:.4f}, Train Loss: {train_loss:.4f}")

# Evaluate on the held-out test split.
# The data set used in this notebook carries four labels (0-3) and the model
# was trained with model_type='CNN_Multi', so the run description must say
# multi-class rather than binary.
# NOTE(review): model_type is left as "CNN" because the values accepted by
# evaluate_model are not visible here -- confirm whether "CNN_Multi" is expected.
test_loss, test_acc = evaluate_model("Multi-class classification [Normal, COVID, Viral Pneumonia, Lung Opacity] for images without masks", model, X_test, y_test, model_type="CNN")
print(f"Test Accuracy: {test_acc:.4f}, Test Loss: {test_loss:.4f}")


Train Accuracy: 0.2508, Train Loss: 0.0000
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.2564 - loss: 0.0000e+00
[1m  8/255[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m4s[0m 16ms/step 



[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step


Successfully registered model 'tensorflow-CNN-2025-03-08'.
2025/03/08 17:10:15 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tensorflow-CNN-2025-03-08, version 1


🏃 View run CNN-2025-03-08 17:10:02.333245 at: http://localhost:8080/#/experiments/877999877627272198/runs/52ad6bee3bcd4478828fc71b066841c1
🧪 View experiment at: http://localhost:8080/#/experiments/877999877627272198
Test Accuracy: 0.2499, Test Loss: 0.0000


Created version '1' of model 'tensorflow-CNN-2025-03-08'.


### Transfer learning

✅ Strengths
* Transfer learning reduces training time while maintaining high accuracy.
* Fine-tuning improves performance when sufficient data is available.
* Combining deep features with statistical features can enhance results.

In [9]:
# Train the multi-class transfer-learning model on the same training split.
# The fourth positional argument is the epoch budget (the training log reports
# "Epoch 1/50").
# NOTE(review): the recorded run shows loss 0.0 and ~25% accuracy, i.e. chance
# level for four balanced classes -- TODO investigate the model/loss setup.
n_epochs = 50
model, history = train_advanced_supervised_model(
    X_train, y_train, IMG_SIZE, n_epochs, model_type="Transfer Learning Multi"
)

Epoch 1/50


  return self.fn(y_true, y_pred, **self._fn_kwargs)


[1m816/816[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 111ms/step - accuracy: 0.2503 - loss: 0.0000e+00 - val_accuracy: 0.2470 - val_loss: 0.0000e+00 - learning_rate: 1.0000e-04
Epoch 2/50
[1m816/816[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 97ms/step - accuracy: 0.2466 - loss: 0.0000e+00 - val_accuracy: 0.2470 - val_loss: 0.0000e+00 - learning_rate: 1.0000e-04
Epoch 3/50
[1m816/816[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 98ms/step - accuracy: 0.2456 - loss: 0.0000e+00 - val_accuracy: 0.2470 - val_loss: 0.0000e+00 - learning_rate: 1.0000e-04
Epoch 4/50
[1m816/816[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step - accuracy: 0.2489 - loss: 0.0000e+00
Epoch 4: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
[1m816/816[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 103ms/step - accuracy: 0.2489 - loss: 0.0000e+00 - val_accuracy: 0.2470 - val_loss: 0.0000e+00 - learning_rate: 1.0000e-04
Epoch 5/50
[1m816/

In [10]:
# Report the training metrics recorded for the last completed epoch.
train_loss, train_acc = history.history['loss'][-1], history.history['accuracy'][-1]
print(f"Train Accuracy: {train_acc:.4f}, Train Loss: {train_loss:.4f}")

# Evaluate on the held-out test split.
# The data set used in this notebook carries four labels (0-3) and the model
# was trained with model_type="Transfer Learning Multi", so the run description
# must say multi-class rather than binary.
# NOTE(review): model_type is left as "Transfer Learning" because the values
# accepted by evaluate_model are not visible here -- confirm the expected value.
test_loss, test_acc = evaluate_model("Multi-class classification [Normal, COVID, Viral Pneumonia, Lung Opacity] for images without masks", model, X_test, y_test, model_type="Transfer Learning")
print(f"Test Accuracy: {test_acc:.4f}, Test Loss: {test_loss:.4f}")

Train Accuracy: 0.2508, Train Loss: 0.0000
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 91ms/step - accuracy: 0.2564 - loss: 0.0000e+00
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 90ms/step


Successfully registered model 'tensorflow-Transfer Learning-2025-03-08'.
2025/03/08 17:20:15 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tensorflow-Transfer Learning-2025-03-08, version 1


🏃 View run Transfer Learning-2025-03-08 17:19:43.408257 at: http://localhost:8080/#/experiments/877999877627272198/runs/2718a22fb9af47e0a7af333ddd277236
🧪 View experiment at: http://localhost:8080/#/experiments/877999877627272198
Test Accuracy: 0.2499, Test Loss: 0.0000


Created version '1' of model 'tensorflow-Transfer Learning-2025-03-08'.
