## Supervised models
This notebook gives an introduction to the supervised ML models that can be used for COVID detection.

For this notebook to find the modules created for this project, we need to add the project root directory (one level above this notebook) to the Python module search path.

In [1]:
import sys

# Make the project's `src` package importable from this notebook by adding the
# parent directory to the module search path. The membership check keeps the
# cell idempotent: re-running it does not pile up duplicate entries.
PROJECT_ROOT = "../"
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

## Loading packages and dependencies

In [2]:
import numpy as np

from src.features.extract_features import load_extracted_features
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from src.models.build_model import train_advanced_supervised_model, evaluate_model


# Location of the raw radiography dataset, relative to this notebook.
# The trailing slash is required: sub-paths are built from it by plain string
# concatenation.
raw_data_dir = "../data/raw/COVID-19_Radiography_Dataset/"

# Side length in pixels; images are resized to IMG_SIZE x IMG_SIZE.
IMG_SIZE = 299

## Extracting features from images

In [3]:
# Path template shared by both loads; the '{}' placeholder is presumably filled
# with the class folder name inside load_extracted_features (TODO confirm).
image_dir_template = raw_data_dir + '{}/images'
sick_classes = ['COVID', 'Viral Pneumonia', 'Lung_Opacity']

# Each call returns (features, labels, resized images), in that order.
# The third argument (0 / 1) is presumably the label assigned to every sample
# of the requested class(es): 0 = healthy, 1 = sick.
F_healthy, y_healthy, X_healthy = load_extracted_features(
    image_dir_template, 'NORMAL', 0, IMG_SIZE, image_resized=True
)
F_sick, y_sick, X_sick = load_extracted_features(
    image_dir_template, sick_classes, 1, IMG_SIZE, image_resized=True
)

Loaded images for NORMAL: 10192 resized images, 10192 features, and 10192 labels.
Loaded images for ['COVID', 'Viral Pneumonia', 'Lung_Opacity']: 10973 resized images, 10973 features, and 10973 labels.


## Normalizing features

In [4]:
# Combine the healthy and sick samples into single arrays.
# Optional: keep only the first 7 features of each sample.
# F_healthy = [f[:7] for f in F_healthy]
# F_sick = [f[:7] for f in F_sick]

F = np.vstack((F_healthy, F_sick))  # image features
X = np.vstack((X_healthy, X_sick))  # image data
y = np.concatenate((y_healthy, y_sick))  # labels

# Split images, features, and labels in one call so the rows stay aligned.
# 20% is held out for testing; stratifying on y preserves the class balance,
# and the fixed random_state makes the split reproducible.
X_img_train, X_img_test, X_feat_train, X_feat_test, y_train, y_test = train_test_split(
    X, F, y, test_size=0.2, random_state=42, stratify=y
)
print(f"X_train shape: {X_feat_train.shape}, y_train shape: {y_train.shape}")
print(f"X_test shape: {X_feat_test.shape}, y_test shape: {y_test.shape}")
print(f"X_img_train shape: {X_img_train.shape}")

# Normalize features. The scaler is fitted exactly once, on the training
# features only, and the same statistics are then applied to the test features
# so no information from the test set leaks into the scaling.
scaler = StandardScaler()
X_feat_train = scaler.fit_transform(X_feat_train)
X_feat_test = scaler.transform(X_feat_test)

# Backward-compatible aliases for the scaled features. They refer to the same
# arrays as X_feat_train / X_feat_test rather than to separately computed copies.
X_train, X_test = X_feat_train, X_feat_test

X_train shape: (16932, 14), y_train shape: (16932,)
X_test shape: (4233, 14), y_test shape: (4233,)
X_img_train shape: (16932, 299, 299)


In [5]:
# Give every image an explicit trailing channel axis of size 1, i.e.
# (n_samples, IMG_SIZE, IMG_SIZE, 1); -1 lets NumPy infer the sample count.
cnn_image_shape = (-1, IMG_SIZE, IMG_SIZE, 1)
X_img_train = X_img_train.reshape(cnn_image_shape)
X_img_test = X_img_test.reshape(cnn_image_shape)

# Report the resulting shapes as a quick sanity check.
for array_name, images in (("X_img_train", X_img_train), ("X_img_test", X_img_test)):
    print(f"{array_name} shape: {images.shape}")

X_img_train shape: (16932, 299, 299, 1)
X_img_test shape: (4233, 299, 299, 1)


## Training and evaluating models

### Convolutional Neural Networks (CNN)

✅ Strengths:
* Highly accurate for image tasks.
* Learns complex patterns automatically.
* Works well with large image datasets.

❌ Weaknesses:
* Computationally expensive (needs GPUs).
* Requires large labeled datasets.
* Not easily interpretable.

In [6]:
# Two-input setup: each sample is an image plus its extracted feature vector.
train_inputs = [X_img_train, X_feat_train]
test_inputs = [X_img_test, X_feat_test]
# Image side length and number of extracted features per sample.
input_dims = [IMG_SIZE, X_feat_train.shape[1]]
n_epochs = 10  # the recorded training log reports "Epoch 1/10"

# No model_type is passed here, so the helper's default model is trained.
# The test split is also handed to the helper; the log reports val_* metrics,
# presumably computed on it (TODO confirm in build_model).
# NOTE(review): in the recorded run the loss grows from epoch to epoch instead
# of shrinking -- the training setup in build_model is worth investigating.
model, history = train_advanced_supervised_model(
    train_inputs, y_train, test_inputs, y_test, input_dims, n_epochs
)

2025-02-02 16:07:19.867123: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Max
2025-02-02 16:07:19.867142: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 128.00 GB
2025-02-02 16:07:19.867147: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 48.00 GB
2025-02-02 16:07:19.867162: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-02-02 16:07:19.867171: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch 1/10


2025-02-02 16:07:22.812550: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m1059/1059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 37ms/step - accuracy: 0.6550 - loss: 0.8559 - val_accuracy: 0.8068 - val_loss: 0.4324
Epoch 2/10
[1m1059/1059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 35ms/step - accuracy: 0.7692 - loss: 0.5308 - val_accuracy: 0.7938 - val_loss: 0.4779
Epoch 3/10
[1m1059/1059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 35ms/step - accuracy: 0.7973 - loss: 0.4804 - val_accuracy: 0.8193 - val_loss: 0.5636
Epoch 4/10
[1m1059/1059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 35ms/step - accuracy: 0.7732 - loss: 0.7365 - val_accuracy: 0.8162 - val_loss: 0.9039
Epoch 5/10
[1m1059/1059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 35ms/step - accuracy: 0.7656 - loss: 1.4572 - val_accuracy: 0.7382 - val_loss: 3.8682
Epoch 6/10
[1m1059/1059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 36ms/step - accuracy: 0.7563 - loss: 5.9334 - val_accuracy: 0.8155 - val_loss: 5.0635
Epoch 7/10
[1m

In [7]:
# Training metrics recorded for the last completed epoch.
training_log = history.history
train_loss = training_log['loss'][-1]
train_acc = training_log['accuracy'][-1]
print(f"Train Accuracy: {train_acc:.4f}, Train Loss: {train_loss:.4f}")

# Score the trained model on the held-out images and features.
test_inputs = [X_img_test, X_feat_test]
test_loss, test_acc = evaluate_model(model, test_inputs, y_test, model_type="CNN")
print(f"Test Accuracy: {test_acc:.4f}, Test Loss: {test_loss:.4f}")


Train Accuracy: 0.7092, Train Loss: 1768.6885
[1m133/133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.7690 - loss: 1469.8152
Test Accuracy: 0.7690, Test Loss: 1461.3030


### Capsule Network

✅ Strengths:

* CapsNet learns spatial relationships better than CNNs.
* More robust to rotation & deformation in medical images.
* Less training data required but computationally expensive.

#### Training and evaluation

In [8]:
# Same two-input setup as before: images plus extracted feature vectors.
train_inputs = [X_img_train, X_feat_train]
test_inputs = [X_img_test, X_feat_test]
# Image side length and number of extracted features per sample.
input_dims = [IMG_SIZE, X_feat_train.shape[1]]
n_epochs = 10  # the recorded training log reports "Epoch 1/10"

# NOTE(review): in the recorded run the loss grows from epoch to epoch instead
# of shrinking -- the training setup in build_model is worth investigating.
# This call rebinds `model` and `history` to the Capsule Network run.
model, history = train_advanced_supervised_model(
    train_inputs,
    y_train,
    test_inputs,
    y_test,
    input_dims,
    n_epochs,
    model_type="Capsule Network",
)

Epoch 1/10
[1m1059/1059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 66ms/step - accuracy: 0.6480 - loss: 0.7623 - val_accuracy: 0.7723 - val_loss: 0.7764
Epoch 2/10
[1m1059/1059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 70ms/step - accuracy: 0.6812 - loss: 2.0711 - val_accuracy: 0.7756 - val_loss: 1.8550
Epoch 3/10
[1m1059/1059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 67ms/step - accuracy: 0.6654 - loss: 7.8044 - val_accuracy: 0.7864 - val_loss: 8.0277
Epoch 4/10
[1m1059/1059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 64ms/step - accuracy: 0.6887 - loss: 30.0880 - val_accuracy: 0.7999 - val_loss: 29.1838
Epoch 5/10
[1m1059/1059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 62ms/step - accuracy: 0.6945 - loss: 99.5408 - val_accuracy: 0.7912 - val_loss: 74.6851
Epoch 6/10
[1m1059/1059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 61ms/step - accuracy: 0.7047 - loss: 170.3077 - val_accuracy: 0.7893 - val_loss: 109.8

In [9]:
# Training metrics recorded for the last completed epoch.
training_log = history.history
train_loss = training_log['loss'][-1]
train_acc = training_log['accuracy'][-1]
print(f"Train Accuracy: {train_acc:.4f}, Train Loss: {train_loss:.4f}")

# Score the trained model on the held-out images and features.
test_inputs = [X_img_test, X_feat_test]
test_loss, test_acc = evaluate_model(
    model, test_inputs, y_test, model_type="Capsule Network"
)
print(f"Test Accuracy: {test_acc:.4f}, Test Loss: {test_loss:.4f}")

Train Accuracy: 0.6676, Train Loss: 6.4810
[1m133/133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - accuracy: 0.6101 - loss: 4.7443
Test Accuracy: 0.6119, Test Loss: 4.6779


### Transfer learning

✅ Strengths
* Transfer learning reduces training time while maintaining high accuracy.
* Fine-tuning improves performance when sufficient data is available.
* Combining deep features with statistical features can enhance results.

In [10]:
# Same two-input setup as before: images plus extracted feature vectors.
train_inputs = [X_img_train, X_feat_train]
test_inputs = [X_img_test, X_feat_test]
# Image side length and number of extracted features per sample.
input_dims = [IMG_SIZE, X_feat_train.shape[1]]
n_epochs = 10  # the recorded training log reports "Epoch 1/10"

# This call rebinds `model` and `history` to the Transfer Learning run.
model, history = train_advanced_supervised_model(
    train_inputs,
    y_train,
    test_inputs,
    y_test,
    input_dims,
    n_epochs,
    model_type="Transfer Learning",
)

Epoch 1/10
[1m1059/1059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 62ms/step - accuracy: 0.5887 - loss: 0.6971 - val_accuracy: 0.6636 - val_loss: 0.6209
Epoch 2/10
[1m1059/1059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 58ms/step - accuracy: 0.6366 - loss: 0.6637 - val_accuracy: 0.6995 - val_loss: 0.6159
Epoch 3/10
[1m1059/1059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 58ms/step - accuracy: 0.6551 - loss: 0.6503 - val_accuracy: 0.6924 - val_loss: 0.6108
Epoch 4/10
[1m1059/1059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 58ms/step - accuracy: 0.6506 - loss: 0.6508 - val_accuracy: 0.6979 - val_loss: 0.6087
Epoch 5/10
[1m1059/1059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 58ms/step - accuracy: 0.6601 - loss: 0.6497 - val_accuracy: 0.7059 - val_loss: 0.6088
Epoch 6/10
[1m1059/1059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 59ms/step - accuracy: 0.6582 - loss: 0.6511 - val_accuracy: 0.7009 - val_loss: 0.6045
Epoc

In [11]:
# Training metrics recorded for the last completed epoch.
training_log = history.history
train_loss = training_log['loss'][-1]
train_acc = training_log['accuracy'][-1]
print(f"Train Accuracy: {train_acc:.4f}, Train Loss: {train_loss:.4f}")

# Score the trained model on the held-out images and features.
test_inputs = [X_img_test, X_feat_test]
test_loss, test_acc = evaluate_model(
    model, test_inputs, y_test, model_type="Transfer Learning"
)
print(f"Test Accuracy: {test_acc:.4f}, Test Loss: {test_loss:.4f}")

Train Accuracy: 0.6382, Train Loss: 0.6945
[1m133/133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 83ms/step - accuracy: 0.7024 - loss: 0.6162
Test Accuracy: 0.7014, Test Loss: 0.6129
