In [24]:
import os
import numpy as np
import torchvision.transforms as transforms
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from PIL import Image
from sklearn.metrics import classification_report as report

In [2]:
# Per-channel mean/std standardisation applied after ToTensor
# (the values match the commonly used ImageNet statistics).
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Training pipeline: resize, then a random crop covering 50-100% of the image
# rescaled to 224, a random horizontal flip, tensor conversion, normalisation.
train_transformer = transforms.Compose(
    [
        transforms.Resize(size=256),
        transforms.RandomResizedCrop(size=224, scale=(0.5, 1.0)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]
)

# Validation pipeline: deterministic resize + centre crop to 224, then the
# same tensor conversion and normalisation as training.
val_transformer = transforms.Compose(
    [
        transforms.Resize(size=224),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        normalize,
    ]
)

In [3]:
# Text files listing the image file names of each class (read line by line
# when the paths are built).
txt_COVID, txt_NonCOVID = 'CT_COVID.txt', 'CT_NonCOVID.txt'

# Directories holding the images of each class, relative to the working directory.
data_COVID, data_NonCOVID = 'Data/CT_COVID', 'Data/CT_NonCOVID'

In [4]:
def _pick_image_dir(preferred_dir, fallback_dir):
    """Return `preferred_dir` when it exists on disk, otherwise `fallback_dir`."""
    return preferred_dir if os.path.isdir(preferred_dir) else fallback_dir


def _read_image_paths(list_file, image_dir):
    """Return the image paths named in `list_file`, each joined onto `image_dir`.

    The list file is read as text with one file name per line. Surrounding
    whitespace is stripped and blank lines are skipped, so an empty line does
    not yield a "path" that is just the image directory itself.
    """
    with open(list_file, 'r') as handle:
        names = (line.strip() for line in handle.read().splitlines())
        return [os.path.join(image_dir, name) for name in names if name]


# Prefer the image directories already configured in `data_COVID` /
# `data_NonCOVID` so the notebook runs on any checkout; the machine-specific
# absolute locations are kept only as a fallback for the original environment.
data_COVID = _pick_image_dir(
    data_COVID,
    r'C:\Users\PC\ML_Classification_Project\COVID-19-CT-Classification\Data\CT_COVID',
)
data_NonCOVID = _pick_image_dir(
    data_NonCOVID,
    r'C:\Users\PC\ML_Classification_Project\COVID-19-CT-Classification\Data\CT_NonCOVID',
)

COVID_train_paths = _read_image_paths(txt_COVID, data_COVID)
NonCOVID_train_paths = _read_image_paths(txt_NonCOVID, data_NonCOVID)



In [5]:
# Float label arrays, one entry per path: 1.0 for COVID, 0.0 for non-COVID.
COVID_train_labels = np.full(len(COVID_train_paths), 1.0)
NonCOVID_train_labels = np.full(len(NonCOVID_train_paths), 0.0)

In [6]:
# Join both classes end to end (COVID first in both arrays) so that
# paths and labels stay index-aligned.
all_image_paths = np.concatenate([COVID_train_paths, NonCOVID_train_paths])
all_labels = np.concatenate([COVID_train_labels, NonCOVID_train_labels])

In [7]:
def k_fold_cv_dataset_split(all_image_paths, all_labels, k_folds, random_state=42):
    """Split paths and labels into shuffled K-fold train/validation pairs.

    Args:
        all_image_paths: Sequence of image paths (list or 1-D array).
        all_labels: Sequence of labels, index-aligned with ``all_image_paths``.
        k_folds: Number of folds, passed to ``KFold(n_splits=...)``.
        random_state: Seed for the shuffle. Defaults to 42, the value that
            used to be hard-coded in this function.

    Returns:
        ``(train_splits, val_splits)``: two lists with one entry per fold.
        Entry ``i`` of each list is a ``(paths, labels)`` tuple of NumPy
        arrays holding that fold's training / validation subset.

    Raises:
        ValueError: If the paths and labels differ in length.
    """
    # Coerce to arrays so index-array selection below also works when the
    # caller passes plain Python lists.
    paths = np.asarray(all_image_paths)
    labels = np.asarray(all_labels)
    if len(paths) != len(labels):
        raise ValueError(
            "all_image_paths and all_labels must have the same length "
            f"(got {len(paths)} and {len(labels)})"
        )

    kfold = KFold(n_splits=k_folds, shuffle=True, random_state=random_state)

    train_splits = []
    val_splits = []
    for train_index, val_index in kfold.split(paths):
        train_splits.append((paths[train_index], labels[train_index]))
        val_splits.append((paths[val_index], labels[val_index]))

    return train_splits, val_splits

In [25]:
def _load_flat_images(paths, transformer):
    """Open each path as RGB, apply `transformer`, and return one flattened row per image."""
    rows = []
    for path in paths:
        # The context manager closes the underlying file once the pixels have
        # been converted, instead of leaving the handle open until GC.
        with Image.open(path) as raw:
            tensor = transformer(raw.convert("RGB"))
        rows.append(np.asarray(tensor).reshape(-1))
    # np.stack raises ValueError if the transformed images differ in size.
    return np.stack(rows)


def train_fold(train_paths, train_labels, val_paths, val_labels,
               n_components=173, random_state=42):
    """Fit PCA + SVM on one fold and score it on that fold's validation set.

    Training images go through ``train_transformer`` and validation images
    through ``val_transformer``; both pipelines already emit 224x224 tensors,
    so no further resizing is done before flattening. Note that the random
    crop/flip inside ``train_transformer`` is not seeded by this function.

    Args:
        train_paths: Paths of the training images.
        train_labels: Labels aligned with ``train_paths``.
        val_paths: Paths of the validation images.
        val_labels: Labels aligned with ``val_paths``.
        n_components: Requested number of PCA components (int). It is capped
            at what the training matrix can support so small folds do not
            make PCA raise.
        random_state: Seed passed to PCA so the projection is repeatable if
            PCA selects a randomized solver.

    Returns:
        ``(accuracy, classification_report_result)``: the validation accuracy
        as a float and the text report from ``classification_report``.

    Raises:
        ValueError: If either the training or validation path list is empty.
    """
    if len(train_paths) == 0 or len(val_paths) == 0:
        raise ValueError(
            "train_fold needs at least one training and one validation image"
        )

    # Step 1: load and flatten the images (one row per image).
    X_train = _load_flat_images(train_paths, train_transformer)
    y_train = train_labels
    X_val = _load_flat_images(val_paths, val_transformer)
    y_val = val_labels

    # Step 2: PCA dimensionality reduction, fitted on the training rows only.
    # PCA cannot extract more components than min(n_samples, n_features).
    n_components = min(n_components, *X_train.shape)
    pca = PCA(n_components=n_components, random_state=random_state)
    X_train_pca = pca.fit_transform(X_train)
    X_val_pca = pca.transform(X_val)

    # Step 3: train the SVM classifier on the reduced training data.
    svm_classifier = SVC()
    svm_classifier.fit(X_train_pca, y_train)

    # Step 4: evaluate on the validation data.
    y_val_pred = svm_classifier.predict(X_val_pca)
    accuracy = accuracy_score(y_val, y_val_pred)
    classification_report_result = report(y_val, y_val_pred)

    return accuracy, classification_report_result

In [26]:
# Number of cross-validation folds and the resulting per-fold splits.
num_folds = 5
train_splits, val_splits = k_fold_cv_dataset_split(
    all_image_paths, all_labels, k_folds=num_folds
)

# Per-fold results are collected in these lists.
fold_accuracies, fold_classification_reports = [], []

In [27]:
# Reset the accumulators here so that re-running this cell starts from empty
# lists instead of appending a second copy of every fold's results (which
# would also skew the average below).
fold_accuracies = []
fold_classification_reports = []

for fold, (train_split, val_split) in enumerate(zip(train_splits, val_splits)):
    print("Fold:", fold + 1)

    train_paths_fold, train_labels_fold = train_split
    val_paths_fold, val_labels_fold = val_split

    # Train and evaluate the model for the current fold
    accuracy, classification_report_result = train_fold(
        train_paths_fold, train_labels_fold, val_paths_fold, val_labels_fold
    )

    fold_accuracies.append(accuracy)
    fold_classification_reports.append(classification_report_result)

    print("Validation Set for Fold", fold + 1)
    print("Accuracy:", accuracy)
    print("Classification Report:\n", classification_report_result)

# Unweighted mean of the per-fold validation accuracies
avg_accuracy = np.mean(fold_accuracies)
print("Average Accuracy:", avg_accuracy)

# Print the classification report for each fold
for i, classification_report_result in enumerate(fold_classification_reports):
    print("\nClassification Report for Fold", i+1)
    print(classification_report_result)

# The "combined" report is just the per-fold text reports joined with
# newlines; no metrics are pooled or recomputed across folds.
combined_classification_report = "\n".join(fold_classification_reports)
print("\nCombined Classification Report:")
print(combined_classification_report)

Fold: 1
Validation Set for Fold 1
Accuracy: 0.68
Classification Report:
               precision    recall  f1-score   support

         0.0       0.70      0.71      0.70        80
         1.0       0.66      0.64      0.65        70

    accuracy                           0.68       150
   macro avg       0.68      0.68      0.68       150
weighted avg       0.68      0.68      0.68       150

Fold: 2
Validation Set for Fold 2
Accuracy: 0.7181208053691275
Classification Report:
               precision    recall  f1-score   support

         0.0       0.67      0.87      0.76        75
         1.0       0.81      0.57      0.67        74

    accuracy                           0.72       149
   macro avg       0.74      0.72      0.71       149
weighted avg       0.74      0.72      0.71       149

Fold: 3
Validation Set for Fold 3
Accuracy: 0.6241610738255033
Classification Report:
               precision    recall  f1-score   support

         0.0       0.60      0.74      0.66 

UFuncTypeError: ufunc 'add' did not contain a loop with signature matching types (dtype('<U326'), dtype('<U326')) -> None