In [1]:
import os
import cv2
import numpy as np

def load_data_from_dir(directory, image_size=(128, 128)):
    """Load grayscale images and binary labels from a class-per-folder directory.

    ``directory`` must contain the sub-folders ``normal`` (label 0) and
    ``pneumonia`` (label 1). Each readable image is loaded as grayscale and
    resized to ``image_size``.

    Entries that are not regular files are skipped, and files OpenCV cannot
    decode (``cv2.imread`` returns ``None``) are skipped with a warning instead
    of failing inside ``cv2.resize``. File names are visited in sorted order so
    the sample order does not depend on the order ``os.listdir`` happens to
    return.

    Args:
        directory: Path of the dataset split containing the class sub-folders.
        image_size: ``(width, height)`` target size handed to ``cv2.resize``.
            Defaults to ``(128, 128)``.

    Returns:
        A ``(data, labels)`` tuple of numpy arrays; ``data[i]`` is the resized
        image whose class label is ``labels[i]``.

    Raises:
        OSError: If one of the class sub-folders cannot be listed
            (raised by ``os.listdir``).
    """
    import warnings  # local import: keeps the cell self-contained

    labels_dict = {"normal": 0, "pneumonia": 1}
    data = []
    labels = []

    for sub_folder, label in labels_dict.items():
        sub_folder_path = os.path.join(directory, sub_folder)
        # Sort for a reproducible sample order across platforms / file systems.
        for image_file in sorted(os.listdir(sub_folder_path)):
            image_path = os.path.join(sub_folder_path, image_file)
            if not os.path.isfile(image_path):
                # Nested folders are not images; never hand them to OpenCV.
                continue

            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if image is None:
                # imread signals an unreadable / non-image file by returning None.
                warnings.warn(f"Skipping unreadable image: {image_path}")
                continue

            data.append(cv2.resize(image, image_size))  # resizing the image to a standard size
            labels.append(label)

    return np.array(data), np.array(labels)


In [4]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_curve, auc, log_loss
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report


# Read the train / test / validation splits from disk
train_images, train_labels = load_data_from_dir("data_to_model/train")
test_images, test_labels = load_data_from_dir("data_to_model/test")
val_images, val_labels = load_data_from_dir("data_to_model/validation")

# Collapse every image into one feature row: (n_samples, n_pixels)
train_images_flat = train_images.reshape(len(train_images), -1)
test_images_flat = test_images.reshape(len(test_images), -1)
val_images_flat = val_images.reshape(len(val_images), -1)

# Cubic polynomial-kernel SVM; probability=True is intentionally not passed.
# fit() returns the estimator itself, so construction and training are chained.
svm_model = SVC(degree=3, kernel="poly").fit(train_images_flat, train_labels)

# Evaluate (Helper Function)
def evaluate_model(model, data, labels, dataset_name):
    """Print a classification summary for one dataset split.

    Predicts with ``model.predict(data)``, then prints accuracy, F1, precision
    and recall, followed by the confusion matrix and the classification
    report, all computed against ``labels``.

    Args:
        model: Fitted estimator exposing ``predict``.
        data: Feature matrix passed to ``model.predict``.
        labels: Ground-truth labels for ``data``.
        dataset_name: Name shown in the printed header (e.g. "Train").

    Returns:
        The predictions returned by ``model.predict(data)``.
    """
    predictions = model.predict(data)

    accuracy = accuracy_score(labels, predictions)
    f1_value = f1_score(labels, predictions)
    precision_value = precision_score(labels, predictions)
    recall_value = recall_score(labels, predictions)

    print(f"---- {dataset_name} Data ----")
    # Adjacent f-strings concatenate into one message, printed in one call.
    print(
        f"Accuracy: {accuracy}\n"
        f"F1 Score: {f1_value}\n"
        f"Precision: {precision_value}\n"
        f"Recall: {recall_value}\n"
    )
    print("Confusion Matrix:")
    print(confusion_matrix(labels, predictions))
    print("\nClassification Report:")
    print(classification_report(labels, predictions))

    return predictions

# Performance Metrics: report each split in turn and keep its predictions
train_pred, test_pred, val_pred = [
    evaluate_model(svm_model, features, targets, split_name)
    for features, targets, split_name in (
        (train_images_flat, train_labels, "Train"),
        (test_images_flat, test_labels, "Test"),
        (val_images_flat, val_labels, "Validation"),
    )
]

---- Train Data ----
Accuracy: 0.9998379779650033
F1 Score: 0.9998537370191605
Precision: 0.999707516817783
Recall: 1.0

Confusion Matrix:
[[2753    1]
 [   0 3418]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2754
           1       1.00      1.00      1.00      3418

    accuracy                           1.00      6172
   macro avg       1.00      1.00      1.00      6172
weighted avg       1.00      1.00      1.00      6172

---- Test Data ----
Accuracy: 0.9650259067357513
F1 Score: 0.9681978798586572
Precision: 0.9762470308788599
Recall: 0.9602803738317757

Confusion Matrix:
[[334  10]
 [ 17 411]]

Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.97      0.96       344
           1       0.98      0.96      0.97       428

    accuracy                           0.97       772
   macro avg       0.96      0.97      0.96       772
weighted a

In [2]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_curve, auc, log_loss
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report


# Read the splits from disk: training uses the augmented set, while test and
# validation come from the non-augmented directories
train_images, train_labels = load_data_from_dir("data_to_model_augmented/train")
test_images, test_labels = load_data_from_dir("data_to_model/test")
val_images, val_labels = load_data_from_dir("data_to_model/validation")

# Collapse every image into one feature row: (n_samples, n_pixels)
train_images_flat = train_images.reshape(len(train_images), -1)
test_images_flat = test_images.reshape(len(test_images), -1)
val_images_flat = val_images.reshape(len(val_images), -1)

# Cubic polynomial-kernel SVM; probability=True is intentionally not passed.
# fit() returns the estimator itself, so construction and training are chained.
svm_model = SVC(degree=3, kernel="poly").fit(train_images_flat, train_labels)

# Evaluate (Helper Function)
def evaluate_model(model, data, labels, dataset_name):
    """Print a classification summary for one dataset split.

    Predicts with ``model.predict(data)``, then prints accuracy, F1, precision
    and recall, followed by the confusion matrix and the classification
    report, all computed against ``labels``.

    Args:
        model: Fitted estimator exposing ``predict``.
        data: Feature matrix passed to ``model.predict``.
        labels: Ground-truth labels for ``data``.
        dataset_name: Name shown in the printed header (e.g. "Train").

    Returns:
        The predictions returned by ``model.predict(data)``.
    """
    predictions = model.predict(data)

    accuracy = accuracy_score(labels, predictions)
    f1_value = f1_score(labels, predictions)
    precision_value = precision_score(labels, predictions)
    recall_value = recall_score(labels, predictions)

    print(f"---- {dataset_name} Data ----")
    # Adjacent f-strings concatenate into one message, printed in one call.
    print(
        f"Accuracy: {accuracy}\n"
        f"F1 Score: {f1_value}\n"
        f"Precision: {precision_value}\n"
        f"Recall: {recall_value}\n"
    )
    print("Confusion Matrix:")
    print(confusion_matrix(labels, predictions))
    print("\nClassification Report:")
    print(classification_report(labels, predictions))

    return predictions

# Performance Metrics: report each split in turn and keep its predictions
train_pred, test_pred, val_pred = [
    evaluate_model(svm_model, features, targets, split_name)
    for features, targets, split_name in (
        (train_images_flat, train_labels, "Train"),
        (test_images_flat, test_labels, "Test"),
        (val_images_flat, val_labels, "Validation"),
    )
]

---- Train Data ----
Accuracy: 0.9996759559300065
F1 Score: 0.9997075168177829
Precision: 0.9994152046783625
Recall: 1.0

Confusion Matrix:
[[5504    4]
 [   0 6836]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      5508
           1       1.00      1.00      1.00      6836

    accuracy                           1.00     12344
   macro avg       1.00      1.00      1.00     12344
weighted avg       1.00      1.00      1.00     12344

---- Test Data ----
Accuracy: 0.9468911917098446
F1 Score: 0.9518213866039953
Precision: 0.9574468085106383
Recall: 0.9462616822429907

Confusion Matrix:
[[326  18]
 [ 23 405]]

Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.95      0.94       344
           1       0.96      0.95      0.95       428

    accuracy                           0.95       772
   macro avg       0.95      0.95      0.95       772
weighted 