In [None]:
# --- Environment setup (Colab notebook cell) --------------------------------
# Force a CPU-only run: an empty CUDA_VISIBLE_DEVICES hides every GPU from
# CUDA. It is assigned here, ahead of the `import torch` further down, so the
# setting is already in place when torch is loaded.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""
print("CUDA_VISIBLE_DEVICES =", os.environ["CUDA_VISIBLE_DEVICES"])

# IPython/Colab shell escape (not valid in a plain .py script): installs the
# third-party packages imported later in this cell.
!pip install python-docx seaborn pillow scikit-learn joblib --quiet

# Mount Google Drive at /content/drive.
# NOTE(review): DATA_ROOT / OUTPUT_DIR are presumably meant to live under this
# mount point -- confirm once the placeholder paths are filled in.
from google.colab import drive
drive.mount('/content/drive')

import random
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
from PIL import Image
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from joblib import dump
from docx import Document
from docx.shared import Inches

# --- Global configuration ----------------------------------------------------
# The whole pipeline runs on the CPU.
device = torch.device('cpu')
print("Running on device:", device)

# Seed every RNG the pipeline touches. Besides Python's `random` and NumPy,
# torch's global generator must be seeded too: it drives `random_split`, the
# DataLoader shuffle and the random initialisation of the CNN weights, so
# without it no two runs produce the same features or the same split.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# NOTE(review): both paths are the literal string 'path' -- they look like
# placeholders and must point at the dataset root (one sub-folder per class)
# and at the folder that receives the outputs.
DATA_ROOT  = 'path'
OUTPUT_DIR = 'path'
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Number of images per batch during feature extraction.
BATCH_SIZE = 32

# Output artefacts, all written inside OUTPUT_DIR.
BEST_ACC_TXT   = os.path.join(OUTPUT_DIR, 'cnn_svm_best_val_acc.txt')
CLF_TXT        = os.path.join(OUTPUT_DIR, 'cnn_svm_classification_report.txt')
CM_PNG         = os.path.join(OUTPUT_DIR, 'cnn_svm_confusion_matrix.png')
REPORT_DOCX    = os.path.join(OUTPUT_DIR, 'cnn_svm_report.docx')

class Transform:
    """Turn a PIL image into a normalised, channel-first float32 tensor.

    Steps: convert to RGB, resize to ``size``, scale pixel values to [0, 1],
    normalise per channel with ``(x - mean) / std`` and reorder the axes from
    (H, W, C) to (C, H, W).

    Args:
        size: Target ``(width, height)`` passed to ``Image.resize``.
        mean: Per-channel mean (3 values). Defaults to 0.485 for every channel.
        std: Per-channel standard deviation (3 values). Defaults to 0.229 for
            every channel.
    """

    def __init__(self, size=(224, 224), mean=None, std=None):
        self.size = tuple(size)
        self.mean = np.array([0.485]*3 if mean is None else mean, dtype=np.float32)
        self.std  = np.array([0.229]*3 if std is None else std, dtype=np.float32)

    def __call__(self, img: Image.Image):
        """Return the transformed image as a ``(3, H, W)`` float32 tensor."""
        # Convert BEFORE resizing: Pillow always falls back to nearest-neighbour
        # resampling for palette ('P') and 1-bit ('1') images, so resizing first
        # would degrade those inputs. Converting first lets every image be
        # resampled with the regular filter.
        img = img.convert('RGB').resize(self.size)
        arr = np.asarray(img, dtype=np.float32) / 255.0
        # mean/std have shape (3,) and broadcast over the trailing channel axis.
        arr = (arr - self.mean) / self.std
        return torch.from_numpy(arr).permute(2, 0, 1)


# Shared transform instance used by the dataset below.
transform = Transform()

class AnnotDataset(Dataset):
    """Image-folder dataset: every sub-directory of ``root`` is one class.

    Attributes:
        classes: Sorted list of class (sub-directory) names.
        cl2i: Mapping from class name to integer label (its index in ``classes``).
        samples: List of ``(image_path, label)`` tuples, ordered by class and
            then by file name.
        transform: Callable applied to each opened PIL image.

    Args:
        root: Directory that contains one sub-directory per class.
        transform: Callable taking a PIL image and returning the model input.
    """

    # Only files with these (case-insensitive) extensions are treated as images.
    IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, root, transform):
        self.samples = []
        self.classes = sorted(d for d in os.listdir(root) if os.path.isdir(os.path.join(root,d)))
        self.cl2i    = {c:i for i,c in enumerate(self.classes)}
        for c in self.classes:
            class_dir = os.path.join(root, c)
            # os.listdir returns entries in arbitrary, filesystem-dependent
            # order. Sorting keeps sample indices stable across machines, which
            # is what makes a seeded train/val split reproducible.
            for fn in sorted(os.listdir(class_dir)):
                if fn.lower().endswith(self.IMG_EXTENSIONS):
                    self.samples.append((os.path.join(class_dir, fn), self.cl2i[c]))
        self.transform = transform

    def __len__(self):
        """Return the number of image files found."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(transform(image), label)`` for the sample at ``idx``."""
        path, label = self.samples[idx]
        # Image.open is lazy and keeps the file handle open; the context manager
        # releases it as soon as the transform has read the pixels.
        with Image.open(path) as img:
            return self.transform(img), label

# --- Dataset, 80/20 train/validation split and loaders ----------------------
full_ds = AnnotDataset(DATA_ROOT, transform)
NUM_CLASSES = len(full_ds.classes)
n = len(full_ds)
n_train = int(0.8*n)
n_val   = n - n_train
# With 0 or 1 images the training split is empty and the shuffling loader below
# would fail with an obscure sampler error; fail early with a clear message.
if n_train == 0:
    raise ValueError(
        f"Need at least 2 images to build a train/val split, found {n} under {DATA_ROOT!r}"
    )
# A dedicated, seeded generator makes the split identical on every run,
# independent of how much of torch's global RNG was consumed beforehand.
train_ds, val_ds = random_split(
    full_ds, [n_train, n_val], generator=torch.Generator().manual_seed(42)
)
train_ld = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_ld   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False)

class SimpleCNN(nn.Module):
    """Four-stage convolutional feature extractor producing 512-d vectors.

    Each stage is Conv(3x3, padding 1) -> BatchNorm -> ReLU with channel widths
    64, 128, 256 and 512. The first three stages end with a 2x2 max-pool; the
    last one is followed by global average pooling to a 1x1 map.
    """

    def __init__(self):
        super().__init__()
        widths = (64, 128, 256, 512)
        last_stage = len(widths) - 1
        stages = []
        in_ch = 3
        for stage, out_ch in enumerate(widths):
            stages.append(nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1))
            stages.append(nn.BatchNorm2d(out_ch))
            stages.append(nn.ReLU())
            # Only the first three stages down-sample.
            if stage != last_stage:
                stages.append(nn.MaxPool2d(kernel_size=2))
            in_ch = out_ch
        stages.append(nn.AdaptiveAvgPool2d(output_size=(1, 1)))
        # Attribute name and layer order are kept so state-dict keys stay `f.<i>.*`.
        self.f = nn.Sequential(*stages)

    def forward(self, x):
        """Map a ``(N, 3, H, W)`` batch to ``(N, 512)`` feature vectors."""
        pooled = self.f(x)
        batch = pooled.shape[0]
        return pooled.view(batch, -1)


# Feature extractor on the CPU, switched to inference mode.
cnn = SimpleCNN().cpu().eval()

def extract_features(dataloader, model):
    """Run ``model`` over every batch and collect features and labels.

    If ``model`` is a ``torch.nn.Module`` it is switched to eval mode for the
    duration of the call, so layers such as BatchNorm use their running
    statistics and do not update them. The previous train/eval state of every
    sub-module is restored afterwards. Gradients are disabled throughout.

    Args:
        dataloader: Iterable yielding ``(inputs, labels)`` tensor pairs.
        model: Callable mapping an input batch to a 2-D feature tensor.

    Returns:
        ``(features, labels)`` as NumPy arrays: the per-batch features stacked
        row-wise and the per-batch labels concatenated, in iteration order.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    # Remember each sub-module's mode so it can be restored exactly.
    saved_modes = []
    if isinstance(model, torch.nn.Module):
        saved_modes = [(m, m.training) for m in model.modules()]
        model.eval()

    feats, labels = [], []
    try:
        with torch.no_grad():
            for x, y in dataloader:
                # .cpu() is a no-op for CPU tensors and makes .numpy() safe
                # when the tensors live on another device.
                feats.append(model(x).detach().cpu().numpy())
                labels.append(y.detach().cpu().numpy())
    finally:
        for module, was_training in saved_modes:
            module.training = was_training

    if not feats:
        raise ValueError("extract_features: dataloader yielded no batches")
    return np.vstack(feats), np.concatenate(labels)

# --- Feature extraction ------------------------------------------------------
print("Extracting CNN features...")
train_feats, train_lbls = extract_features(train_ld, cnn)
val_feats, val_lbls = extract_features(val_ld, cnn)
print("Train features:", train_feats.shape, "Val features:", val_feats.shape)

# The CNN is used with its freshly initialised weights and is never trained,
# so the SVM below is only meaningful together with exactly these weights.
# Persist them next to the SVM; otherwise the saved classifier cannot be
# reused in a later session.
CNN_WEIGHTS = os.path.join(OUTPUT_DIR, 'cnn_svm_feature_extractor.pt')
torch.save(cnn.state_dict(), CNN_WEIGHTS)

# --- SVM training ------------------------------------------------------------
print("Training SVM classifier...")
svm = SVC(kernel='rbf', C=10, gamma='scale', probability=True)
svm.fit(train_feats, train_lbls)

# Mean accuracy on the held-out validation features.
svm_acc = svm.score(val_feats, val_lbls)
print(f"Validation Accuracy: {svm_acc:.4f}")

# Persist the fitted classifier and its validation accuracy.
dump(svm, os.path.join(OUTPUT_DIR, 'cnn_svm_model.joblib'))

with open(BEST_ACC_TXT, 'w') as f:
    f.write(f"{svm_acc:.4f}\n")

# --- Evaluation --------------------------------------------------------------
print("Evaluating model...")
preds = svm.predict(val_feats)

# Pin the label set to every known class. Without it, a class that happens to
# be missing from the validation split shrinks the confusion matrix (so the
# tick labels no longer line up) and makes classification_report raise because
# the number of labels no longer matches target_names.
label_ids = list(range(len(full_ds.classes)))

cm = confusion_matrix(val_lbls, preds, labels=label_ids)
plt.figure(figsize=(6,5))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=full_ds.classes, yticklabels=full_ds.classes)
# confusion_matrix puts true labels on rows and predictions on columns.
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("CNN+SVM Confusion Matrix")
plt.tight_layout()
plt.savefig(CM_PNG)
plt.close()

# zero_division=0 reports 0 (without a warning) for classes with no samples.
rep = classification_report(val_lbls, preds, labels=label_ids,
                            target_names=full_ds.classes, zero_division=0)
with open(CLF_TXT, 'w') as f:
    f.write(rep)

# --- Word report: accuracy, confusion-matrix image, classification report ----
doc = Document()
doc.add_heading('CNN+SVM Pneumonia Detection Report', level=1)
doc.add_paragraph(f"Validation Accuracy: {svm_acc:.4f}")
doc.add_heading('Confusion Matrix', level=2)
doc.add_picture(CM_PNG, width=Inches(5))
doc.add_heading('Classification Report', level=2)
doc.add_paragraph(rep)
doc.save(REPORT_DOCX)

print("✅ Done! All CNN+SVM outputs saved in:", OUTPUT_DIR)
