In [None]:
import os
# Hide all GPUs from CUDA-aware libraries by exporting an empty device list.
# This is done before `torch` is imported further down so the setting is
# already in place when the library initializes; the run is CPU-only.
os.environ["CUDA_VISIBLE_DEVICES"] = ""
# Echo the value so the notebook output records which setting was active.
print("CUDA_VISIBLE_DEVICES =", os.environ["CUDA_VISIBLE_DEVICES"])

!pip install python-docx seaborn pillow timm --quiet

# Mount Google Drive inside the Colab runtime at /content/drive.
# NOTE(review): presumably DATA_ROOT / OUTPUT_DIR are meant to point below this
# mount point; they are placeholders in this file, so confirm before running.
from google.colab import drive
drive.mount('/content/drive')

# Ignore UserWarning messages originating from the
# `huggingface_hub.utils._auth` module; all other warnings are still shown.
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="huggingface_hub.utils._auth")

import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from PIL import Image
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from docx import Document
from docx.shared import Inches
import timm
import copy

# ---------------------------------------------------------------------------
# Run configuration: compute device, RNG seeds, paths and hyperparameters.
# ---------------------------------------------------------------------------

# Everything in this script runs on the CPU.
device = torch.device('cpu')
print("Running on device:", device)

# Seed Python, NumPy and PyTorch (in that order) with the same value so that
# shuffling, the dataset split and weight initialization are repeatable.
SEED = 42
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(SEED)

# Input image folder and the directory that receives every artifact.
DATA_ROOT = 'path'
OUTPUT_DIR = 'path'
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Optimization hyperparameters.
BATCH_SIZE = 32
NUM_EPOCHS = 50
LEARNING_RATE = 1e-4

# Artifact locations, all inside OUTPUT_DIR.
BEST_MODEL_PTH, BEST_ACC_TXT, CLF_TXT, CM_PNG = (
    os.path.join(OUTPUT_DIR, _artifact)
    for _artifact in (
        'best_model.pth',
        'best_val_acc.txt',
        'classification_report.txt',
        'confusion_matrix.png',
    )
)

class Transform:
    """Turn a PIL image into a normalized ``(3, H, W)`` float32 tensor.

    Pipeline: convert to RGB -> resize -> scale to ``[0, 1]`` ->
    standardize each channel with ``(value - mean) / std`` -> move the
    channel axis first.

    Args:
        size: Target ``(width, height)`` handed to ``Image.resize``.
            Defaults to ``(224, 224)``.
        mean: Per-channel means (length 3). Defaults to ``0.485`` for all
            three channels.
        std: Per-channel standard deviations (length 3). Defaults to
            ``0.229`` for all three channels.

    NOTE(review): ImageNet-pretrained backbones are commonly paired with the
    per-channel statistics (0.485, 0.456, 0.406) / (0.229, 0.224, 0.225).
    The single-value defaults are kept for backward compatibility; pass
    ``mean=`` / ``std=`` explicitly if the per-channel values are wanted.
    """

    def __init__(self, size=(224, 224), mean=None, std=None):
        self.size = tuple(size)
        self.mean = np.array([0.485] * 3 if mean is None else mean, dtype=np.float32)
        self.std = np.array([0.229] * 3 if std is None else std, dtype=np.float32)

    def __call__(self, img: "Image.Image"):
        """Return the normalized tensor for ``img``.

        Args:
            img: Image object exposing ``convert`` and ``resize``.

        Returns:
            ``torch.Tensor`` of dtype float32 with shape ``(3, height, width)``.
        """
        # Convert to RGB *before* resizing. Resizing in the source mode makes
        # Pillow fall back to nearest-neighbour resampling for palette ("P")
        # and 1-bit ("1") images, which degrades the result; after conversion
        # the default filter for RGB data is used for every input.
        rgb = img.convert('RGB').resize(self.size)
        arr = np.array(rgb, dtype=np.float32) / 255.0
        # mean/std have shape (3,) and broadcast over the trailing channel axis.
        arr = (arr - self.mean) / self.std
        # HWC -> CHW, the layout expected by the convolutional backbones.
        return torch.from_numpy(arr).permute(2, 0, 1)


transform = Transform()

class AnnotDataset(Dataset):
    """Folder-per-class image dataset.

    Every sub-directory of ``root`` is treated as one class; files inside it
    whose name ends in ``.jpg``, ``.jpeg`` or ``.png`` (case-insensitive)
    become samples of that class.

    Args:
        root: Directory containing one sub-directory per class.
        transform: Callable applied to each opened image in ``__getitem__``.

    Attributes set by ``__init__``:
        classes: sorted list of class (sub-directory) names.
        cl2i: dict mapping class name -> integer label (index in ``classes``).
        samples: list of ``(image_path, label)`` tuples, ordered by class and
            then by file name.
        transform: the callable passed in.
    """

    IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, root, transform):
        self.transform = transform
        self.classes = sorted(
            d for d in os.listdir(root) if os.path.isdir(os.path.join(root, d))
        )
        self.cl2i = {c: i for i, c in enumerate(self.classes)}
        # os.listdir returns entries in arbitrary order. Sorting the file
        # names fixes the sample indices, so an index-based, seeded split of
        # this dataset selects the same files on every machine and run.
        self.samples = [
            (os.path.join(root, c, fn), self.cl2i[c])
            for c in self.classes
            for fn in sorted(os.listdir(os.path.join(root, c)))
            if fn.lower().endswith(self.IMG_EXTENSIONS)
        ]

    def __len__(self):
        """Return the number of indexed image files."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(transform(image), label)`` for sample ``idx``."""
        path, label = self.samples[idx]
        # Use a context manager so the underlying file handle is released
        # deterministically instead of whenever the image is garbage-collected.
        with Image.open(path) as img:
            # Force the pixel data into memory while the file is still open.
            img.load()
            return self.transform(img), label

# Index the image folders once; the class count is the number of class
# sub-directories the dataset discovered.
full_ds = AnnotDataset(root=DATA_ROOT, transform=transform)
NUM_CLASSES = len(full_ds.classes)

# 80/20 train/validation split. The validation size is the remainder, so the
# two lengths always add up to the dataset size.
n = len(full_ds)
n_train = int(n * 0.8)
n_val = n - n_train
train_ds, val_ds = random_split(full_ds, lengths=[n_train, n_val])

# Only the training loader shuffles; validation keeps a fixed order.
train_ld = DataLoader(dataset=train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_ld = DataLoader(dataset=val_ds, batch_size=BATCH_SIZE, shuffle=False)

class ChannelAttention(nn.Module):
    """Channel-attention gate: one sigmoid weight per input channel.

    The input is squeezed spatially with both average and max pooling; each
    pooled descriptor goes through the same 1x1-conv bottleneck, the two
    results are summed and passed through a sigmoid.

    Args:
        in_planes: Number of channels of the input feature map.
        ratio: Reduction factor of the bottleneck. The hidden width is
            ``in_planes // ratio`` but never less than 1.
    """

    def __init__(self, in_planes, ratio=16):
        super().__init__()
        # Without the clamp, in_planes < ratio would build convolutions with
        # zero hidden channels and the gate would carry no information.
        hidden = max(1, in_planes // ratio)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        # Shared bottleneck applied to both pooled descriptors.
        self.fc = nn.Sequential(
            nn.Conv2d(in_planes, hidden, 1, bias=False),
            nn.ReLU(inplace=False),
            nn.Conv2d(hidden, in_planes, 1, bias=False),
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the gate of shape ``(N, in_planes, 1, 1)`` for input ``x``."""
        avg = self.fc(self.avg_pool(x))
        mx = self.fc(self.max_pool(x))
        return self.sigmoid(avg + mx)

class SpatialAttention(nn.Module):
    """Spatial-attention gate: one sigmoid weight per spatial location.

    The channel axis is collapsed twice (mean and max), the two resulting
    single-channel maps are stacked and reduced to one map by a bias-free
    convolution, followed by a sigmoid.

    Args:
        kernel_size: Side length of the convolution kernel; the padding is
            ``kernel_size // 2``.
    """

    def __init__(self, kernel_size=7):
        super().__init__()
        half = kernel_size // 2
        self.conv = nn.Conv2d(
            in_channels=2,
            out_channels=1,
            kernel_size=kernel_size,
            padding=half,
            bias=False,
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the single-channel attention map computed from ``x``."""
        channel_mean = x.mean(dim=1, keepdim=True)
        channel_max = x.max(dim=1, keepdim=True).values
        stacked = torch.cat((channel_mean, channel_max), dim=1)
        return self.sigmoid(self.conv(stacked))

class CBAM(nn.Module):
    """Channel attention followed by spatial attention, applied as gates.

    Args:
        in_planes: Channel count forwarded to ``ChannelAttention``.
        ratio: Reduction ratio forwarded to ``ChannelAttention``.
        kernel_size: Kernel size forwarded to ``SpatialAttention``.
    """

    def __init__(self, in_planes, ratio=16, kernel_size=7):
        super().__init__()
        self.ca = ChannelAttention(in_planes, ratio)
        self.sa = SpatialAttention(kernel_size)

    def forward(self, x):
        """Scale ``x`` by the channel gate, then scale that by the spatial gate."""
        # The spatial gate is computed from the channel-refined features,
        # not from the raw input.
        channel_refined = x * self.ca(x)
        return channel_refined * self.sa(channel_refined)

class HybridDenseEff(nn.Module):
    """Two-branch classifier fusing DenseNet-121 and EfficientNet-B0 features.

    Branch 1: the ``features`` stack of a pretrained timm ``densenet121``,
    followed by ReLU, CBAM attention (configured for 1024 channels) and
    global average pooling.
    Branch 2: a pretrained timm ``efficientnet_b0`` whose classifier was
    reset with ``reset_classifier(0)``; its output is flattened per sample.
    The two vectors are concatenated and classified by a small MLP.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes):
        super().__init__()

        # DenseNet branch (the 1024-channel width is assumed by CBAM and the
        # fusion size below).
        dense_backbone = timm.create_model('densenet121', pretrained=True)
        self.dense_feats = nn.Sequential(
            dense_backbone.features,
            nn.ReLU(inplace=False),
        )
        self.cbam = CBAM(in_planes=1024)

        # EfficientNet branch with its classification head removed.
        eff_backbone = timm.create_model('efficientnet_b0', pretrained=True)
        eff_backbone.reset_classifier(0)
        self.eff_feats = eff_backbone

        # Fusion head over the concatenated branch outputs.
        fusion_dim = 1024 + self.eff_feats.num_features
        self.classifier = nn.Sequential(
            nn.Linear(in_features=fusion_dim, out_features=512),
            nn.ReLU(inplace=False),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=512, out_features=num_classes),
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for images ``x``."""
        batch = x.size(0)

        dense_map = self.cbam(self.dense_feats(x))
        dense_vec = nn.functional.adaptive_avg_pool2d(dense_map, 1).view(batch, -1)

        eff_vec = self.eff_feats(x).view(batch, -1)

        fused = torch.cat((dense_vec, eff_vec), dim=1)
        return self.classifier(fused)

# Model, loss and optimizer. The model lives on `device`; every batch is moved
# there as well so the loop keeps working if `device` is ever changed.
model = HybridDenseEff(NUM_CLASSES).to(device)
crit  = nn.CrossEntropyLoss()
opt   = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Best validation accuracy seen so far and a private copy of the weights that
# produced it (deepcopy, because state_dict() returns references to live tensors).
best_val = 0.0
best_wts = copy.deepcopy(model.state_dict())
# Per-epoch metrics, appended once per epoch.
history = {'train_loss':[], 'train_acc':[], 'val_loss':[], 'val_acc':[]}

for epoch in range(1, NUM_EPOCHS+1):
    print(f"Starting epoch {epoch}/{NUM_EPOCHS}...")

    # ---- training pass ----------------------------------------------------
    model.train()
    tloss = tcorrect = tcount = 0
    for x, y in train_ld:
        x, y = x.to(device), y.to(device)
        logits = model(x)
        loss   = crit(logits, y)
        opt.zero_grad()
        loss.backward()
        opt.step()
        preds = logits.argmax(1)
        # crit returns the batch mean; weight by batch size so the epoch
        # average stays exact when the last batch is smaller.
        tloss    += loss.item() * x.size(0)
        tcorrect += (preds == y).sum().item()
        tcount   += y.size(0)
    tl, ta = tloss/tcount, tcorrect/tcount

    # ---- validation pass --------------------------------------------------
    model.eval()
    vloss = vcorrect = vcount = 0
    # Labels / predictions of the current epoch only (reset every epoch).
    all_y, all_p = [], []
    with torch.no_grad():
        for x, y in val_ld:
            x, y = x.to(device), y.to(device)
            logits = model(x)
            loss   = crit(logits, y)
            preds  = logits.argmax(1)
            vloss    += loss.item() * x.size(0)
            vcorrect += (preds == y).sum().item()
            vcount   += y.size(0)
            # .cpu() is a no-op on CPU tensors and required before .numpy()
            # on any other device.
            all_y.extend(y.cpu().numpy())
            all_p.extend(preds.cpu().numpy())
    vl, va = vloss/vcount, vcorrect/vcount

    history['train_loss'].append(tl); history['train_acc'].append(ta)
    history['val_loss'].append(vl);   history['val_acc'].append(va)

    # Checkpoint only on a strict improvement of validation accuracy.
    if va > best_val:
        best_val = va
        best_wts = copy.deepcopy(model.state_dict())
        torch.save(best_wts, BEST_MODEL_PTH)

    print(f"Epoch {epoch}/{NUM_EPOCHS} → Train: {tl:.4f}/{ta:.4f}  Val: {vl:.4f}/{va:.4f}")

# Persist the best validation accuracy as plain text.
with open(BEST_ACC_TXT,'w') as f:
    f.write(f"{best_val:.4f}\n")

# Training / validation curves, one PNG each.
LOSS_PNG = os.path.join(OUTPUT_DIR,'loss_curve.png')
ACC_PNG  = os.path.join(OUTPUT_DIR,'acc_curve.png')

plt.figure()
plt.plot(history['train_loss'], label='Train Loss')
plt.plot(history['val_loss'], label='Val Loss')
plt.legend()
plt.savefig(LOSS_PNG)
plt.close()

plt.figure()
plt.plot(history['train_acc'], label='Train Acc')
plt.plot(history['val_acc'], label='Val Acc')
plt.legend()
plt.savefig(ACC_PNG)
plt.close()

# Restore the best checkpoint and re-run validation with it. The labels and
# predictions left over from the training loop belong to the *last* epoch,
# which is generally not the epoch that achieved `best_val`; recomputing here
# keeps the confusion matrix and classification report consistent with the
# reported best accuracy.
model.load_state_dict(best_wts)
model.eval()
all_y, all_p = [], []
with torch.no_grad():
    for x, y in val_ld:
        preds = model(x.to(device)).argmax(1)
        all_y.extend(y.cpu().numpy())
        all_p.extend(preds.cpu().numpy())

# Pass the full label list explicitly so the matrix / report always have one
# row per class, even when a class is absent from the validation split
# (otherwise `target_names` can mismatch the labels actually present).
label_ids = list(range(len(full_ds.classes)))

cm = confusion_matrix(all_y, all_p, labels=label_ids)
plt.figure(figsize=(6,5))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=full_ds.classes, yticklabels=full_ds.classes)
plt.savefig(CM_PNG)
plt.close()

rep = classification_report(all_y, all_p, labels=label_ids, target_names=full_ds.classes)
with open(CLF_TXT,'w') as f:
    f.write(rep)

# Assemble the Word report from the artifacts written above.
doc = Document()
doc.add_heading('Pneumonia Detection Report', level=1)
doc.add_paragraph(f"Best Validation Accuracy: {best_val:.4f}")
doc.add_heading('Confusion Matrix', level=2)
doc.add_picture(CM_PNG, width=Inches(5))
doc.add_heading('Classification Report', level=2)
doc.add_paragraph(rep)
doc.add_heading('Training/Validation Curves', level=2)
doc.add_picture(LOSS_PNG, width=Inches(5))
doc.add_picture(ACC_PNG, width=Inches(5))
doc.save(os.path.join(OUTPUT_DIR,'report.docx'))

print(" Done! All outputs in", OUTPUT_DIR)
