In [3]:
import os
import pandas as pd
import torch
import pydicom
import numpy as np
import albumentations as A
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import timm
import torch.nn as nn
import torch.optim as optim
from PIL import Image

# -------------------------
# Path setup
# -------------------------
# Every location below is derived from the dataset root.
BASE_DIR = "./dataset"
IMAGE_DIR = os.path.join(BASE_DIR, "images/images")
ANNOTATION_DIR = os.path.join(BASE_DIR, "annotations/annotations/tcia-lidc-xml")
CSV_FILE = os.path.join(BASE_DIR, "lidc_metadata.csv")

# -------------------------
# Load metadata
# -------------------------
metadata = pd.read_csv(CSV_FILE)

# Missing findings become "" so the substring test below always sees text.
metadata['findings'] = metadata['findings'].fillna('')

# Binary target: 1 when the findings text mentions 'Nodules', otherwise 0.
metadata['label'] = metadata['findings'].apply(
    lambda findings_text: int('Nodules' in str(findings_text))
)

# image_id -> label lookup consumed by the dataset class.
label_dict = {
    image_id: label
    for image_id, label in zip(metadata['image_id'], metadata['label'])
}

# -------------------------
# Get all DICOM images
# -------------------------
# Only files whose name ends in ".dcm" are kept (case-sensitive match).
image_paths = [
    os.path.join(IMAGE_DIR, entry)
    for entry in os.listdir(IMAGE_DIR)
    if entry.endswith(".dcm")
]

# -------------------------
# Custom Dataset Class
# -------------------------
class LIDCDataset(Dataset):
    """Dataset of DICOM images paired with a binary label.

    Args:
        image_paths: Sequence of paths to DICOM files.
        label_dict: Mapping from image id (file name without its extension)
            to a numeric label. Ids missing from the mapping get label 0.
        transform: Optional callable invoked as ``transform(image=array)``
            whose result is indexed with ``"image"``.
    """

    def __init__(self, image_paths, label_dict, transform=None):
        self.image_paths = image_paths
        self.label_dict = label_dict
        self.transform = transform

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.image_paths)

    @staticmethod
    def _to_uint8_rgb(pixels):
        """Min-max scale *pixels* to uint8 and give 2-D input three channels.

        A constant image (max == min) is returned as all zeros instead of
        dividing by zero, which would yield NaNs and an undefined uint8 cast.
        """
        # Work in float64 so that ``max - min`` cannot overflow the integer
        # dtype of the raw pixel data (e.g. int16).
        scaled = np.asarray(pixels).astype(np.float64)
        lowest = scaled.min()
        span = scaled.max() - lowest
        if span > 0:
            scaled = (scaled - lowest) / span * 255.0
        else:
            scaled = np.zeros_like(scaled)
        image = scaled.astype(np.uint8)

        # Grayscale -> 3 identical channels, stacked on the last axis.
        if image.ndim == 2:
            image = np.stack([image] * 3, axis=-1)
        return image

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            ``(image, label)`` where ``image`` is a tensor with the last
            array axis moved to the front (``permute(2, 0, 1)``) and
            ``label`` is a float32 scalar tensor.
        """
        img_path = self.image_paths[index]
        # splitext removes only the trailing extension; a plain
        # str.replace(".dcm", "") would also strip ".dcm" inside the name.
        img_id = os.path.splitext(os.path.basename(img_path))[0]

        # Load DICOM image and normalise it to a uint8 (H, W, 3) array
        dicom_image = pydicom.dcmread(img_path)
        image = self._to_uint8_rgb(dicom_image.pixel_array)

        # Get label (default: 0 if not found)
        label = self.label_dict.get(img_id, 0)

        # Apply transformations
        if self.transform:
            image = self.transform(image=image)["image"]

        # Convert to PyTorch tensors
        image = torch.tensor(image).permute(2, 0, 1)  # (H, W, C) -> (C, H, W)
        label = torch.tensor(label, dtype=torch.float32)

        return image, label


# -------------------------
# Define transformations
# -------------------------
# Pipeline applied to every sample: resize, normalise, then a random
# horizontal flip applied with probability 0.5.
transform = A.Compose(
    [
        A.Resize(height=224, width=224),
        A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
        A.HorizontalFlip(p=0.5),
    ]
)

# -------------------------
# Create dataset and dataloader
# -------------------------
dataset = LIDCDataset(
    image_paths=image_paths,
    label_dict=label_dict,
    transform=transform,
)
# Batches of 16, reshuffled every epoch.
dataloader = DataLoader(dataset=dataset, batch_size=16, shuffle=True)

# -------------------------
# Hybrid Model: EfficientNet + ViT + BiLSTM
# -------------------------
class HybridModel(nn.Module):
    """Binary classifier fusing EfficientNet-B0 and ViT-B/16 features.

    Both backbones see the same input; their feature vectors are
    concatenated, fed through a 2-layer bidirectional LSTM as a sequence of
    length one, and mapped to a single logit by a small MLP.
    """

    def __init__(self):
        super().__init__()

        # CNN backbone, created with num_classes=0.
        self.efficientnet = timm.create_model(
            "efficientnet_b0", pretrained=True, num_classes=0
        )
        self.efficientnet_out = self.efficientnet.num_features

        # Transformer backbone, created with num_classes=0.
        self.vit = timm.create_model(
            "vit_base_patch16_224", pretrained=True, num_classes=0
        )
        self.vit_out = self.vit.num_features

        # The recurrent layer consumes the concatenated backbone features.
        fused_width = self.efficientnet_out + self.vit_out
        self.bilstm = nn.LSTM(
            input_size=fused_width,
            hidden_size=256,
            num_layers=2,
            batch_first=True,
            bidirectional=True,
        )

        # 512 = 2 directions x 256 hidden units coming out of the BiLSTM.
        self.fc = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 1),
        )

    def forward(self, x):
        """Return one raw logit per sample, shaped (batch, 1)."""
        cnn_vector = self.efficientnet(x)
        vit_vector = self.vit(x)

        # Join along the feature axis, then insert a sequence axis of size 1.
        fused = torch.cat((cnn_vector, vit_vector), dim=1)
        sequence = fused.unsqueeze(1)

        recurrent_out, _ = self.bilstm(sequence)
        final_step = recurrent_out[:, -1, :]

        return self.fc(final_step)

# -------------------------
# Training Setup
# -------------------------
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = HybridModel().to(device)

# Loss function: the model emits raw logits, so the sigmoid is applied
# inside the loss.
criterion = nn.BCEWithLogitsLoss()

# Optimizer
optimizer = optim.AdamW(model.parameters(), lr=1e-4)

# -------------------------
# Training Loop
# -------------------------
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        # squeeze(1) drops only the trailing logit axis: (batch, 1) -> (batch,).
        # A bare squeeze() would also drop the batch axis when the last batch
        # holds a single sample, and the loss would then reject the shape
        # mismatch against labels of shape (1,).
        outputs = model(images).squeeze(1)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Calculate accuracy: probability above 0.5 counts as positive.
        preds = (torch.sigmoid(outputs) > 0.5).float()
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    # NOTE: this is accuracy on the training batches; no held-out data is
    # evaluated in this script.
    epoch_acc = correct / total * 100
    print(f"Epoch {epoch+1}, Loss: {running_loss / len(dataloader):.4f}, Accuracy: {epoch_acc:.2f}%")

# -------------------------
# Save Model
# -------------------------
torch.save(model.state_dict(), "hybrid_lidc_model.pth")
print("Model saved successfully!")


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Epoch 1, Loss: 0.4058, Accuracy: 93.30%
Epoch 2, Loss: 0.2013, Accuracy: 95.03%
Epoch 3, Loss: 0.1858, Accuracy: 95.03%
Epoch 4, Loss: 0.1221, Accuracy: 95.03%
Epoch 5, Loss: 0.0564, Accuracy: 99.14%
Model saved successfully!


In [None]:
import os
import pandas as pd
import torch
import pydicom
import numpy as np
import albumentations as A
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import timm
import torch.nn as nn
import torch.optim as optim
from PIL import Image

# -------------------------
# Path setup
# -------------------------
# Every location below is derived from the dataset root.
BASE_DIR = "./dataset"
IMAGE_DIR = os.path.join(BASE_DIR, "images/images")
ANNOTATION_DIR = os.path.join(BASE_DIR, "annotations/annotations/tcia-lidc-xml")
CSV_FILE = os.path.join(BASE_DIR, "lidc_metadata.csv")

# -------------------------
# Load metadata
# -------------------------
metadata = pd.read_csv(CSV_FILE)

# Fill gaps with explicit defaults: 'None' for findings, 'Frontal' for projection.
metadata['findings'] = metadata['findings'].fillna('None')
metadata['projection'] = metadata['projection'].fillna('Frontal')

# Binary target: 1 when the findings text mentions 'Nodules', otherwise 0.
metadata['label'] = metadata['findings'].apply(
    lambda findings_text: int('Nodules' in str(findings_text))
)

# image_id -> row of (label, projection, findings) consumed by the dataset class.
label_dict = {
    image_id: row
    for image_id, row in zip(
        metadata['image_id'],
        metadata[['label', 'projection', 'findings']].values,
    )
}

# -------------------------
# Get all DICOM images
# -------------------------
# Only files whose name ends in ".dcm" are kept (case-sensitive match).
image_paths = [
    os.path.join(IMAGE_DIR, entry)
    for entry in os.listdir(IMAGE_DIR)
    if entry.endswith(".dcm")
]

# -------------------------
# Custom Dataset Class
# -------------------------
class LIDCDataset(Dataset):
    """Dataset of DICOM images with label, projection and findings metadata.

    Args:
        image_paths: Sequence of paths to DICOM files.
        label_dict: Mapping from image id (file name without its extension)
            to a ``(label, projection, findings)`` triple. Ids missing from
            the mapping fall back to ``[0, 'Frontal', 'None']``.
        transform: Optional callable invoked as ``transform(image=array)``
            whose result is indexed with ``"image"``.
    """

    def __init__(self, image_paths, label_dict, transform=None):
        self.image_paths = image_paths
        self.label_dict = label_dict
        self.transform = transform

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.image_paths)

    @staticmethod
    def _to_uint8_rgb(pixels):
        """Min-max scale *pixels* to uint8 and give 2-D input three channels.

        A constant image (max == min) is returned as all zeros instead of
        dividing by zero, which would yield NaNs and an undefined uint8 cast.
        """
        # Work in float64 so that ``max - min`` cannot overflow the integer
        # dtype of the raw pixel data (e.g. int16).
        scaled = np.asarray(pixels).astype(np.float64)
        lowest = scaled.min()
        span = scaled.max() - lowest
        if span > 0:
            scaled = (scaled - lowest) / span * 255.0
        else:
            scaled = np.zeros_like(scaled)
        image = scaled.astype(np.uint8)

        # Grayscale -> 3 identical channels, stacked on the last axis.
        if image.ndim == 2:
            image = np.stack([image] * 3, axis=-1)
        return image

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            ``(image, label, img_id, projection, findings)`` where ``image``
            is a tensor with the last array axis moved to the front
            (``permute(2, 0, 1)``) and ``label`` is a float32 scalar tensor.
        """
        img_path = self.image_paths[index]
        # splitext removes only the trailing extension; a plain
        # str.replace(".dcm", "") would also strip ".dcm" inside the name.
        img_id = os.path.splitext(os.path.basename(img_path))[0]

        # Load DICOM image and normalise it to a uint8 (H, W, 3) array
        dicom_image = pydicom.dcmread(img_path)
        image = self._to_uint8_rgb(dicom_image.pixel_array)

        # Get label, projection, and findings
        label_info = self.label_dict.get(img_id, [0, 'Frontal', 'None'])
        label, projection, findings = label_info

        # Apply transformations
        if self.transform:
            image = self.transform(image=image)["image"]

        # Convert to PyTorch tensors
        image = torch.tensor(image).permute(2, 0, 1)  # (H, W, C) -> (C, H, W)
        label = torch.tensor(label, dtype=torch.float32)

        return image, label, img_id, projection, findings

# -------------------------
# Define transformations
# -------------------------
# Pipeline applied to every sample: resize, normalise, then a random
# horizontal flip applied with probability 0.5.
transform = A.Compose(
    [
        A.Resize(height=224, width=224),
        A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
        A.HorizontalFlip(p=0.5),
    ]
)

# -------------------------
# Create dataset and dataloader
# -------------------------
dataset = LIDCDataset(
    image_paths=image_paths,
    label_dict=label_dict,
    transform=transform,
)
# Batches of 16, reshuffled every epoch.
dataloader = DataLoader(dataset=dataset, batch_size=16, shuffle=True)

# -------------------------
# Modified model: adds a bounding-box head for nodule locations
# -------------------------
class HybridModel(nn.Module):
    """Two-headed network on fused EfficientNet-B0 and ViT-B/16 features.

    Both backbones see the same input; their feature vectors are
    concatenated and fed through a 2-layer bidirectional LSTM as a sequence
    of length one. Two linear heads read the LSTM output: one classification
    logit and four bounding-box values.
    """

    def __init__(self):
        super().__init__()

        # CNN backbone, created with num_classes=0.
        self.efficientnet = timm.create_model(
            "efficientnet_b0", pretrained=True, num_classes=0
        )
        self.efficientnet_out = self.efficientnet.num_features

        # Transformer backbone, created with num_classes=0.
        self.vit = timm.create_model(
            "vit_base_patch16_224", pretrained=True, num_classes=0
        )
        self.vit_out = self.vit.num_features

        # The recurrent layer consumes the concatenated backbone features.
        fused_width = self.efficientnet_out + self.vit_out
        self.bilstm = nn.LSTM(
            input_size=fused_width,
            hidden_size=256,
            num_layers=2,
            batch_first=True,
            bidirectional=True,
        )

        # 512 = 2 directions x 256 hidden units coming out of the BiLSTM.
        self.fc_class = nn.Linear(512, 1)  # binary classification logit
        self.fc_bbox = nn.Linear(512, 4)  # bounding box coordinates (x, y, w, h)

    def forward(self, x):
        """Return ``(classification, bbox)`` shaped (batch, 1) and (batch, 4)."""
        cnn_vector = self.efficientnet(x)
        vit_vector = self.vit(x)

        # Join along the feature axis, then insert a sequence axis of size 1.
        fused = torch.cat((cnn_vector, vit_vector), dim=1)
        sequence = fused.unsqueeze(1)

        recurrent_out, _ = self.bilstm(sequence)
        final_step = recurrent_out[:, -1, :]

        class_logit = self.fc_class(final_step)
        box = self.fc_bbox(final_step)
        return class_logit, box

# -------------------------
# Training Loop
# -------------------------
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = HybridModel().to(device)
# The classification head emits raw logits, so the sigmoid is applied inside
# the loss.
criterion_class = nn.BCEWithLogitsLoss()
criterion_bbox = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-4)
num_epochs = 5

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    # img_ids, projections and findings are yielded by the dataset but are
    # not used by the training step.
    for images, labels, img_ids, projections, findings in dataloader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs, bboxes = model(images)
        # squeeze(1) drops only the trailing logit axis: (batch, 1) -> (batch,).
        # A bare squeeze() would also drop the batch axis when the last batch
        # holds a single sample, and the loss would then reject the shape
        # mismatch against labels of shape (1,).
        outputs = outputs.squeeze(1)
        loss_class = criterion_class(outputs, labels)
        # NOTE(review): placeholder target. No box annotations are loaded, so
        # this term only pulls the bbox head towards all-zero outputs; its
        # gradient also flows into the shared layers. Replace with real
        # targets before relying on the bbox predictions.
        loss_bbox = criterion_bbox(bboxes, torch.zeros_like(bboxes))  # Placeholder
        loss = loss_class + loss_bbox
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # Probability above 0.5 counts as a positive prediction.
        preds = (torch.sigmoid(outputs) > 0.5).float()
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    # NOTE: this is accuracy on the training batches; no held-out data is
    # evaluated in this script.
    print(f"Epoch {epoch+1}, Loss: {running_loss / len(dataloader):.4f}, Accuracy: {correct / total * 100:.2f}%")

# -------------------------
# Save Model
# -------------------------
# NOTE(review): same file name as the checkpoint written by the earlier
# single-head training cell, so running both overwrites that file with a
# state dict that has different keys (fc_class / fc_bbox instead of fc).
torch.save(model.state_dict(), "hybrid_lidc_model.pth")
print("Model saved successfully!")

  from .autonotebook import tqdm as notebook_tqdm
  check_for_updates()


Epoch 1, Loss: 0.3180, Accuracy: 91.58%
Epoch 2, Loss: 0.1974, Accuracy: 95.03%
Epoch 3, Loss: 0.1849, Accuracy: 95.03%
Epoch 4, Loss: 0.0989, Accuracy: 95.03%
Epoch 5, Loss: 0.0630, Accuracy: 98.06%
Model saved successfully!
