In [1]:
import os
import numpy as np
import pandas as pd
import pydicom as dicom
import cv2
import math
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms

In [2]:
# Constants
DATA_DIR = 'Data/'  # Update this to your data directory
LABELS_CSV = 'LungCT-Labels - Sheet1.csv'  # Update this to your labels file
IMG_SIZE_PX = 150
SLICE_COUNT = 20
BATCH_SIZE = 8
EPOCHS = 10
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [3]:
# Custom Dataset Class
class LungCTDataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        slices, label = self.data[idx]
        slices = np.array(slices).astype(np.float32)

        # Normalize and convert to 3 channels
        slices = np.stack([slices] * 3, axis=0)
        if self.transform:
            slices = self.transform(slices)
        
        return slices, label

In [4]:
# Function to load and preprocess data
def load_data(data_dir, labels_df, img_size_px=IMG_SIZE_PX, hm_slices=SLICE_COUNT):
    image_full_data = []
    
    patients = os.listdir(data_dir)

    for patient in patients:
        label = labels_df.at[patient, 'Labels']
        path = os.path.join(data_dir, patient)
        slices = [dicom.read_file(os.path.join(path, s)) for s in os.listdir(path)]
        slices.sort(key=lambda x: int(x.ImagePositionPatient[2]))

        new_slices = []
        slices = [cv2.resize(np.array(each_slice.pixel_array), (img_size_px, img_size_px)) for each_slice in slices]
        
        chunk_sizes = math.ceil(len(slices) / hm_slices)
        for i in range(0, len(slices), chunk_sizes):
            slice_chunk = slices[i:i + chunk_sizes]
            if len(slice_chunk) > 0:
                new_slices.append(np.mean(slice_chunk, axis=0))

        if len(new_slices) < hm_slices:
            last_slice = new_slices[-1]
            while len(new_slices) < hm_slices:
                new_slices.append(last_slice)

        image_full_data.append([new_slices, np.array([1, 0]) if label == 1 else np.array([0, 1])])

    return image_full_data

In [5]:
# Load labels
labels_df = pd.read_csv(LABELS_CSV, index_col=0)

# Load and preprocess data
image_full_data = load_data(DATA_DIR, labels_df)

In [6]:
# Shuffle and split data into training and validation sets
np.random.shuffle(image_full_data)
train_data = image_full_data[:int(len(image_full_data) * 0.8)]
validation_data = image_full_data[int(len(image_full_data) * 0.8):]

# Data transformations
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

In [7]:
# Create datasets
train_dataset = LungCTDataset(train_data, transform=transform)
valid_dataset = LungCTDataset(validation_data, transform=transform)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)


In [8]:
# Define DeepLabV3 Model
class DeepLabV3Model(nn.Module):
    def __init__(self, num_classes):
        super(DeepLabV3Model, self).__init__()
        self.deeplab = models.segmentation.deeplabv3_resnet50(pretrained=True)
        self.deeplab.classifier[4] = nn.Conv2d(256, num_classes, kernel_size=(1, 1), stride=(1, 1))

    def forward(self, x):
        return self.deeplab(x)['out']


In [9]:
# Instantiate model
model_deeplab = DeepLabV3Model(num_classes=2).to(DEVICE)


Downloading: "https://download.pytorch.org/models/deeplabv3_resnet50_coco-cd0a2569.pth" to C:\Users\Nithin Kodipyaka/.cache\torch\hub\checkpoints\deeplabv3_resnet50_coco-cd0a2569.pth


  0%|          | 0.00/161M [00:00<?, ?B/s]

In [10]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_deeplab.parameters(), lr=0.001)

In [11]:

# Training loop
def train_model(model, train_loader, criterion, optimizer, epochs):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, torch.argmax(labels, dim=1))
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f'Epoch [{epoch + 1}/{epochs}], Loss: {running_loss / len(train_loader):.4f}')


In [12]:
# Evaluation loop
def evaluate_model(model, valid_loader):
    model.eval()
    all_predictions = []
    all_labels = []
    with torch.no_grad():
        for images, labels in valid_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            all_predictions.append(predicted.cpu().numpy())
            all_labels.append(torch.argmax(labels, dim=1).cpu().numpy())

    return np.concatenate(all_labels), np.concatenate(all_predictions)


In [13]:
# Train DeepLabV3
train_model(model_deeplab, train_loader, criterion, optimizer, EPOCHS)


ValueError: pic should be 2/3 dimensional. Got 4 dimensions.