In [None]:
pip install torch torchvision


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
pip install tensorflow



In [59]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
import torch.optim as optim
import torch.nn as nn
from torch.optim.lr_scheduler import StepLR
import numpy as np

# Custom dataset class
class HemorrhageDataset(Dataset):
    def __init__(self, dataframe):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        image = Image.open(row["png_path"]).convert("RGB")
        image = self.transform(image)
        labels = torch.tensor(row.iloc[1:-1].values, dtype=torch.float32)
        return image, labels

In [60]:
# Define the file paths
csv_files = ["train.csv", "val.csv", "test.csv"]  # Add or remove CSV files as needed
old_path = "C:\\Users\\Myers\\Desktop\\rsna-ihd-dataset_extracted\\rsna-intracranial-hemorrhage-detection\\preprocessed_images\\"
new_path = "/content/drive/MyDrive/preprocessed_images5K/"

for csv_file in csv_files:
    # Load the CSV
    df = pd.read_csv(csv_file)

    # Replace the file paths in the 'png_path' column
    df["png_path"] = df["png_path"].str.replace(old_path, new_path, regex=False)

    # Save the updated CSV file (overwrite or create a new file)
    updated_csv_file = f"updated_{csv_file}"  # Saves as 'updated_train.csv', etc.
    df.to_csv(updated_csv_file, index=False)

    print(f"Updated file saved as: {updated_csv_file}")


Updated file saved as: updated_train.csv
Updated file saved as: updated_val.csv
Updated file saved as: updated_test.csv


In [61]:
# Load data
train_df = pd.read_csv("/content/updated_test.csv")  # Update path
val_df = pd.read_csv("/content/updated_val.csv")
test_df = pd.read_csv("/content/updated_test.csv")

In [64]:
# prompt: head of both df

print(train_df.head())
print(val_df.head())
print(test_df.head())


       dicom_id  subdural  epidural  subarachnoid  intraparenchymal  \
0  ID_01241a0ab       0.0       0.0           0.0               0.0   
1  ID_042ab92ef       0.0       0.0           0.0               0.0   
2  ID_03678fae1       0.0       0.0           0.0               0.0   
3  ID_01ed2b6c3       0.0       0.0           0.0               0.0   
4  ID_04e8cccc0       0.0       0.0           0.0               0.0   

   intraventricular  any  none  \
0               0.0  0.0     1   
1               0.0  0.0     1   
2               0.0  0.0     1   
3               0.0  0.0     1   
4               0.0  0.0     1   

                                            png_path  
0  /content/drive/MyDrive/preprocessed_images5K/I...  
1  /content/drive/MyDrive/preprocessed_images5K/I...  
2  /content/drive/MyDrive/preprocessed_images5K/I...  
3  /content/drive/MyDrive/preprocessed_images5K/I...  
4  /content/drive/MyDrive/preprocessed_images5K/I...  
       dicom_id  subdural  epidural  s

In [94]:
# Modify the dataset class to handle grayscale images
class HemorrhageTestDataset(Dataset):
    def __init__(self, dataframe):
        self.dataframe = dataframe
        self.transform = transforms.Compose([
            transforms.Grayscale(num_output_channels=1),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5], std=[0.5])
        ])

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        image_path = row["png_path"]

        # Load grayscale image
        image = Image.open(image_path).convert("L")
        image = self.transform(image)

        # ✅ Ensure labels are converted to float and cleaned
        labels = row.iloc[1:-1]  # Exclude dicom_id and png_path
        labels = labels.apply(pd.to_numeric, errors='coerce').fillna(0)  # Convert to numeric, replace NaNs
        labels = np.array(labels, dtype=np.float32)  # Convert to float32
        labels = torch.tensor(labels, dtype=torch.float32)  # Convert to tensor

        return image, labels

In [66]:
# Create dataset and dataloader
train_dataset = HemorrhageDataset(train_df)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Create the validation dataset & DataLoader
val_dataset = HemorrhageDataset(val_df)  # Assuming val_df contains validation data
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)  # Define val_loader

# Create the validation dataset & DataLoader
test_dataset = HemorrhageDataset(test_df)  # Assuming val_df contains validation data
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)  # Define val_loader

In [67]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [68]:
# Display sample batch
sample_images, sample_labels = next(iter(train_loader))
print("Sample Batch Shape:", sample_images.shape, sample_labels.shape)


Sample Batch Shape: torch.Size([32, 1, 224, 224]) torch.Size([32, 7])


In [69]:
# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [70]:
# Load pre-trained ResNet-151 model
model = models.resnet152(pretrained=True)



In [71]:
# Modify the first layer to accept 1-channel grayscale images
model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)

In [72]:
# Modify the last layer to match the number of labels (7 output classes)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 7)  # 7 classes for hemorrhage types


In [73]:
# Move model to GPU (if available)
model = model.to(device)

In [74]:
# Define loss function and optimizer
criterion = nn.BCEWithLogitsLoss()  # Use BCE for multi-label classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [75]:
scheduler = StepLR(optimizer, step_size=5, gamma=0.1)

In [76]:
def validate(model, val_loader, criterion):
    model.eval()
    val_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)

            # Compute loss
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            # Convert logits to binary predictions
            predicted = (torch.sigmoid(outputs) > 0.5).float()  # Threshold at 0.5
            correct += (predicted == labels).sum().item()
            total += labels.numel()  # Total number of elements (not just batch size)

    val_loss /= len(val_loader)
    accuracy = 100 * correct / total
    return val_loss, accuracy


In [77]:
import logging

# Configure logging (optional)
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

In [78]:
num_epochs = 10  # Adjust as needed
for epoch in range(num_epochs):
    print(f"Starting Epoch {epoch + 1}/{num_epochs}...")  # Debugging Print
    model.train()
    running_loss = 0.0

    for i, (images, labels) in enumerate(train_loader):
        print(f"Batch {i + 1}/{len(train_loader)}")  # Debugging Print
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Log batch progress
        if (i + 1) % 10 == 0:
            log_msg = f"Epoch {epoch + 1}/{num_epochs}, Batch {i + 1}/{len(train_loader)}, Loss: {loss.item():.4f}"
            logging.info(log_msg)
            print(log_msg)  # ✅ Added print

    # Step the scheduler
    scheduler.step()

    # Compute validation loss & accuracy
    val_loss, val_accuracy = validate(model, val_loader, criterion)

    # Log epoch end and average loss
    epoch_loss = running_loss / len(train_loader)
    log_msg = f"Epoch {epoch + 1}/{num_epochs} finished, Average Loss: {epoch_loss:.4f}, Val Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.2f}%"
    logging.info(log_msg)
    print(log_msg)  # ✅ Added print


Starting Epoch 1/10...
Batch 1/24
Batch 2/24
Batch 3/24
Batch 4/24
Batch 5/24
Batch 6/24
Batch 7/24
Batch 8/24
Batch 9/24
Batch 10/24
Epoch 1/10, Batch 10/24, Loss: 0.2852
Batch 11/24
Batch 12/24
Batch 13/24
Batch 14/24
Batch 15/24
Batch 16/24
Batch 17/24
Batch 18/24
Batch 19/24
Batch 20/24
Epoch 1/10, Batch 20/24, Loss: 0.1860
Batch 21/24
Batch 22/24
Batch 23/24
Batch 24/24
Epoch 1/10 finished, Average Loss: 0.2843, Val Loss: 844.0864, Accuracy: 48.27%
Starting Epoch 2/10...
Batch 1/24
Batch 2/24
Batch 3/24
Batch 4/24
Batch 5/24
Batch 6/24
Batch 7/24
Batch 8/24
Batch 9/24
Batch 10/24
Epoch 2/10, Batch 10/24, Loss: 0.2351
Batch 11/24
Batch 12/24
Batch 13/24
Batch 14/24
Batch 15/24
Batch 16/24
Batch 17/24
Batch 18/24
Batch 19/24
Batch 20/24
Epoch 2/10, Batch 20/24, Loss: 0.3590
Batch 21/24
Batch 22/24
Batch 23/24
Batch 24/24
Epoch 2/10 finished, Average Loss: 0.2433, Val Loss: 1.3128, Accuracy: 84.25%
Starting Epoch 3/10...
Batch 1/24
Batch 2/24
Batch 3/24
Batch 4/24
Batch 5/24
Batch 6/

In [79]:
best_val_loss = float("inf")  # Initialize to a very high value

In [80]:
if val_loss < best_val_loss:
    logging.info(f"Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. Saving model...")
    best_val_loss = val_loss
    torch.save(model.state_dict(), "best_resnet151.pth")


In [81]:
#just double checking
print(f"Total training images: {len(train_dataset)}")
print(f"Expected number of images: {len(train_df)}")  # Should match 750

sample_images, sample_labels = next(iter(train_loader))

print(f"Image Batch Shape: {sample_images.shape}")  # Should be [batch_size, channels, 224, 224]
print(f"Label Batch Shape: {sample_labels.shape}")  # Should be [batch_size, num_classes]


Total training images: 750
Expected number of images: 750
Image Batch Shape: torch.Size([32, 1, 224, 224])
Label Batch Shape: torch.Size([32, 7])


In [82]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load ResNet model
model = models.resnet152(pretrained=False)  # Load model without pretrained weights
model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)  # Adjust for grayscale input
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 7)  # Assuming 7 output labels
model = model.to(device)

# Load best trained weights
model.load_state_dict(torch.load("best_resnet151.pth"))
model.eval()  # Set to evaluation mode


  model.load_state_dict(torch.load("best_resnet151.pth"))


ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [83]:
# Define image transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # ResNet input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])  # Normalize grayscale images
])

In [84]:
# Custom dataset class for test data
class HemorrhageTestDataset(Dataset):
    def __init__(self, dataframe):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        image_path = row["png_path"]

        # Load grayscale image
        image = Image.open(image_path).convert("L")  # Convert to grayscale
        image = self.transform(image)

        # Extract labels
        labels = torch.tensor(row.iloc[1:-1].values, dtype=torch.float32)  # Exclude dicom_id and png_path

        return image, labels

In [95]:
# Create test dataset and DataLoader
test_dataset = HemorrhageTestDataset(test_df)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [92]:
def evaluate(model, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)

            loss = criterion(outputs, labels)
            test_loss += loss.item()

            # Convert outputs to binary predictions
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            correct += (predicted == labels).sum().item()
            total += labels.numel()

    test_loss /= len(test_loader)
    accuracy = 100 * correct / total
    return test_loss, accuracy


In [96]:
# Compute Test Results
criterion = nn.BCEWithLogitsLoss()
test_loss, test_accuracy = evaluate(model, test_loader, criterion)

print(f"✅ Test Loss: {test_loss:.4f}, Accuracy: {test_accuracy:.2f}%")

✅ Test Loss: 0.1883, Accuracy: 92.23%
