In [2]:
import os
import json
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import numpy as np


class PatientDataset(Dataset):
    def __init__(self, json_path, root_dir, id_range, transform=None):
        """
        Args:
            json_path (str): Path to the JSON file.
            root_dir (str): Root directory where images are stored.
            id_range (tuple): Range of patient IDs to include (start_id, end_id).
            transform (callable, optional): Transform to be applied on an image.
        """
        self.root_dir = root_dir
        self.start_id, self.end_id = id_range
        self.transform = transform

        # Load and filter data based on the patient ID range
        with open(json_path, 'r') as f:
            data = json.load(f)
        self.data, self.entry_count = self._prepare_data(data)

    def _prepare_data(self, data):
        """
        Prepares the dataset by ensuring every patient has a maximum of 4 images for each side.
        If an image is missing, it substitutes a null matrix.
        """
        prepared_data = []
        entry_count = 0

        for patient_id, patient_data in data.items():
            if self.start_id <= patient_id <= self.end_id:
                for side in ['Right', 'Left','Right1',
                             'Left1','Right2','Left2','Right3','Left3']:
                    if side in patient_data:
                        side_data = patient_data[side]
                        label = side_data['Label']
                        image_paths = side_data.get("Paths", {})

                        images = []
                        
                        for img_type in ["deep","Choroidal_vessel","Choroidal_capilla","surface"]:
                            if (img_type in image_paths):
                                    path = image_paths.get(img_type)
                                
                                    full_path = os.path.abspath(os.path.join(self.root_dir, path))
                            if os.path.exists(full_path):
                                    images.append(full_path)
                            else:
                                    print(f"Missing file: {full_path}, substituting with null matrix.")
                                    images.append(None)
                        
                        

                        prepared_data.append({
                            'patient_id': patient_id,
                            'side': side,
                            'images': images,
                            'label': label
                        })
                        entry_count += 1
                    

        return prepared_data, entry_count

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[idx]
        images = []
        for img_path in item['images']:
            if img_path:
                try:
                    img = Image.open(img_path).convert("L")
                    if self.transform:
                        img = self.transform(img)
                except FileNotFoundError:
                    img = torch.zeros(1, 224, 224)  # Substitute with null matrix
            else:
                img = torch.zeros(1, 224, 224)  # Null matrix for missing images
            images.append(img)

        images = torch.stack(images)  # Stack the images into a single tensor
        label_tensor = torch.tensor(item['label'], dtype=torch.float32)

        return images, label_tensor, item['patient_id'], item['side']


# Example usage
if __name__ == "__main__":
    # Define paths and ID range
    json_path = "train.json"
    root_dir = "C:\\Users\\manoj\\OneDrive\\Desktop\\intern\\train"
    id_range = ("20230402140053","20230708145810")

    # Define image transformations
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])

    # Create dataset and dataloader
    dataset = PatientDataset(json_path, root_dir, id_range, transform)
    dataloader = DataLoader(dataset, batch_size=3000, shuffle=True)

    # Print total entries
    print(f"Total valid patient entries (e.g., left, right): {dataset.entry_count}")

    # Iterate through the dataloader
    for images, labels, patient_ids, sides in dataloader:
        print(f"Batch images shape: {images.shape}")  # Shape: [batch_size, 4, 1, 224, 224]
        print(f"Batch labels: {labels}")
        print(f"Patient IDs: {patient_ids}")
        print(f"Sides: {sides}")
        break


Missing file: C:\Users\manoj\OneDrive\Desktop\intern\train\patients\20230518111635\Left\deep.png, substituting with null matrix.
Missing file: C:\Users\manoj\OneDrive\Desktop\intern\train\patients\20230518111635\Left\Choroidal_vessel.png, substituting with null matrix.
Missing file: C:\Users\manoj\OneDrive\Desktop\intern\train\patients\20230518111635\Left\Choroidal_capilla.png, substituting with null matrix.
Missing file: C:\Users\manoj\OneDrive\Desktop\intern\train\patients\20230518111635\Left\surface.png, substituting with null matrix.
Missing file: C:\Users\manoj\OneDrive\Desktop\intern\train\patients\20230516142136\Right\deep.png, substituting with null matrix.
Missing file: C:\Users\manoj\OneDrive\Desktop\intern\train\patients\20230516142136\Right\Choroidal_vessel.png, substituting with null matrix.
Missing file: C:\Users\manoj\OneDrive\Desktop\intern\train\patients\20230516142136\Right\Choroidal_capilla.png, substituting with null matrix.
Missing file: C:\Users\manoj\OneDrive\Des

In [3]:
from torch.utils.data import random_split, DataLoader
import torch
train_size = int(0.8 * len(dataset))  # 80% for training
val_size = len(dataset) - train_size   # 20% for validation

# Set random seed for reproducibility
torch.manual_seed(42)

# Split the dataset
train_dataset, val_dataset = random_split(
    dataset, 
    [train_size, val_size]
)

# Create DataLoaders
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    
    pin_memory=True  # Useful if using CUDA
)

val_loader = DataLoader(
    val_dataset,
    batch_size=32,
    shuffle=False,
    
    pin_memory=True
)



In [46]:
images.squeeze(2).size()

torch.Size([30, 4, 224, 224])

In [67]:
print(abs(outputs[].round().bool()))


tensor([ True,  True, False, False,  True], device='cuda:0')


In [None]:
# Check for CUDA availability
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Modify ResNet to accept 4 input channels
resnet50 = models.resnet50(pretrained=True)
new_conv1 = torch.nn.Conv2d(4, 64, kernel_size=7, stride=2, padding=3, bias=False)

# Initialize the weights using the pretrained weights
with torch.no_grad():
    new_conv1.weight[:, :3] = resnet50.conv1.weight
    new_conv1.weight[:, 3] = resnet50.conv1.weight.mean(dim=1)
num_classes=5
resnet50.conv1 = new_conv1
resnet50.fc = torch.nn.Linear(resnet50.fc.in_features, num_classes)
resnet50 = resnet50.to(device)
print(resnet50.fc)

# Define optimizer and criterion
optimizer = torch.optim.Adam(resnet50.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()

# Training loop with accuracy calculation
for epoch in range(20):
    total_loss = 0
    correct = 0
    total = 0
    
    # Set model to training mode
    resnet50.train()
    
    # Training loop
    for images, labels, _, _ in dataloader:
        inputs = images.squeeze(2).to(device)
        labels = labels.to(device)  # Ensure labels are long type
        
        # Forward pass
        outputs = resnet50(inputs)
        loss = criterion(outputs, labels)
        
        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Calculate accuracy
        for i in range(len(labels)):
            if torch.equal(outputs[i].round(), labels[i]):         
                correct += 1
        total += labels.size(0)     
        total_loss += loss.item()
    
    # Calculate epoch statistics
    avg_loss = total_loss / len(dataloader)
    accuracy = 100 * correct / total
    
    print(f"Epoch {epoch+1}")
    print(f"Average Loss: {avg_loss:.4f}")
    print(f"Training Accuracy: {accuracy:.2f}%")
    print("-" * 50)

    # Optional: Validation loop
    resnet50.eval()
    

In [6]:
import torch
from torchvision import models
import torch.nn as nn

# Check for CUDA availability
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Modify ResNet to accept 4 input channels
resnet50 = models.resnet50(pretrained=True)
new_conv1 = torch.nn.Conv2d(4, 64, kernel_size=7, stride=2, padding=3, bias=False)
num_classes=5
# Initialize the weights using the pretrained weights
with torch.no_grad():
    new_conv1.weight[:, :3] = resnet50.conv1.weight
    new_conv1.weight[:, 3] = resnet50.conv1.weight.mean(dim=1)

# Remove the original FC layer
resnet50.conv1 = new_conv1
features_dim = resnet50.fc.in_features
resnet50.fc = torch.nn.Identity()  # Replace with Identity to get features

# Create a new classifier network
class Classifier(torch.nn.Module):
    def __init__(self, input_dim, hidden_dims, output_dim, dropout_rate=0.3):
        super().__init__()
        self.layers = torch.nn.Sequential(
            torch.nn.Linear(input_dim, hidden_dims[0]),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(hidden_dims[0]),
            torch.nn.Dropout(dropout_rate),
            
            torch.nn.Linear(hidden_dims[0], hidden_dims[1]),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(hidden_dims[1]),
            torch.nn.Dropout(dropout_rate),
            
            torch.nn.Linear(hidden_dims[1], hidden_dims[2]),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(hidden_dims[2]),
            torch.nn.Dropout(dropout_rate),
            
            torch.nn.Linear(hidden_dims[2], output_dim),
            torch.nn.Sigmoid()  # Use Sigmoid for binary classification
        )
    
    def forward(self, x):
        return self.layers(x)

# Create the classifier
classifier = Classifier(
    input_dim=features_dim,  # ResNet50 feature dimension (2048)
    hidden_dims=[1024, 512, 256],  # 3 hidden layers
    output_dim=num_classes,  # Your number of classes
).to(device)

# Combine ResNet and Classifier
class CombinedModel(torch.nn.Module):
    def __init__(self, feature_extractor, classifier):
        super().__init__()
        self.feature_extractor = feature_extractor
        self.classifier = classifier
        
    def forward(self, x):
        features = self.feature_extractor(x)
        return self.classifier(features)

# Create combined model
model = CombinedModel(resnet50, classifier).to(device)

# Define optimizer and criterion
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.BCELoss()  # Binary Cross Entropy Loss



Using device: cuda




In [7]:
# Training loop with accuracy calculation
for epoch in range(20):
    total_loss = 0
    correct = 0
    total = 0
    
    # Set model to training mode
    model.train()
    
    # Training loop
    for images, labels, _, _ in train_loader:
        inputs = images.squeeze(2).to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Calculate accuracy
        predictions = (outputs > 0.5).float()  # Convert probabilities to binary predictions
        for i in range(len(labels)):
            if torch.equal(outputs[i].round(), labels[i]):         
                correct += 1
        total += labels.size(0)     
        total_loss += loss.item()
    
    
    # Calculate epoch statistics
    avg_loss = total_loss / len(train_loader)
    accuracy = 100 * correct / total
    print(f"Epoch {epoch+1}")
    print(f"Average Loss: {avg_loss:.4f}")
    print(f"Training Accuracy: {accuracy:.2f}%")
    print("-" * 50)
      
    model.eval()  
    
    for images, labels, _, _ in val_loader:
        inputs = images.squeeze(2).to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        
        
        # Calculate accuracy
        predictions = (outputs > 0.5).float()  # Convert probabilities to binary predictions
        for i in range(len(labels)):
            if torch.equal(outputs[i].round(), labels[i]):         
                correct += 1
        total += labels.size(0)     
        total_loss += loss.item()
    # Calculate epoch statistics
    avg_loss = total_loss / len(val_loader)
    accuracy = 100 * correct / total
    print(f"Epoch {epoch+1}")
    print(f"val Average Loss: {avg_loss:.4f}")
    print(f"val Accuracy: {accuracy:.2f}%")
    print("-" * 50)
    

    # Optional: Print sample predictions
    if epoch % 5 == 0:  # Print every 5 epochs
        print("\nSample predictions:")
        print("Predicted:", outputs[0].round().cpu().detach().numpy())
        print("Actual:", labels[0].cpu().numpy())
        print("-" * 50)

Epoch 1
Average Loss: 0.7447
Training Accuracy: 3.36%
--------------------------------------------------
Epoch 1
val Average Loss: 2.9779
val Accuracy: 4.76%
--------------------------------------------------

Sample predictions:
Predicted: [0. 1. 0. 1. 0.]
Actual: [1. 1. 0. 1. 0.]
--------------------------------------------------
Epoch 2
Average Loss: 0.7026
Training Accuracy: 7.46%
--------------------------------------------------
Epoch 2
val Average Loss: 6.7667
val Accuracy: 6.85%
--------------------------------------------------
Epoch 3
Average Loss: 0.7030
Training Accuracy: 6.34%
--------------------------------------------------
Epoch 3
val Average Loss: 7.6402
val Accuracy: 6.55%
--------------------------------------------------
Epoch 4
Average Loss: 0.6687
Training Accuracy: 7.09%
--------------------------------------------------
Epoch 4
val Average Loss: 3.0154
val Accuracy: 9.82%
--------------------------------------------------
Epoch 5
Average Loss: 0.6606
Training A

In [14]:
corr=torch.equal(outputs[i],labels[i])
outputs.shape



torch.Size([4, 5])

In [1]:
(outputs.round() ==labels)

NameError: name 'outputs' is not defined