In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
tumor_dataset_path="/content/drive/MyDrive/Colab Notebooks/tumor"

In [3]:
import os
import cv2
import xml.etree.ElementTree as ET

def read_dataset(dataset_path):
    train_images = []
    train_labels = []
    test_images = []
    test_labels = []

    # Navigating the dataset directory
    for class_folder in os.listdir(dataset_path):
        class_folder_path = os.path.join(dataset_path, class_folder)
        if os.path.isdir(class_folder_path):
            train_images_folder_path = os.path.join(class_folder_path, 'images', 'train')
            train_labels_folder_path = os.path.join(class_folder_path, 'labels', 'train')
            test_images_folder_path = os.path.join(class_folder_path, 'images', 'test')
            test_labels_folder_path = os.path.join(class_folder_path, 'labels', 'test')

            # Training dataset: Navigating files in image folder
            for image_file in os.listdir(train_images_folder_path):
                if image_file.endswith('.jpg'):
                    image_path = os.path.join(train_images_folder_path, image_file)
                    label_file = os.path.join(train_labels_folder_path, image_file.replace('.jpg', '.txt'))

                    # Checking the existence of the label file
                    if not os.path.exists(label_file):
                        continue

                    # Reading the image
                    image = cv2.imread(image_path)
                    train_images.append(image)

                    # Reading the tag file
                    with open(label_file, 'r') as f:
                        labels = f.readlines()
                    train_labels.append(labels)

            # Test dataset: Navigating files in image folder
            for image_file in os.listdir(test_images_folder_path):
                if image_file.endswith('.jpg'):
                    image_path = os.path.join(test_images_folder_path, image_file)
                    label_file = os.path.join(test_labels_folder_path, image_file.replace('.jpg', '.txt'))

                    # Checking the existence of the label file
                    if not os.path.exists(label_file):
                        continue

                    # Reading the image
                    image = cv2.imread(image_path)
                    test_images.append(image)

                    # Reading the tag file
                    with open(label_file, 'r') as f:
                        labels = f.readlines()
                    test_labels.append(labels)

    return train_images, train_labels, test_images, test_labels


train_images, train_labels, test_images, test_labels = read_dataset(tumor_dataset_path)


In [4]:
from PIL import Image
import numpy as np
import cv2

# New size
new_size = (224, 224)


# Train data resizing
resized_train_images = []
resized_train_labels = []

for i in range(len(train_images)):
    image_array = train_images[i]
    image = Image.fromarray(image_array)
    resized_image = image.resize(new_size)
    resized_train_images.append(resized_image)
    
    labels = train_labels[i]
    #resized_labels = []
    
    
    label_values = labels[0].split(' ')
    x = float(label_values[1])
    #print(label)
    y = float(label_values[2])
    width = float(label_values[3])
    height = float(label_values[4])
        
    new_x = x * (new_size[0] / image.width)
    new_y = y * (new_size[1] / image.height)
    new_width = width * (new_size[0] / image.width)
    new_height = height * (new_size[1] / image.height)

    resized_labels = [new_x, new_y, new_width, new_height]
    resized_train_labels.append(resized_labels)
print(len(resized_train_labels)) 
print(len(resized_train_images))   

# Test data resizing
resized_test_images = []
resized_test_labels = []

for i in range(len(test_images)):
    image_array = test_images[i]
    image = Image.fromarray(image_array)
    resized_image = image.resize(new_size)
    resized_test_images.append(resized_image)
    
    labels = test_labels[i]
    
    label_values = labels[0].split(' ')
    #print(label_values[0])
    x = float(label_values[1])
    y = float(label_values[2])
    width = float(label_values[3])
    height = float(label_values[4])
        
    new_x = x * (new_size[0] / image.width)
    new_y = y * (new_size[1] / image.height)
    new_width = width * (new_size[0] / image.width)
    new_height = height * (new_size[1] / image.height)
        
    resized_labels= [new_x, new_y, new_width, new_height]
    resized_test_labels.append(resized_labels)
    #print(resized_test_labels[i])

print(len(resized_test_labels)) 
print(len(resized_test_images)) 


878
878
223
223


In [5]:
import torch.nn as nn
# Define the RPN class to integrate the RPN component
class RPN(nn.Module):
    def __init__(self, in_channels, num_anchors):
        super(RPN, self).__init__()

        # convolution layers
        self.conv = nn.Conv2d(in_channels, 256, kernel_size=3, stride=1, padding=1)

        # Output channel for bounding box coordinate estimates
        self.reg_layer = nn.Conv2d(256, 4 * num_anchors, kernel_size=1, stride=1, padding=0)

        # Output channel for class possibilities
        self.cls_layer = nn.Conv2d(256, 2 * num_anchors, kernel_size=1, stride=1, padding=0)

        # for Anker boxes
        self.num_anchors = num_anchors

    def forward(self, x):
        # convolution layers
        x = F.relu(self.conv(x))
        #x = F.relu(self.conv(x.unsqueeze(-1).unsqueeze(-1)))


        # boundibg box estimates
        reg = self.reg_layer(x)
        reg = reg.permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, 4)

        # class possibilities
        cls = self.cls_layer(x)
        cls = cls.permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, 2)

        return reg, cls

In [6]:
import torch
import torchvision.models as models
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Step 1: Load or create a pre-trained model
model = models.resnet18(pretrained=True)

# Step 2: Add the classification part or RPN component using the properties of the model
num_features = model.fc.in_features
print(num_features)
num_classes = 3  # class number
model.fc = nn.Linear(num_features, num_classes)



512


In [7]:
# Step 3: Use Softmax loss for classification part, L2 loss for RPN component
softmax_loss = nn.CrossEntropyLoss()
l2_loss = nn.MSELoss()

In [15]:
import torch

def compute_loss(reg, cls, train_labels):
    # Initialize loss variables
    total_loss = 0.0
    
    # Iterate over each tensor in the train_labels list
    for labels in train_labels:
        # Reshape the labels tensor
        #labels = labels.view(-1, 5)

        # Get the actual bounding box coordinates
        box_coordinates = labels[:, :3]

        # Calculate classification loss (softmax loss)
        classification_loss = torch.nn.functional.cross_entropy(cls, labels[:, 3].long())

        # Calculate the L2 loss for the RPN component
        regression_loss = torch.nn.functional.mse_loss(reg, box_coordinates)

        # Calculate total loss (sum of softmax loss and L2 loss)
        total_loss += classification_loss + regression_loss

    return total_loss


In [9]:
# Step 5: Choose an optimization algorithm and use the back propagation algorithm to train the model
learning_rate = 0.001
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

In [10]:
# Create the RPN component
in_channels = 3  # Channel count of feature maps
num_anchors = 7 # anker box number
rpn = RPN(in_channels, num_anchors)

In [11]:
from torchvision import transforms
from torch.utils.data import DataLoader


transform = transforms.Compose([
    transforms.ToTensor()
])


class CustomDataset():
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform
    
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]
        
        if self.transform:
            image = self.transform(image)
        
        return image, label

#have variables resized_train_images and resized_train_labels
train_dataset = CustomDataset(resized_train_images, resized_train_labels, transform=transform)

batch_size = 32  # Batch size
shuffle = True  # Set to True, to shuffle the training data

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle)


In [12]:
for images,labels in train_loader:
  print(images.shape)

torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([14, 3, 224, 224])


In [19]:
import torch
num_epochs = 4

for epoch in range(num_epochs):
    i=0
    for images, labels in train_loader:
        
        # Get feature maps using model
        features = model(images)
        print(features.shape)
        features = features.unsqueeze(2).unsqueeze(3)
        features = features.repeat(1, 1, 224, 224)


    
        reg, cls = rpn(features)
        print(labels)
        desired_batch_size = 32
        labels = [label.repeat(desired_batch_size // len(labels)) for label in labels]
        labels = torch.cat(labels)[:desired_batch_size]



        print(len(resized_train_labels[i]))
        i=i+1
        # Loss calculation
        #resized_train_labels_tensor = torch.tensor(resized_train_labels[i])
        labels_tensor = torch.unsqueeze(torch.tensor(resized_train_labels[i]), 0)
        print(labels_tensor.dim())
        loss = compute_loss(reg, cls,labels_tensor)
        
        #Backpropagation and gradient update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step() 


torch.Size([32, 3])
[tensor([0.1574, 0.3962, 0.2075, 0.5167, 0.2218, 0.1841, 0.2829, 0.5760, 0.6871,
        0.1594, 0.4637, 0.1551, 0.2172, 0.2252, 0.1728, 0.1566, 0.7166, 0.1512,
        0.1571, 0.2737, 0.2185, 0.4601, 0.3296, 0.1428, 0.2236, 0.2542, 0.2675,
        0.1499, 0.3816, 0.2267, 0.2560, 0.1923], dtype=torch.float64), tensor([0.1307, 0.4293, 0.1189, 0.5515, 0.1692, 0.1887, 0.1204, 0.5007, 0.4570,
        0.1653, 0.5736, 0.1646, 0.1225, 0.1227, 0.2244, 0.1933, 0.4693, 0.1579,
        0.0575, 0.5582, 0.2416, 0.5936, 0.5648, 0.1607, 0.2008, 0.2413, 0.1386,
        0.1458, 0.4231, 0.1350, 0.2485, 0.2511], dtype=torch.float64), tensor([0.0693, 0.1187, 0.0493, 0.1588, 0.0339, 0.0529, 0.0575, 0.1095, 0.2079,
        0.0365, 0.1187, 0.0513, 0.0308, 0.0282, 0.0827, 0.0678, 0.1826, 0.0508,
        0.0421, 0.1119, 0.0816, 0.1771, 0.0986, 0.0503, 0.0488, 0.1273, 0.0298,
        0.0370, 0.1661, 0.0529, 0.0888, 0.0960], dtype=torch.float64), tensor([0.0745, 0.0822, 0.0560, 0.0657, 0.0416

IndexError: ignored