In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from PIL import Image
import os
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data import Dataset, DataLoader
import numpy as np
import cv2

In [2]:
class LaneDetectionCNN(nn.Module):
    """Small convolutional regressor that maps an image to one bounded value.

    Four stride-2 convolutions, each followed by ReLU, feed a two-layer fully
    connected head. The last activation is ``tanh``, so every prediction lies
    in [-1, 1].

    Parameters:
    - input_shape: Tuple (channels, height, width) describing one input image.
      The channel count sets the input channels of the first convolution and
      the spatial size determines the width of the first linear layer.
    """

    def __init__(self, input_shape):
        super(LaneDetectionCNN, self).__init__()
        # Take the channel count from the declared input shape instead of
        # assuming RGB, so the shape probe below cannot disagree with conv1.
        in_channels = input_shape[0]
        self.conv1 = nn.Conv2d(in_channels, 16, kernel_size=5, stride=2, padding=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=2, padding=2)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1)

        # NOTE(review): this layer is created but never applied in forward().
        # It is kept (it has no parameters, so checkpoints are unaffected);
        # confirm whether dropout was meant to be part of the head.
        self.dropout = nn.Dropout(0.5)

        # Size of the flattened conv output, filled in by the probe below.
        self._to_linear = None
        self._calculate_flat_size(input_shape)

        self.fc1 = nn.Linear(self._to_linear, 128)
        self.fc2 = nn.Linear(128, 1)  # Single output neuron for regression

    def _calculate_flat_size(self, input_shape):
        """Run a dummy image through the conv stack to size the first linear layer."""
        # The probe is shape bookkeeping only; skip autograd graph construction.
        with torch.no_grad():
            probe = torch.zeros(1, *input_shape)
            probe = self._forward_conv(probe)
        # Batch size is 1, so the element count is the per-sample feature count.
        self._to_linear = probe.numel()

    def _forward_conv(self, x):
        """Apply the four convolution + ReLU stages."""
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = torch.relu(self.conv3(x))
        x = torch.relu(self.conv4(x))
        return x

    def forward(self, x):
        """Return a (batch, 1) tensor of predictions in [-1, 1]."""
        x = self._forward_conv(x)
        # flatten() keeps the batch dimension and, unlike view(), also accepts
        # non-contiguous activations.
        x = torch.flatten(x, start_dim=1)
        x = torch.relu(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        return x

# Training function
def train_model(model, dataloader, criterion, optimizer, n_epochs=10, device=None):
    """
    Train ``model`` for ``n_epochs`` passes over ``dataloader``.

    Parameters:
    - model: The network to optimise; it is switched to training mode.
    - dataloader: Iterable yielding (inputs, targets) batches.
    - criterion: Loss callable applied as ``criterion(prediction, target)``.
    - optimizer: Optimizer already bound to the model's parameters.
    - n_epochs: Number of passes over the dataloader.
    - device: Device the batches are moved to. When omitted, the device of the
      model's first parameter is used, so the function no longer depends on a
      global ``device`` variable being defined by another cell.

    Returns:
    - The same model instance, after training.

    Raises:
    - ValueError: If the dataloader yields no batches in an epoch.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    for epoch in range(n_epochs):
        total_loss = 0.0
        n_batches = 0
        for Xbatch, ybatch in dataloader:
            # Inputs and targets must live on the same device as the model.
            Xbatch, ybatch = Xbatch.to(device), ybatch.to(device)

            optimizer.zero_grad()
            y_pred = model(Xbatch)
            loss = criterion(y_pred, ybatch)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            n_batches += 1

        if n_batches == 0:
            # Fail with a clear message instead of a ZeroDivisionError below.
            raise ValueError("dataloader produced no batches; cannot compute an epoch loss")

        print(f"Epoch {epoch+1}/{n_epochs}, Loss: {total_loss/n_batches:.4f}")

    return model


In [3]:

class ImageDataset(Dataset):
    """Dataset of images paired with one scalar label each.

    Image ``i`` is paired with label ``i`` purely by position in the sorted
    file listings of the two folders, so both folders must contain the same
    number of files in matching sort order.

    Parameters:
    - image_folder: Path to the folder containing the image files.
    - label_folder: Path to the folder containing one text file per image,
      each holding a single float.
    - transform: Optional callable applied to the RGB PIL image.

    Raises:
    - ValueError: If the two folders do not hold the same number of files.
    """

    def __init__(self, image_folder, label_folder, transform=None):
        self.image_folder = image_folder
        self.label_folder = label_folder
        self.transform = transform
        self.image_files = self._list_files(image_folder)  # Ensure consistent order
        self.label_files = self._list_files(label_folder)  # Ensure consistent order

        # Pairing is positional, so a count mismatch means at least one image
        # would get the wrong label or none at all.
        if len(self.image_files) != len(self.label_files):
            raise ValueError(
                f"Found {len(self.image_files)} image files in {image_folder} but "
                f"{len(self.label_files)} label files in {label_folder}"
            )

    @staticmethod
    def _list_files(folder):
        """Return the sorted names of the regular files directly inside ``folder``."""
        # Skip sub-directories (e.g. notebook checkpoint folders); they can
        # never be opened as an image or a label file.
        return sorted(
            name for name in os.listdir(folder)
            if os.path.isfile(os.path.join(folder, name))
        )

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``(image, label)`` where label is a float32 tensor of shape (1,)."""
        img_path = os.path.join(self.image_folder, self.image_files[idx])
        # convert() returns a fully loaded copy, so the file handle can be
        # released right away instead of waiting for garbage collection.
        with Image.open(img_path) as img:
            image = img.convert("RGB")

        if self.transform:
            image = self.transform(image)

        # Load corresponding label
        label_path = os.path.join(self.label_folder, self.label_files[idx])
        with open(label_path, "r") as f:
            label = float(f.read().strip())  # Read distance as float

        return image, torch.tensor([label], dtype=torch.float32)

def get_dataloader(image_folder, label_folder, batch_size):
    """
    Build a shuffling DataLoader over the image/label folders.

    Parameters:
    - image_folder: Path to the folder containing images.
    - label_folder: Path to the folder containing labels.
    - batch_size: Number of samples per batch.

    Returns:
    - DataLoader over an ImageDataset, with shuffling enabled.
    """
    # Images keep their native resolution; the only preprocessing step is the
    # PIL-image-to-tensor conversion.
    to_tensor = transforms.Compose([transforms.ToTensor()])

    return DataLoader(
        ImageDataset(image_folder, label_folder, transform=to_tensor),
        batch_size=batch_size,
        shuffle=True,
    )


def get_input_shape(image_folder):
    """
    Determine the model input shape from the first image file in a folder.

    The channel count is the one produced after conversion to RGB, because
    ImageDataset converts every image to RGB before it reaches the model.
    Reporting the raw file's band count would give a shape the data pipeline
    never produces for RGBA or grayscale files.

    Parameters:
    - image_folder: Path to the folder containing images.

    Returns:
    - Tuple representing the input shape (channels, height, width).

    Raises:
    - ValueError: If the folder contains no regular files.
    """
    # Only regular files can be images; skip sub-directories such as notebook
    # checkpoint folders, which would otherwise sort first and fail to open.
    image_files = sorted(
        name for name in os.listdir(image_folder)
        if os.path.isfile(os.path.join(image_folder, name))
    )
    if not image_files:
        raise ValueError(f"No images found in folder: {image_folder}")

    # Only the header is needed for the size, so the pixel data is not decoded.
    img_path = os.path.join(image_folder, image_files[0])
    with Image.open(img_path) as img:
        width, height = img.size  # PIL reports (width, height)

    # Number of bands of the mode the dataset converts to (RGB = 3).
    channels = Image.getmodebands("RGB")

    return (channels, height, width)




In [None]:
if __name__ == "__main__":
    # Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    print(f"Using device: {device}")

    # Dataset location and batching.
    IMAGE_FOLDER = "road_images/trail1/images"
    LABEL_FOLDER = "road_images/trail1/labels"
    batch_size = 16

    # The network sizes its first linear layer from the image dimensions.
    input_shape = get_input_shape(image_folder=IMAGE_FOLDER)
    print(f"Determined input shape: {input_shape}")

    dataloader = get_dataloader(IMAGE_FOLDER, LABEL_FOLDER, batch_size)
    num_images = len(dataloader.dataset)
    print(f"Number of images in the dataset: {num_images}")

    # Build the network on the selected device.
    model = LaneDetectionCNN(input_shape)
    model = model.to(device)

    # Mean squared error for the scalar regression target, optimised with Adam.
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    train_model(model, dataloader, criterion, optimizer, n_epochs=5)

    # Note: to preview one image from the loader with PIL:
    #
    # for Xbatch, ybatch in dataloader:
    #     sample = Xbatch[0]
    #     image_array = (sample.permute(1, 2, 0).numpy() * 255).astype("uint8")
    #     print(type(image_array))
    #     image = Image.fromarray(image_array)
    #     image.show()
    #     break

Using device: cuda
Determined input shape: (3, 480, 640)
Number of images in the dataset: 4000
Epoch 1/5, Loss: 0.0006
Epoch 2/5, Loss: 0.0003
Epoch 3/5, Loss: 0.0005
Epoch 4/5, Loss: 0.0003
Epoch 5/5, Loss: 0.0003


In [16]:
# Persist only the learned parameters (the state dict), not the module object.
weights = model.state_dict()
torch.save(weights, "lane_detection_model.pth")
print("Model weights saved to 'lane_detection_model.pth'")

Model weights saved to 'lane_detection_model.pth'


In [4]:
# Rebuild the architecture; the shape must match the one the saved weights
# were trained with, since it fixes the size of the first linear layer.
model = LaneDetectionCNN((3, 480, 640))

# map_location="cpu": the freshly built model lives on the CPU, and this also
#   lets weights saved from a CUDA run load on a machine without a GPU.
# weights_only=True: restrict unpickling to tensors and plain containers
#   instead of arbitrary pickled objects, which also silences the warning
#   torch.load emits when the argument is omitted.
state_dict = torch.load("lane_detection_model.pth", map_location="cpu", weights_only=True)
model.load_state_dict(state_dict)
model.eval()  # Set to evaluation mode
print("Model weights loaded from 'lane_detection_model.pth'")

Model weights loaded from 'lane_detection_model.pth'


  model.load_state_dict(torch.load("lane_detection_model.pth"))


In [18]:
# Show the first image of one batch in an OpenCV window, then stop iterating.
for Xbatch, ybatch in dataloader:
    sample = Xbatch[0]
    # Channels-first tensor -> channels-last array scaled to 8-bit values.
    hwc = sample.permute(1, 2, 0).numpy()
    image_array = (hwc * 255).astype(np.uint8)
    print(type(image_array))
    # OpenCV expects BGR channel order.
    image_bgr = cv2.cvtColor(image_array, cv2.COLOR_RGB2BGR)

    cv2.imshow("Image", image_bgr)
    cv2.waitKey(0)  # Block until a key is pressed
    cv2.destroyAllWindows()
    break
    

<class 'numpy.ndarray'>


In [5]:
import random

def test_model(model, dataloader, device):
    """
    Test the trained model on a random image from the dataset.

    Parameters:
    - model: The trained neural network model.
    - dataloader: The DataLoader providing the test data.
    - device: The device to use for prediction ('cpu' or 'cuda').

    Returns:
    - Tuple containing the predicted distance and the actual label.
    """
    model.to(device)
    model.eval()  # Set model to evaluation mode

    # Choose a random batch and image
    random_idx = random.randint(0, len(dataloader.dataset) - 1)
    image_tensor, label_tensor = dataloader.dataset[random_idx]

    # Move image tensor and label to device
    image_tensor = image_tensor.to(device).unsqueeze(0)  # Add batch dimension
    label_tensor = label_tensor.to(device)
    
    with torch.no_grad():
        prediction = model(image_tensor)  # Predict the distance

    predicted_distance = prediction.item()
    actual_label = label_tensor.item()

    print(f"Random Image Test:")
    print(f"  Predicted Distance: {predicted_distance}")
    print(f"  Actual Distance: {actual_label}")

    image_array = (image_tensor[0].cpu().permute(1, 2, 0).numpy() * 255).astype("uint8")
    image_bgr = cv2.cvtColor(image_array, cv2.COLOR_RGB2BGR)

    cv2.imshow("Image", image_bgr)
    cv2.waitKey(0)  # Wait for a key press
    cv2.destroyAllWindows()

    return predicted_distance, actual_label



# Rebuild the data pipeline here so this cell does not rely on the training
# cell having been executed in the current kernel.
IMAGE_FOLDER = "road_images/trail1/images"
LABEL_FOLDER = "road_images/trail1/labels"
batch_size = 16
dataloader = get_dataloader(
    image_folder=IMAGE_FOLDER,
    label_folder=LABEL_FOLDER,
    batch_size=batch_size,
)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Spot-check ten randomly drawn samples.
for _ in range(10):
    predicted_distance, actual_label = test_model(model, dataloader, device)


Using device: cuda
Random Image Test:
  Predicted Distance: 0.11742646992206573
  Actual Distance: 0.12933236360549927
Random Image Test:
  Predicted Distance: 0.0947289988398552
  Actual Distance: 0.10634204745292664
Random Image Test:
  Predicted Distance: -0.07368995994329453
  Actual Distance: -0.04290331155061722
Random Image Test:
  Predicted Distance: 0.09768050909042358
  Actual Distance: 0.0975923240184784
Random Image Test:
  Predicted Distance: 0.02450891025364399
  Actual Distance: 0.03243619576096535
Random Image Test:
  Predicted Distance: -0.047557760030031204
  Actual Distance: -0.033719055354595184
Random Image Test:
  Predicted Distance: -0.037635453045368195
  Actual Distance: -0.03818884491920471
Random Image Test:
  Predicted Distance: 0.04547379910945892
  Actual Distance: 0.051262252032756805
Random Image Test:
  Predicted Distance: 0.12966281175613403
  Actual Distance: 0.138238787651062
Random Image Test:
  Predicted Distance: 0.005380312446504831
  Actual Dist