In [50]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import os
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import cv2
import numpy as np

In [51]:
# Training hyperparameters
BATCH_SIZE = 32
NUM_EPOCHS = 10
LEARNING_RATE = 1e-3

# Relative locations of the annotation CSV, the image folder and the model folder
MODEL_PATH = "../model"
IMAGES_PATH = "../../dataset/images"
DATAFRAME_PATH = "../../dataset.csv"

Importing files created within this project.

In [52]:
# from ..model.utils import (
#     its_xyxy_time,
#     its_denormalize_time,
#     get_solar_elevation,
# )

# Dataset


In [135]:
def collate_fn(batch):
    """
    Collate (image, boxes) samples into batched tensors.

    Images are stacked along a new leading batch dimension, so they must all
    share one shape. Each sample's box tensor is zero-padded along dim 0 up to
    the largest box count in the batch, then the padded tensors are stacked.

    :param batch: Non-empty sequence of (image, boxes) pairs. ``boxes`` holds one
        row per box and must have the same trailing shape for every sample.
    :return: Tuple (images, padded_boxes); padded_boxes has shape
        (batch_size, max_num_boxes, *boxes.shape[1:]).
    :raises ValueError: If ``batch`` is empty.
    """
    if len(batch) == 0:
        raise ValueError("collate_fn received an empty batch")

    images, boxes = zip(*batch)

    # Stack images (they are all the same size after transform)
    images = torch.stack(images)

    # Pad the boxes
    max_num_boxes = max(box.size(0) for box in boxes)
    padded_boxes = []
    for box in boxes:
        missing = max_num_boxes - box.size(0)
        if missing > 0:
            # new_zeros inherits dtype and device from `box`, and the trailing
            # shape is taken from the box itself rather than a hard-coded 5.
            filler = box.new_zeros((missing, *box.shape[1:]))
            box = torch.cat([box, filler], dim=0)
        padded_boxes.append(box)

    return images, torch.stack(padded_boxes)


def resize_with_padding(img, target_size=(200, 200), padding_color=(0, 0, 0)):
    """
    Resize an image while maintaining aspect ratio and add padding to fill the empty space.

    :param img: Input image as an array of shape (height, width, 3).
    :param target_size: Tuple (width, height) of the target size.
    :param padding_color: Three-value color for the padding, in the same channel
        order as ``img``. Default is black (0, 0, 0).
    :return: uint8 array of shape (target_height, target_width, 3) with the
        resized image centered on the padding color.
    :raises ValueError: If ``img`` has zero height or zero width.
    """
    original_height, original_width = img.shape[:2]
    if original_height == 0 or original_width == 0:
        raise ValueError(
            f"Cannot resize an empty image of shape {tuple(img.shape)}"
        )

    target_width, target_height = target_size

    # Calculate the ratio to maintain aspect ratio
    img_ratio = original_width / original_height
    target_ratio = target_width / target_height

    # int() truncates, so a very elongated image could end up with a 0-pixel
    # side, which cv2.resize rejects; keep at least one pixel.
    if img_ratio > target_ratio:
        # Image is wider than the target ratio, fit to width
        new_width = target_width
        new_height = max(1, int(new_width / img_ratio))
    else:
        # Image is taller than the target ratio, fit to height
        new_height = target_height
        new_width = max(1, int(new_height * img_ratio))

    # Resize the image
    resized_img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_AREA)

    # Create a new image with the target size and padding color
    padded_img = np.full((target_height, target_width, 3), padding_color, dtype=np.uint8)

    # Calculate the padding offsets that center the resized image
    x_offset = (target_width - new_width) // 2
    y_offset = (target_height - new_height) // 2

    # Insert the resized image into the padded image
    padded_img[y_offset:y_offset + new_height, x_offset:x_offset + new_width] = resized_img
    return padded_img

def denormalize_yolo_box(box, img_width, img_height, clip=True):
    """
    Convert a normalized YOLO box into integer pixel corner coordinates.

    :param box: Iterable (x_center, y_center, width, height); each value is
        multiplied by the matching image dimension.
    :param img_width: Image width in pixels.
    :param img_height: Image height in pixels.
    :param clip: When True (default), clamp the corners to
        [0, img_width] x [0, img_height] so they are always valid, non-negative
        slice indices. Pass False to get the raw, possibly out-of-image, corners.
    :return: List [x_min, y_min, x_max, y_max].
    """
    x_center, y_center, width, height = box

    # Scale normalized coordinates to image dimensions
    x_center = float(x_center) * img_width
    y_center = float(y_center) * img_height
    width = float(width) * img_width
    height = float(height) * img_height

    # Convert from [x_center, y_center, width, height] to [x_min, y_min, x_max, y_max]
    x_min = int(x_center - width / 2)
    y_min = int(y_center - height / 2)
    x_max = int(x_center + width / 2)
    y_max = int(y_center + height / 2)

    if clip:
        # A box crossing the image border would otherwise yield negative
        # corners, which wrap around when used as array slice indices.
        x_min = min(max(x_min, 0), img_width)
        x_max = min(max(x_max, 0), img_width)
        y_min = min(max(y_min, 0), img_height)
        y_max = min(max(y_max, 0), img_height)

    return [x_min, y_min, x_max, y_max]


In [None]:

class DataFrameDataset(Dataset):
    """
    Dataset yielding a cropped, resized object image together with its label.

    Every row must provide an ``image`` file name (joined onto ``images_path``),
    a ``bbox`` string holding four whitespace-separated normalized YOLO values
    (x_center y_center width height) and a ``height`` value used as the label.
    """

    def __init__(self, dataframe, images_path, transform=None, target_shape=(100, 100)):
        """
        :param dataframe: Path to a CSV file, or an already loaded pandas DataFrame.
        :param images_path: Directory that the ``image`` column is relative to.
        :param transform: Optional callable applied to the cropped, padded image.
        :param target_shape: Tuple (width, height) every crop is resized/padded to.
        """
        if isinstance(dataframe, pd.DataFrame):
            self.dataframe = dataframe
        else:
            self.dataframe = pd.read_csv(dataframe)
        self.transform = transform
        self.images_path = images_path
        self.target_shape = target_shape

    def __len__(self):
        """Return the number of rows (samples)."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """
        Load sample ``idx``.

        :return: Tuple (image, label); ``image`` is the bbox crop after
            resize_with_padding and the optional transform, ``label`` is a tensor
            built from the row's ``height`` value.
        :raises FileNotFoundError: If the image file cannot be read.
        :raises ValueError: If the bounding box selects no pixels.
        """
        row = self.dataframe.iloc[idx]

        img_path = os.path.join(self.images_path, row['image'])
        label = row['height']

        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None
            raise FileNotFoundError(f"Could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        img_height, img_width = image.shape[:2]

        # split() without an argument tolerates repeated or trailing whitespace
        bbox = list(map(float, row['bbox'].split()))
        x_min, y_min, x_max, y_max = denormalize_yolo_box(
            bbox, img_width=img_width, img_height=img_height
        )

        # Keep the crop inside the image: negative indices would wrap around
        # and silently select the wrong region.
        x_min, x_max = max(0, x_min), min(img_width, x_max)
        y_min, y_max = max(0, y_min), min(img_height, y_max)
        if x_max <= x_min or y_max <= y_min:
            raise ValueError(
                f"Bounding box {row['bbox']!r} selects no pixels in {img_path}"
            )

        image = image[y_min:y_max, x_min:x_max]
        image = resize_with_padding(image, target_size=self.target_shape)

        if self.transform:
            image = self.transform(image)

        # NOTE(review): torch.tensor infers the dtype from the label value
        # (float64 for a float CSV column) - confirm the loss expects that.
        return image, torch.tensor(label)

# Preprocessing pipeline handed to the dataset; currently a single ToTensor step
transform = transforms.Compose([transforms.ToTensor()])


In [136]:
# Build the dataset from the annotation CSV and wrap it in a shuffling loader
dataset = DataFrameDataset(
    dataframe=DATAFRAME_PATH,
    images_path=IMAGES_PATH,
    transform=transform,
)
dataloader = DataLoader(
    dataset=dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)