In [1]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import os
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import cv2
import numpy as np

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Paths
DATAFRAME_PATH = "../../dataset.csv"
IMAGES_PATH = "../../dataset/images"
MODEL_PATH = "../model"

# Hyperparameters
LEARNING_RATE = 0.001
NUM_EPOCHS = 10
BATCH_SIZE = 32

Importing files created within this projects. 

In [3]:
# from ..model.utils import (
#     its_xyxy_time,
#     its_denormalize_time,
#     get_solar_elevation,
# )

# Dataset


In [4]:
def collate_fn(batch):
    images, boxes = zip(*batch)
    
    # Stack images (they are all the same size after transform)
    images = torch.stack(images)
    
    # Pad the boxes
    max_num_boxes = max(box.size(0) for box in boxes)
    padded_boxes = []
    for box in boxes:
        if box.size(0) < max_num_boxes:
            padded_box = torch.cat([box, torch.zeros((max_num_boxes - box.size(0), 5))], dim=0)
        else:
            padded_box = box
        padded_boxes.append(padded_box)
    
    padded_boxes = torch.stack(padded_boxes)
    
    return images, padded_boxes


def resize_with_padding(img, target_size=(200, 200), padding_color=(0, 0, 0)):
    """
    Resize an image while maintaining aspect ratio and add padding to fill the empty space.

    :param image: input image.
    :param target_size: Tuple (width, height) of the target size.
    :param padding_color: Tuple (B, G, R) color value for padding. Default is white (255, 255, 255).
    """
    # Read the image
    original_height, original_width = img.shape[:2]

    # Calculate the ratio to maintain aspect ratio
    img_ratio = original_width / original_height
    target_ratio = target_size[0] / target_size[1]

    if img_ratio > target_ratio:
        # Image is wider than the target ratio, fit to width
        new_width = target_size[0]
        new_height = int(new_width / img_ratio)
    else:
        # Image is taller than the target ratio, fit to height
        new_height = target_size[1]
        new_width = int(new_height * img_ratio)

    # Resize the image
    resized_img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_AREA)

    # Create a new image with the target size and padding color
    padded_img = np.full((target_size[1], target_size[0], 3), padding_color, dtype=np.uint8)

    # Calculate the padding offsets
    x_offset = (target_size[0] - new_width) // 2
    y_offset = (target_size[1] - new_height) // 2

    # Insert the resized image into the padded image
    padded_img[y_offset:y_offset+new_height, x_offset:x_offset+new_width] = resized_img
    return padded_img

def denormalize_yolo_box(box, img_width, img_height):
    x_center, y_center, width, height = box

    # Scale normalized coordinates to image dimensions
    x_center = float(x_center) * img_width
    y_center = float(y_center) * img_height
    width = float(width) * img_width
    height = float(height) * img_height

    # Convert from [x_center, y_center, width, height] to [x_min, y_min, x_max, y_max]
    x_min = int(x_center - width / 2)
    y_min = int(y_center - height / 2)
    x_max = int(x_center + width / 2)
    y_max = int(y_center + height / 2)

    return [x_min, y_min, x_max, y_max]


In [5]:

class DataFrameDataset(Dataset):
    def __init__(self, dataframe, images_path, transform=None):
        self.dataframe = dataframe
        self.transform = transform
        self.images_path = images_path
        self.target_shape = (100, 100)
    
    def __len__(self):
        return len(self.dataframe)
    
    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        
        img_path = row['image']
        img_path = os.path.join(self.images_path, img_path)
        height = float(row['height'])
        
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        bbox = list(map(float, row['bbox'].split(" ")))
        denorm_bbox = denormalize_yolo_box(bbox, img_width=image.shape[1], img_height=image.shape[0])
        image = image[denorm_bbox[1] : denorm_bbox[3], denorm_bbox[0] : denorm_bbox[2]]
        image = resize_with_padding(image, target_size=self.target_shape)
    

        if self.transform:
            image = self.transform(image)
        
        return image, torch.tensor(height)

# Example transform
transform = transforms.Compose([
    transforms.ToTensor(),
])


# Model

In [6]:
import torch
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms as T
from torchsummary import summary
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from pprint import pprint
import copy
import numpy as np
import os   
import argparse
from datetime import datetime

import logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

import sys
sys.path.append("../")

from util.util import train_test_split, write_train_file

# ? remove printing of warnings5
import warnings

warnings.filterwarnings("ignore")

from model.layers import Model
from model.dataset import cast_to_device
from model.loss import RMSELoss, combining_loss

sys.path.remove("../")

DEBUG:h5py._conv:Creating converter from 7 to 5
DEBUG:h5py._conv:Creating converter from 5 to 7
DEBUG:h5py._conv:Creating converter from 7 to 5
DEBUG:h5py._conv:Creating converter from 5 to 7


In [7]:
def train_cropped(
    model,
    data_loaders: dict,
    optimizer,
    loss_fn,
    writer,
    num_epochs=10,
    device="cpu",
    shd_loss_weight=1.0,
):
    ...
    print("TRAINING STARTED")

    val_loss_history = []
    train_loss_history = []
    best_model_wts = copy.deepcopy(model.state_dict())
    last_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 1000000
    counters = {"train": 0, "val": 0}

    time_str = datetime.now().strftime("%d_%m_%Y_%H_%M_%S")
    os.makedirs(f"weights/{time_str}", exist_ok=True)
    write_train_file(
        model, optimizer, loss_fn, num_epochs, shd_loss_weight, f"weights/{time_str}"
    )

    for epoch in tqdm(range(num_epochs)):
        print(f"Epoch {epoch} / {num_epochs - 1}", end="\t")

        for phase in ["train", "val"]:
            if phase == "train":
                model.train()
            elif phase == "val":
                model.eval()

            running_height_loss = 0.0

            for x in data_loaders[phase]:
                counters[phase] += 1

                image, labels_height = x
                image = image.to(device)
                labels_height = labels_height.to(device)


                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == "train"):
                    pred_shd_len, pred_solar_angle, pred_height = model(image)
                    pred_shd_len = pred_shd_len.squeeze()
                    pred_height = pred_height.squeeze()
                    pred_solar_angle = pred_solar_angle.squeeze()

                    height_loss = loss_fn(pred_height, labels_height)

                    if phase == "train":
                        logger.debug(f"Pred height: {pred_height}")
                        logger.debug(f"Labels height: {labels_height}")

                    if phase == "train":
                        height_loss.backward()
                        # torch.nn.utils.clip_grad_norm_(
                        #     model.parameters(), max_norm=10, norm_type=1
                        # )
                        optimizer.step()

                    writer.add_scalar(
                        f"Loss Height/{phase} fast", height_loss.item(), counters[phase]
                    )
                    # print(f"Loss Shadow Length/{phase}", shd_loss.item(), epoch)

                    running_height_loss += height_loss.item()

            height_epoch_loss = running_height_loss / (
                len(data_loaders[phase].dataset) / data_loaders[phase].batch_size
            )

            writer.add_scalar(f"Loss Height/{phase}", height_epoch_loss, epoch)

            print(f"{phase} height loss: {height_epoch_loss:.4f}", end="\t")

            if phase == "val" and height_epoch_loss < best_loss:
                best_loss = height_epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())
                torch.save(best_model_wts, os.path.join("weights", time_str, "best.pt"))

            if phase == "val":
                val_loss_history.append(height_epoch_loss)
                last_model_wts = copy.deepcopy(model.state_dict())
                torch.save(last_model_wts, os.path.join("weights", time_str, "last.pt"))

            if phase == "train":
                train_loss_history.append(height_epoch_loss)

        print()

    print("-" * 30)
    print(f"Training Complete")
    print(f"Best Validation Loss: {best_loss:.4f}")

    return val_loss_history, train_loss_history


In [8]:
def main(args):
    device = torch.device(f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    print("Training on device:", device)

    df = pd.read_csv(args.data)
    train_df, val_df = train_test_split(df)

    train_df.to_csv("train.csv", index=False)
    val_df.to_csv("val.csv", index=False)

    transform = transforms.Compose([
        transforms.ToTensor(),
    ])
    train_dataset = DataFrameDataset(train_df, IMAGES_PATH, transform=transform)
    val_dataset = DataFrameDataset(val_df, IMAGES_PATH, transform=transform)

    dataloaders = {
        "train": DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True),
        "val": DataLoader(val_dataset, batch_size=args.batch_size, shuffle=True),
    }

    model = Model(shd_len_backbone=args.model, pretrained=args.pretrained).to(device)

    if args.optimizer == "adam":
        optimizer = torch.optim.Adam(
            model.parameters(), lr=args.lr, weight_decay=args.wd
        )
    elif args.optimizer == "sgd":
        optimizer = torch.optim.SGD(
            model.parameters(), lr=args.lr, weight_decay=args.wd
        )
    else:
        raise ValueError("Optimizer not supported")

    if args.loss == "l1":
        loss_fn = torch.nn.L1Loss()
    elif args.loss == "mse":
        loss_fn = torch.nn.MSELoss()
    elif args.loss == "smoothl1":
        loss_fn = torch.nn.SmoothL1Loss()
    elif args.loss == "huber":
        loss_fn = torch.nn.HuberLoss()
    elif args.loss == "rmse":
        loss_fn = RMSELoss()
    else:
        raise ValueError("Loss not supported")

    if args.multi_gpu:
        model = torch.nn.DataParallel(model, device_ids=[0, 1, 2, 3])

    writer = SummaryWriter()

    _, _ = train_cropped(
        model,
        dataloaders,
        optimizer,
        loss_fn,
        writer,
        num_epochs=args.epochs,
        device=device,
        shd_loss_weight=args.shd_loss_weight,
    )

    writer.flush()
    writer.close()

In [9]:
from argparse import Namespace

# Create a Namespace object with your arguments
args = Namespace(
    gpu=0,
    data='../dataset.csv',
    optimizer='adam',
    batch_size=64,
    epochs=50,
    multi_gpu=False,
    loss='l1',
    model='resnet18',
    pretrained=False,
    shd_loss_weight=0,
    lr=0.0001,
    wd=1e-05
)

# Convert the Namespace object to a dictionary
args_dict = vars(args)


# Print the dictionary to verify
print("*******Training Arguments*******")
display(args_dict)


*******Training Arguments*******


{'gpu': 0,
 'data': '../dataset.csv',
 'optimizer': 'adam',
 'batch_size': 64,
 'epochs': 50,
 'multi_gpu': False,
 'loss': 'l1',
 'model': 'resnet18',
 'pretrained': False,
 'shd_loss_weight': 0,
 'lr': 0.0001,
 'wd': 1e-05}

In [10]:
main(args)

Training on device: cuda:0
TRAINING STARTED


  0%|          | 0/50 [00:00<?, ?it/s]

Epoch 0 / 49	

DEBUG:__main__:Pred height: tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',
       grad_fn=<SqueezeBackward0>)
DEBUG:__main__:Labels height: tensor([30., 18., 30., 18., 30., 30., 33., 18.], device='cuda:0')


train height loss: 207.0000	val height loss: 960.0000	

  2%|▏         | 1/50 [00:02<02:17,  2.82s/it]DEBUG:__main__:Pred height: tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',
       grad_fn=<SqueezeBackward0>)
DEBUG:__main__:Labels height: tensor([33., 18., 30., 18., 30., 30., 30., 18.], device='cuda:0')



Epoch 1 / 49	train height loss: 207.0000	val height loss: 960.0000	

  4%|▍         | 2/50 [00:03<01:10,  1.47s/it]DEBUG:__main__:Pred height: tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',
       grad_fn=<SqueezeBackward0>)
DEBUG:__main__:Labels height: tensor([30., 30., 18., 30., 33., 18., 18., 30.], device='cuda:0')



Epoch 2 / 49	train height loss: 207.0000	val height loss: 960.0000	

  6%|▌         | 3/50 [00:03<00:49,  1.05s/it]DEBUG:__main__:Pred height: tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',
       grad_fn=<SqueezeBackward0>)
DEBUG:__main__:Labels height: tensor([33., 30., 18., 18., 30., 30., 30., 18.], device='cuda:0')



Epoch 3 / 49	train height loss: 207.0000	val height loss: 960.0000	

  8%|▊         | 4/50 [00:04<00:38,  1.19it/s]DEBUG:__main__:Pred height: tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',
       grad_fn=<SqueezeBackward0>)
DEBUG:__main__:Labels height: tensor([30., 18., 33., 30., 18., 18., 30., 30.], device='cuda:0')



Epoch 4 / 49	train height loss: 207.0000	val height loss: 960.0000	

 10%|█         | 5/50 [00:04<00:32,  1.37it/s]DEBUG:__main__:Pred height: tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',
       grad_fn=<SqueezeBackward0>)
DEBUG:__main__:Labels height: tensor([18., 30., 33., 30., 18., 30., 30., 18.], device='cuda:0')



Epoch 5 / 49	train height loss: 207.0000	val height loss: 960.0000	

 12%|█▏        | 6/50 [00:05<00:29,  1.48it/s]DEBUG:__main__:Pred height: tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',
       grad_fn=<SqueezeBackward0>)
DEBUG:__main__:Labels height: tensor([30., 30., 18., 30., 18., 30., 33., 18.], device='cuda:0')



Epoch 6 / 49	train height loss: 207.0000	val height loss: 960.0000	

 14%|█▍        | 7/50 [00:06<00:27,  1.59it/s]DEBUG:__main__:Pred height: tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',
       grad_fn=<SqueezeBackward0>)
DEBUG:__main__:Labels height: tensor([30., 18., 18., 18., 30., 30., 30., 33.], device='cuda:0')



Epoch 7 / 49	train height loss: 207.0000	val height loss: 960.0000	

 16%|█▌        | 8/50 [00:06<00:25,  1.65it/s]DEBUG:__main__:Pred height: tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',
       grad_fn=<SqueezeBackward0>)
DEBUG:__main__:Labels height: tensor([33., 30., 18., 30., 18., 30., 30., 18.], device='cuda:0')



Epoch 8 / 49	train height loss: 207.0000	val height loss: 960.0000	

 18%|█▊        | 9/50 [00:07<00:24,  1.70it/s]DEBUG:__main__:Pred height: tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',
       grad_fn=<SqueezeBackward0>)
DEBUG:__main__:Labels height: tensor([18., 33., 30., 30., 18., 18., 30., 30.], device='cuda:0')



Epoch 9 / 49	train height loss: 207.0000	val height loss: 960.0000	

 20%|██        | 10/50 [00:07<00:23,  1.73it/s]DEBUG:__main__:Pred height: tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',
       grad_fn=<SqueezeBackward0>)
DEBUG:__main__:Labels height: tensor([18., 18., 18., 30., 33., 30., 30., 30.], device='cuda:0')



Epoch 10 / 49	train height loss: 207.0000	val height loss: 960.0000	

 22%|██▏       | 11/50 [00:08<00:21,  1.78it/s]DEBUG:__main__:Pred height: tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',
       grad_fn=<SqueezeBackward0>)
DEBUG:__main__:Labels height: tensor([33., 30., 18., 30., 18., 30., 18., 30.], device='cuda:0')



Epoch 11 / 49	train height loss: 207.0000	val height loss: 960.0000	

 24%|██▍       | 12/50 [00:08<00:21,  1.79it/s]DEBUG:__main__:Pred height: tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',
       grad_fn=<SqueezeBackward0>)
DEBUG:__main__:Labels height: tensor([18., 30., 33., 30., 30., 18., 30., 18.], device='cuda:0')



Epoch 12 / 49	train height loss: 207.0000	val height loss: 960.0000	

 26%|██▌       | 13/50 [00:09<00:20,  1.82it/s]DEBUG:__main__:Pred height: tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',
       grad_fn=<SqueezeBackward0>)
DEBUG:__main__:Labels height: tensor([30., 18., 30., 30., 18., 18., 30., 33.], device='cuda:0')



Epoch 13 / 49	train height loss: 207.0000	val height loss: 960.0000	

 28%|██▊       | 14/50 [00:09<00:19,  1.84it/s]DEBUG:__main__:Pred height: tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',
       grad_fn=<SqueezeBackward0>)
DEBUG:__main__:Labels height: tensor([18., 33., 18., 30., 30., 30., 18., 30.], device='cuda:0')



Epoch 14 / 49	train height loss: 207.0000	val height loss: 960.0000	

 30%|███       | 15/50 [00:10<00:18,  1.88it/s]DEBUG:__main__:Pred height: tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',
       grad_fn=<SqueezeBackward0>)
DEBUG:__main__:Labels height: tensor([18., 30., 30., 30., 30., 33., 18., 18.], device='cuda:0')



Epoch 15 / 49	train height loss: 207.0000	val height loss: 960.0000	

 32%|███▏      | 16/50 [00:10<00:17,  1.93it/s]DEBUG:__main__:Pred height: tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',
       grad_fn=<SqueezeBackward0>)
DEBUG:__main__:Labels height: tensor([30., 33., 30., 18., 30., 18., 18., 30.], device='cuda:0')



Epoch 16 / 49	train height loss: 207.0000	val height loss: 960.0000	

 34%|███▍      | 17/50 [00:11<00:17,  1.93it/s]DEBUG:__main__:Pred height: tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',
       grad_fn=<SqueezeBackward0>)
DEBUG:__main__:Labels height: tensor([18., 18., 30., 33., 30., 18., 30., 30.], device='cuda:0')



Epoch 17 / 49	train height loss: 207.0000	val height loss: 960.0000	

 36%|███▌      | 18/50 [00:11<00:21,  1.50it/s]


Epoch 18 / 49	




KeyboardInterrupt: 

In [None]:
torch.clip(torch.tensor([1,2,3, torch.nan]), 0, 1)

tensor([1., 1., 1., nan])