In [17]:
# Load IPython's autoreload extension and select mode 2 (per IPython's docs:
# re-import all modules before executing each cell), so edits to the project
# modules imported below are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# This notebook investigates why the model is not producing gradients

In [18]:
import os
import sys
sys.path.append("../scaling")

import yaml

import torch
from torch import nn
import torch.nn.functional as F
from torchvision.transforms import v2
from ultralytics import YOLO
from torchinfo import summary

import torchvision.transforms as transforms

In [19]:
from evaluate import decode_yolov11_segmentation, decode_yolov11_segmentation2
from evaluate import combine_masks, decode_raw_labels
from torch.utils.data import DataLoader
from dataset import WaterLevelDataset, collate_fn
from evaluate import create_binary_mask
from matplotlib.patches import Polygon

In [20]:
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

# definitions

In [21]:
# os.path.dirname(os.curdir) evaluates to "" (os.curdir is "."), so every path
# below is resolved relative to the notebook's current working directory.
base_folder = os.path.dirname(os.curdir)
imgsz = 640  # the default on train.py
best_model_path = os.path.join(base_folder, "../runs/segment/yolo11l-seg-300ep/weights/best.pt")
config_file = os.path.join(base_folder, "../water.yaml")

# configuration file
print(f"Reading {config_file}")
# Decode explicitly as UTF-8 so the YAML parses the same way regardless of the
# platform's default locale encoding (which open() would otherwise use).
with open(config_file, 'r', encoding="utf-8") as file:
    config = yaml.safe_load(file)

Reading ../water.yaml


In [22]:
# Data: take one sample, deterministically (batch of one, no shuffling).
imgsz = 640
preprocessing_steps = [
    # resize both height and width to the same side length
    v2.Resize([imgsz, imgsz]),
    # Replacement for the deprecated v2.ToTensor(): ToImage wraps the input as
    # an image tensor, and ToDtype(..., scale=True) is the step that converts
    # to float32 and rescales the values to [0.0, 1.0].
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
]
transform = v2.Compose(preprocessing_steps)

dataset = WaterLevelDataset(config_path=config_file, transform=transform)
data_loader = DataLoader(dataset, batch_size=1, shuffle=False, collate_fn=collate_fn)
first_batch = next(iter(data_loader))
inputs, targets = first_batch


In [23]:
# build the target mask: create_binary_mask is given the batch's targets
# (not the input images) together with the image size
target_mask = create_binary_mask(targets, imgsz)


In [24]:
# load the best YOLO model
# `yolo_model` is the ultralytics YOLO wrapper; `model` is its `.model`
# attribute, which the cells below use directly (named_parameters(), train(),
# and a plain forward call).
yolo_model = YOLO(best_model_path)
model = yolo_model.model

## Evaluate model

- check the output structure

In [25]:
def check(model, where=""):
    """Report the parameters of ``model`` whose ``requires_grad`` flag is off.

    Args:
        model: any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, e.g. a ``torch.nn.Module``.
        where: optional label printed first so successive calls can be told
            apart in the cell output; empty or whitespace-only labels are
            skipped.

    Returns:
        None. Apart from the label, nothing is printed when every parameter
        requires grad. Otherwise the frozen parameter names are printed on one
        line (fewer than 10 of them) or only their count is printed.
    """
    if where.strip():
        print(where)
    frozen = [name for name, param in model.named_parameters() if not param.requires_grad]
    if not frozen:
        return
    # The marker used to be printed as mojibake ("ðŸ›‡", the UTF-8 bytes of the
    # prohibition sign decoded as cp1252); emit the intended character instead.
    if len(frozen) < 10:
        print("🛇", ",".join(frozen))
    else:
        print("🛇🛇🛇 Too many modules to list =", len(frozen))

In [26]:
# Mark every parameter as trainable before the gradient experiment.
# The assignment either takes effect or raises, so re-testing the flag right
# after setting it (as this cell used to do) could never print anything;
# the check(model) call in the following cell is the actual verification.
for name, param in model.named_parameters():
    param.requires_grad = True

In [27]:
# No label is passed, so this prints only if some parameter still has
# requires_grad == False; an empty output means all of them are trainable.
check(model)

In [28]:
# Loss and optimizer for a single debugging pass: one forward and one backward
# call, with check() interleaved to see whether any step turns requires_grad off.
# No weights are updated here because optimizer.step() is commented out below.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)

check(model, "before .train()")
model.train()  # switch to training mode; the check() right after reports any parameter whose requires_grad got turned off
check(model, "before zero_grad")
optimizer.zero_grad()
check(model, "before model(inputs)")
outputs = model(inputs)  # original note said (B, C, H, W) — TODO confirm; the result is passed as-is to decode_yolov11_segmentation2
boxes, masks = decode_yolov11_segmentation2(outputs)

pred_mask = combine_masks(masks)

# Compute the loss and its gradients
loss = loss_fn(pred_mask, target_mask)
loss.backward()
# optimizer.step()
# NOTE(review): the label below says "after optimizer", but optimizer.step() is
# disabled above, so this check actually runs right after loss.backward().
check(model, "after optimizer")

before .train()
before zero_grad
before model(inputs)
after optimizer


In [29]:
# Among the parameters flagged as trainable, list those whose .grad is None.
trainable = [(name, param) for name, param in model.named_parameters() if param.requires_grad]
num_params = len(trainable)
no_grad = [name for name, param in trainable if param.grad is None]

missing = len(no_grad)
if missing >= 10:
    print(f"Too many parameters don't have grad to list. Total = {missing}/{num_params}")
elif missing > 0:
    print(f"Only {no_grad} don't have grad")
else:
    print("all parameter have grad")

Too many parameters don't have grad to list. Total = 67/291


In [30]:
# Re-check the requires_grad flags after reading the .grad fields; when no
# parameter is frozen, only the label itself is printed.
check(model, "after check grad field")

after check grad field


In [31]:
# no_grad

In [32]:
# Spot check: is the conv weight of the first entry in model.model flagged as trainable?
model.model[0].conv.weight.requires_grad

True

In [33]:
# Value range of the gradient stored on the first entry's conv weight.
first_conv_grad = model.model[0].conv.weight.grad
first_conv_grad.min(), first_conv_grad.max()

(tensor(-8.3269), tensor(7.2993))