# UNet with Resnet 50 Backbone

Train Dataset Size: 

Val Dataset Size: 

Test Dataset Size:

In [1]:
# %pip install torch -q
# %pip install opencv-python -q
# %pip install pycocotools -q
# %pip install timm==0.6.12 -q
# %pip install ipdb -q

In [2]:
from backbones_unet.model.unet import Unet
from backbones_unet.utils.dataset import SemanticSegmentationDataset
from backbones_unet.model.losses import DiceLoss
from backbones_unet.utils.trainer import Trainer
from torchsummaryX import summary
from torch.utils.data import Dataset, DataLoader
from convert_coco_ann_to_mask import convert_coco_to_mask
from torchvision.datasets import ImageFolder

import torchvision
import torch
import os
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

  from .autonotebook import tqdm as notebook_tqdm


cuda


In [3]:
# Test Installation
random_tensor = torch.rand((1, 3, 64, 64))
model = Unet(in_channels=3, num_classes=1) # if no backbone specified, will default to Resnet50
print(model.predict(random_tensor))
# summary(model, random_tensor)

tensor([[[[ 0.0095, -3.1717, -3.0699,  ..., -2.2026, -0.0313, -0.6662],
          [-1.0159, -4.6398, -3.7498,  ..., -0.6888, -0.7272,  0.5047],
          [-0.0358, -4.6281, -5.2019,  ..., -2.5222, -0.1917, -0.4663],
          ...,
          [ 1.0560, -2.7619, -2.5045,  ..., -0.1012, -0.2656, -0.3822],
          [ 1.4382, -2.2161, -1.3311,  ..., -2.4980, -1.9654, -0.7534],
          [ 1.9079,  0.9607,  1.3907,  ...,  1.0014,  1.8487,  0.4266]]]])


In [4]:
# Feel free to add more items here
config = {
    "lr"         : 1e-4,
    "epochs"     : 100,
    "batch_size" : 1,  # Increase if your device can handle it
    "num_classes": 1,
    'truncated_normal_mean' : 0,
    'truncated_normal_std' : 0.2,
}

In [5]:
# create a torch.utils.data.Dataset/DataLoader
annotation_json_path = '/home/sush/klab2/rosbags_collated/sensors_2023-08-03-15-19-03_0/annotation.json'
train_img_path = '/home/sush/klab2/rosbags_collated/sensors_2023-08-03-15-19-03_0/images'
train_mask_path = '/home/sush/klab2/rosbags_collated/sensors_2023-08-03-15-19-03_0/masks'

train_img_path_for_ImageFolder_dataloader = '/home/sush/klab2/rosbags_collated/sensors_2023-08-03-15-19-03_0/images_with_class/'

#! Temporarily using train and val images as same
val_img_path = '/home/sush/klab2/rosbags_collated/sensors_2023-08-03-15-19-03_0/images'
val_mask_path = '/home/sush/klab2/rosbags_collated/sensors_2023-08-03-15-19-03_0/masks'

test_img_path = '/home/sush/klab2/rosbags_collated/sensors_2023-08-03-15-19-03_0/images'

# img_size = (1384, 1032) # = width, height            # currently PtGrey images
img_size = (1024, 1024)

## Extract Masks from the COCO annotations (if not already done)

In [6]:
# convert_coco_to_mask(input_json=annotation_json_path, image_folder=train_img_path, output_folder=train_mask_path)

In [7]:
# Find mean and std of your dataset:
def get_mean_and_std_calculated(IMAGE_DATA_DIR):
    """
    NOTE: The ImageFolder dataloader requires the following file structure:

    root
    |
    └── cat (class label)
        |
        ├──img_2.png
        └──img_1.png

    """
    train_dataset = ImageFolder(IMAGE_DATA_DIR, transform=torchvision.transforms.ToTensor())

    # Initialize lists to store channel-wise means and standard deviations
    channel_wise_means = [0.0, 0.0, 0.0]
    channel_wise_stds = [0.0, 0.0, 0.0]

    # Iterate through the training dataset to calculate means and standard deviations
    for image, _ in train_dataset:
        for i in range(3):  # Assuming RGB images
            channel_wise_means[i] += image[i, :, :].mean().item()
            channel_wise_stds[i] += image[i, :, :].std().item()

    # Calculate the mean and standard deviation for each channel
    num_samples = len(train_dataset)
    channel_wise_means = [mean / num_samples for mean in channel_wise_means]
    channel_wise_stds = [std / num_samples for std in channel_wise_stds]

    # Print the mean and standard deviation for each channel
    print("Mean:", channel_wise_means)
    print("Std:", channel_wise_stds)

    return channel_wise_means, channel_wise_stds

# means, stds = get_mean_and_std_calculated(train_img_path_for_ImageFolder_dataloader)
means = [0.44895144719250346, 0.4951483853617493, 0.4498602793532975]
stds = [0.21388493326245522, 0.24571933703763144, 0.22413276759337405]

normalize_transform = torchvision.transforms.Compose([
    torchvision.transforms.Normalize(mean=means, std=stds) # always normalize only after tensor conversion
    ])

In [8]:
train_dataset = SemanticSegmentationDataset(img_paths=train_img_path, mask_paths=train_mask_path, size=img_size, mode='binary', normalize=normalize_transform, transformations=None)
val_dataset = SemanticSegmentationDataset(img_paths=val_img_path, mask_paths=val_mask_path, size=img_size, mode='binary', normalize=normalize_transform, transformations=None)
test_dataset = SemanticSegmentationDataset(img_paths=val_img_path, mask_paths=None, size=img_size, normalize=normalize_transform, transformations=None)

temp = train_dataset.__getitem__(1)

# Create data loaders
train_loader = DataLoader(
    dataset     = train_dataset,
    batch_size  = config['batch_size'],
    shuffle     = True,
    num_workers = 4,
    pin_memory  = True
)

val_loader = DataLoader(
    dataset     = val_dataset,
    batch_size  = config['batch_size'],
    shuffle     = False,
    num_workers = 2
)

test_loader = DataLoader(
    dataset     = test_dataset,
    batch_size  = config['batch_size'],
    shuffle     = False,
    drop_last   = False,
    num_workers = 2)

In [9]:
model = Unet(
    # backbone='convnext_base', # backbone network name
    backbone='resnet50',
    preprocessing=True,
    in_channels=3, # input channels (1 for gray-scale images, 3 for RGB, etc.)
    num_classes=config["num_classes"],  # output channels (number of classes in your dataset)
    encoder_freeze=True,
    pretrained=True,
)

# model = model().to(device)
random_tensor = torch.rand((1, 3, 1024, 1024))
print(model.predict(random_tensor))

tensor([[[[ 5.2115,  0.2370,  1.2043,  ..., -1.7852, -0.2246, -0.8285],
          [ 4.4277, -2.7750, -1.7991,  ..., -2.6041, -0.2557,  0.4090],
          [ 6.2260, -1.7035, -1.0803,  ..., -0.9164,  0.0862,  1.4595],
          ...,
          [ 5.8238, -4.7048, -3.0375,  ..., -0.4114, -0.4451,  3.8712],
          [ 7.0829,  1.0555,  0.8149,  ...,  2.0249,  0.6730,  0.4979],
          [ 1.0909, -3.1810, -2.8109,  ...,  0.7426,  0.1331, -0.3203]]]])


In [10]:
# Define wandb credentials

import wandb
wandb.login(key="49efd84d0e342f343fb91401332234dea4a3ffe2") #API Key is in your wandb account, under settings (wandb.ai/settings)

run = wandb.init(
    name = "UNet_with_resnet_50", ## Wandb creates random run names if you skip this field
    reinit = True, ### Allows reinitalizing runs when you re-run this cell
    # run_id = ### Insert specific run id here if you want to resume a previous run
    # resume = "must" ### You need this to resume previous runs, but comment out reinit = True when using this
    project = "IDL_Project_Segmentation", ### Project should be created in your wandb account
    config = config ### Wandb Config for your run
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33msushantj[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/sush/.netrc


In [11]:
checkpoint_path = '/home/sush/klab2/Segmentation_Models/checkpoints/checkpoint.pth'

In [12]:
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.AdamW(params, lr=config['lr'], betas=(0.9, 0.999), weight_decay=0.05)
gamma = 0.8
milestones = [10,20,40,60,80]

# scheduler1 = torch.optim.lr_scheduler.ConstantLR(optimizer, factor=0.9, total_iters=5)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=gamma)
# scheduler3 = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
# scheduler = torch.optim.lr_scheduler.SequentialLR(optimizer, schedulers=[scheduler1, scheduler2, scheduler3], milestones=[20, 51])

mixed_precision_scaler = torch.cuda.amp.GradScaler()

trainer = Trainer(
    model=model,              # UNet model with Resnet50 backbone
    criterion=DiceLoss(),     # loss function
    optimizer=optimizer,
    epochs=10,
    # scaler=mixed_precision_scaler,
    lr_scheduler=scheduler,
    device=device,
    checkpoint_path=checkpoint_path
)

trainer.fit(train_loader, val_loader)

Epoch 1: 100%|[32m██████████[0m| 98/98 [00:31<00:00,  3.15 training-batch/s, loss=0.133]
Validation: 100%|[32m██████████[0m| 98/98 [00:10<00:00,  8.98 validating-batch/s, loss=0.144]


Val Loss 0.1506
Saving model


Epoch 2:  18%|[32m█▊        [0m| 18/98 [00:06<00:26,  2.97 training-batch/s, loss=0.126]
Traning Model on 10 epochs:  10%|█         | 1/10 [00:48<07:16, 48.54s/it]


KeyboardInterrupt: 

In [17]:
# Check if the checkpoint file exists
if os.path.exists(checkpoint_path):
    # If the checkpoint file exists, load the checkpoint
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']  # last epoch
    val_loss = checkpoint['val_loss']  # Update the best accuracy
    # Load the checkpoint and update the scheduler state if it exists in the checkpoint
    if 'scheduler_state_dict' in checkpoint:
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        print("Loaded scheduler state from checkpoint.")
    else:
        print("No scheduler state found in checkpoint.")
    print("Loaded checkpoint from:", checkpoint_path)
else:
    # If the checkpoint file does not exist, start training from scratch
    start_epoch = 0
    print("No checkpoint found at:", checkpoint_path)

print(model)

Loaded scheduler state from checkpoint.
Loaded checkpoint from: /home/sush/klab2/Segmentation_Models/checkpoints/checkpoint.pth
Unet(
  (encoder): FeatureListNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act1): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act1): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (drop_block): Identity()
        (act2): ReLU(inplace=True)
        (aa): Ident