# Step 0: Download the Dataset

In [None]:
#!pip install roboflow

# original images
#from roboflow import Roboflow
#rf = Roboflow(api_key="odKT1WyVSQ7iL8vb5eeU")
#project = rf.workspace("elteikai90").project("palm-trees-lfhgj")
#dataset = project.version(3).download("coco-segmentation")


# augmented images
!pip install roboflow

from roboflow import Roboflow
rf = Roboflow(api_key="odKT1WyVSQ7iL8vb5eeU")
project = rf.workspace("elteikai90").project("palm-trees-lfhgj")
dataset = project.version(2).download("coco-segmentation")


# Step 1: Imports and Setup

Instead of manually crafting diagrams, I decided to use torchvision's built-in functions to visualize the data. This is a good way to get a feel for the data and to make sure that it is being loaded correctly.

In [None]:
import os, random, copy, json, requests
import numpy as np
import torch
import torch.utils.data
from torchvision.models.detection import maskrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2
from torchvision.transforms import v2 as T
from torchvision.utils import make_grid, draw_bounding_boxes, draw_segmentation_masks
import torchvision.transforms.functional as F
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from matplotlib import patches, text
from PIL import Image
from pycocotools.coco import COCO

# Run on the first CUDA GPU when one is available, otherwise on the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f'device: {device}')

# Step 1.1: Helpers for visualization

In [None]:
# Column indices of a box stored as (top, left, bottom, right).
top = 0
left = 1
bottom = 2
right = 3


def calculate_iou(bbox_a, bbox_b):
    """Compute the element-wise intersection over union of two box batches.

    Args:
        bbox_a: tensor indexed as ``[:, column]`` whose columns are
            (top, left, bottom, right).
        bbox_b: tensor with the same layout; row ``i`` is compared with
            row ``i`` of ``bbox_a``.

    Returns:
        A float tensor with one IoU value per row. Pairs that do not overlap
        (including boxes that only touch) and pairs whose union is not
        positive get an IoU of 0.
    """
    # Corners of the candidate intersection rectangle.
    yA = torch.max(bbox_a[:, top], bbox_b[:, top])
    xA = torch.max(bbox_a[:, left], bbox_b[:, left])

    yB = torch.min(bbox_a[:, bottom], bbox_b[:, bottom])
    xB = torch.min(bbox_a[:, right], bbox_b[:, right])

    # Clamp each side at 0 so disjoint boxes yield an empty intersection.
    # (Multiplying two negative extents would otherwise give a positive area,
    # and masking with `bool_tensor ^ 1` produces an int64 tensor that indexes
    # rows 0/1 instead of acting as a boolean mask.)
    inter_w = (xB - xA).clamp(min=0)
    inter_h = (yB - yA).clamp(min=0)
    interArea = (inter_w * inter_h).float()

    # Areas of the individual boxes.
    boxAArea = ((bbox_a[:, bottom] - bbox_a[:, top]) * (bbox_a[:, right] - bbox_a[:, left])).float()
    boxBArea = ((bbox_b[:, bottom] - bbox_b[:, top]) * (bbox_b[:, right] - bbox_b[:, left])).float()

    # IoU = intersection / (area A + area B - intersection).
    union = boxAArea + boxBArea - interArea

    # Avoid NaN/inf for degenerate boxes whose union is zero.
    iou = torch.where(union > 0, interArea / union, torch.zeros_like(interArea))

    return iou


def img_to_np(img):
    """Copy a tensor to a NumPy array with its first axis moved to position 2.

    For a 3-D input this turns a (0, 1, 2) axis order into (1, 2, 0).
    The returned array is backed by a copy, so writing to it does not
    modify ``img``.
    """
    array = np.copy(img.cpu().numpy())
    # Moving axis 0 to index 2 equals swapping axes (0, 1) and then (1, 2).
    return np.moveaxis(array, 0, 2)

    
def add_bbox(ax, bbox, color, alpha=1, text=""):
    """Draw a box outline and a caption on ``ax``.

    Args:
        ax: axes object providing ``add_patch`` and ``text``.
        bbox: four values unpacked as (top, left, width, height).
        color: used for the outline and for the caption's background.
        alpha: opacity of the outline.
        text: caption placed at the (left, top) corner.
    """
    y0, x0, box_w, box_h = bbox

    # Outline only: the rectangle is anchored at (left, top) and has no fill.
    outline = patches.Rectangle(
        (x0, y0),
        box_w,
        box_h,
        linewidth=2,
        edgecolor=color,
        facecolor='none',
        alpha=alpha,
    )
    ax.add_patch(outline)

    # Caption on a square background filled with the box colour.
    caption_box = dict(boxstyle="square", fc=color)
    ax.text(x0, y0, text, fontsize=12, bbox=caption_box)
    
def visualize_anchors(ax, anchors):
    """Draw every anchor on ``ax`` as a faint white box (alpha 0.2)."""
    for anchor in anchors:
        # Coordinates are rounded and converted to integers before drawing.
        add_bbox(ax, torch.round(anchor).long(), color=[1.0, 1.0, 1.0], alpha=0.2)
        
def visualize_rpn(ax, nms_reg, nms_cls, img, color=[1.0, 1.0, 1.0], draw_all=False):
    """Draw region proposals on ``ax`` with their confidence as a caption.

    Args:
        ax: axes to draw on.
        nms_reg: iterable of boxes, each passed unchanged to ``add_bbox``.
        nms_cls: iterable of confidence values paired with ``nms_reg``.
        img: unused; kept so existing callers keep working.
        color: colour of the boxes and caption backgrounds (never mutated).
        draw_all: when True, draw proposals below 0.5 confidence as well.
    """
    # The image used to be converted to NumPy here, but the result was never
    # used, so the device-to-host copy has been dropped.
    for r, c in zip(nms_reg, nms_cls):
        if c >= 0.5 or draw_all:
            add_bbox(ax, r, color=color, text="c={:.2f}".format(c))

            
def visualize_rcnn(ax, rcnn_reg, rcnn_cls, color_map):
    """Draw each detection's box, coloured and captioned by its top class.

    For every (box, class-score vector) pair the highest-scoring class is
    selected. Detections whose best class is index 0 are skipped
    (presumably the background class — confirm against the model).
    ``color_map[0]`` is the fallback colour for classes without an entry.
    """
    for box, class_scores in zip(rcnn_reg, rcnn_cls):
        cls, index = torch.max(class_scores, dim=0)

        if index == 0:
            continue

        # The fallback is looked up first, exactly as before.
        fallback_color = color_map[0]
        class_id = int(index)
        cls_color = color_map[class_id] if class_id in color_map else fallback_color

        add_bbox(ax, box, color=cls_color, text="class={}: {:.2f}".format(index, cls))


In [None]:
def coco_segmentation_to_mask(segmentation, bbox, nopad = True, width=512, height=512):
    """
    Converts COCO polygon segmentation to binary mask format.

    Args:
        segmentation: list of polygons, each a flat ``[x1, y1, x2, y2, ...]``
            coordinate list. Every polygon is drawn, so objects split into
            several parts are fully covered. An empty list gives an empty mask.
        bbox: ``(x, y, w, h)``; only used when ``nopad`` is True.
        nopad: when True, crop the mask to the bbox (after truncating the
            bbox values to integers) to reduce its size.
        width, height: dimensions of the image the polygons refer to.

    Returns:
        A ``uint8`` array with 1 inside the polygons and 0 elsewhere, of shape
        ``(height, width)``, or the bbox crop of it when ``nopad`` is True.

    Raises:
        TypeError: if ``segmentation`` is a dict (RLE-style annotation), which
            this helper cannot rasterise.
    """
    # Local imports keep the helper self-contained (ImageDraw is not imported
    # at the top of the notebook).
    import numpy as np
    from PIL import Image, ImageDraw

    if isinstance(segmentation, dict):
        raise TypeError("coco_segmentation_to_mask only supports polygon segmentations, not RLE dicts")

    canvas = Image.fromarray(np.zeros((height, width), dtype=np.uint8))
    draw = ImageDraw.Draw(canvas)
    for polygon in segmentation:
        draw.polygon(polygon, outline=1, fill=1)
    mask = np.array(canvas)

    if nopad:
        # Crop to bbox to reduce mask size
        x, y, w, h = map(int, bbox)
        mask = mask[y:y+h, x:x+w]

    return mask

In [None]:
def visualize_mask_on_image(image, target, image_shape):
    """Return an RGBA copy of ``image`` with one annotation's mask painted in.

    Args:
        image: array whose first two axes are (height, width); a fourth,
            zero-initialised channel is appended to it.
        target: annotation dict providing ``'segmentation'`` and ``'bbox'``.
        image_shape: shape of the image; index 0 is used as the height and
            index 1 as the width of the mask.

    Returns:
        Array with one extra trailing channel. Mask pixels are set to
        ``[0, 255, 255, 128]``; all other pixels keep their colour and have
        the appended channel left at 0.
    """
    # Rasterise directly at full image size. Cropping to the bbox and padding
    # back fails for non-integer bbox values: the crop truncates them with
    # int(), while np.pad needs integral widths that add up to the image size.
    mask = coco_segmentation_to_mask(
        target['segmentation'],
        target['bbox'],
        nopad=False,
        width=image_shape[1],
        height=image_shape[0],
    )

    # Create an RGBA version of the image with the mask applied
    masked_image = np.concatenate([image, np.zeros((*image.shape[:2], 1), dtype=image.dtype)], axis=-1)
    mask_color = [0, 255, 255, 128]  # Cyan, 50% transparent
    masked_image[mask == 1] = mask_color

    return masked_image

In [None]:
def image_to_tensor(image):
    """Return a detached float copy of ``image`` with every value divided by 255.

    The input tensor is left untouched.
    """
    as_float = image.clone().detach().type(torch.float)
    return as_float / 255.0

# Step 2: Define the Dataset Class

We need a dataset class that can read the images and their corresponding annotations from the COCO format.

In [None]:
class PalmTreesDataset(CocoDetection):
    """COCO-format palm tree dataset.

    Thin wrapper around ``torchvision.datasets.CocoDetection`` that also fits
    a ``LabelEncoder`` on the category ids found in the annotation file.

    Args:
        root: directory containing the images.
        annFile: path to the COCO annotation JSON file.
        transforms: optional callable taking ``(image, target)`` and returning
            the transformed pair.
    """

    def __init__(self, root, annFile, transforms=None):
        # The parent constructor parses annFile into self.coco and stores
        # `transforms` as self.transforms, so the file is not loaded a
        # second time here.
        super().__init__(root, annFile, transforms=transforms)

        # Encoder fitted on the category ids declared in the annotation file.
        le = LabelEncoder()
        le.fit(self.coco.getCatIds())
        self.le = le

    def __getitem__(self, idx):
        """Return the ``(image, target)`` pair at position ``idx``."""
        # The parent already applies self.transforms jointly to the image and
        # its target. Applying them again to the image alone would transform
        # the image twice and leave it out of sync with its annotations.
        return super().__getitem__(idx)

# Step 3: Define Transformations

Define the transformations for the images. I have already introduced some augmentation with Roboflow.
> The example implemented a random flip; I use a built-in method from torchvision.

In [None]:
def get_transform(train):
    """Build the transform pipeline applied to every sample.

    ``train`` currently has no effect: the random horizontal flip it used to
    enable is switched off.

    NOTE(review): ``ToDtype`` runs before ``ToImage``. Later cells cast images
    to uint8 and divide by 255, so they appear to rely on the output still
    being in the 0-255 range — confirm before reordering these two steps.
    """
    # Disabled augmentation, kept for reference:
    #     if train: T.RandomHorizontalFlip(0.5)
    return T.Compose([
        T.ToDtype(torch.float, scale=True),
        T.ToImage(),
    ])


# Step 4: Load the Data

Load the data using the custom dataset class.

In [None]:
BATCH_SIZE = 16

# Image folder and annotation file of each split inside the downloaded dataset.
train_dir, valid_dir, test_dir = (
    f'{dataset.location}/{split}' for split in ('train', 'valid', 'test')
)
train_ann, valid_ann, test_ann = (
    f'{split_dir}/_annotations.coco.json' for split_dir in (train_dir, valid_dir, test_dir)
)


def _load_split(image_dir, ann_file, train):
    """Create the dataset of one split and wrap it for the v2 transforms."""
    split_dataset = PalmTreesDataset(image_dir, ann_file, get_transform(train=train))
    return wrap_dataset_for_transforms_v2(split_dataset, target_keys=("boxes", "labels", "masks"))


def _make_loader(split_dataset, shuffle):
    """Create a loader whose batches are (images, targets) tuples."""
    return torch.utils.data.DataLoader(
        split_dataset,
        batch_size=BATCH_SIZE,
        shuffle=shuffle,
        # Samples are kept as tuples instead of being stacked into one tensor.
        collate_fn=lambda batch: tuple(zip(*batch)),
    )


# Datasets
dataset_train = _load_split(train_dir, train_ann, train=True)
dataset_valid = _load_split(valid_dir, valid_ann, train=False)
dataset_test = _load_split(test_dir, test_ann, train=False)

# Data loaders: only the training split is shuffled.
data_loader_train = _make_loader(dataset_train, shuffle=True)
data_loader_valid = _make_loader(dataset_valid, shuffle=False)
data_loader_test = _make_loader(dataset_test, shuffle=False)

print("\n======================\n")
print(f'train size: {len(dataset_train)}')
print(f'valid size: {len(dataset_valid)}')
print(f'test size: {len(dataset_test)}')

# Show some images from the dataset

I found `torchvision.utils.make_grid` clumsy, so I wrote my own function to display the images.

In [None]:
num_images = 20  # Total number of images to display
num_columns = 5  # Number of columns in the grid
num_rows = num_images // num_columns + (num_images % num_columns > 0)  # Calculate the number of rows needed

# Create a figure with the specified number of subplots.
# squeeze=False keeps `axes` two-dimensional even for a 1x1 grid.
fig, axes = plt.subplots(num_rows, num_columns, figsize=(num_columns * 2, num_rows * 2), squeeze=False)

# Flatten the axes array for easy iteration
axes = axes.flatten()

# Hide the frame of every subplot up front, so cells that receive no image
# stay blank and no index bookkeeping is needed after the loop.
for ax in axes:
    ax.axis('off')

# Index only the samples that are shown: nothing extra is loaded, and an
# empty dataset simply produces an empty grid.
num_shown = min(num_images, len(dataset_train))
for i in range(num_shown):
    image, targets = dataset_train[i]
    ax = axes[i]

    # The drawing helpers below expect a uint8 image. This is a plain cast
    # without rescaling, so it presumably relies on the dataset already
    # yielding values in [0, 255].
    image = image.clone().detach().type(torch.uint8)

    # NOTE(review): samples without annotations may lack these keys — the
    # guard keeps such images visible without overlays.
    annotated_image = image
    if "boxes" in targets and len(targets["boxes"]) > 0:
        boxes = targets["boxes"]
        colors = ["red"] * len(boxes)
        annotated_image = draw_bounding_boxes(image, boxes=boxes, colors=colors, width=5)

    # display masks
    if "masks" in targets and len(targets["masks"]) > 0:
        masks = targets["masks"].clone().detach().type(torch.bool)
        annotated_image = draw_segmentation_masks(annotated_image, masks, alpha=0.5)

    ax.imshow(F.to_pil_image(annotated_image))

plt.tight_layout()
plt.show()

# Test the original COCO trained model

In [None]:
# Names of the COCO categories, indexed by the category id the COCO-trained
# model predicts (index 0 is the background). The ids run up to 91, so the
# list has 92 entries. Id 45 ('plate') must be present: without it every name
# from id 45 onwards is shifted by one position.
CLASS_NAMES91 = [
    'background', 'person', 'bike', 'auto', 'motorbike', 'airplane', 'bus', 'train', 'truck', 'boat',
    'traffic light', 'hydrant', 'sign', 'stop sign', 'parking clock', 'bench', 'bird', 'cat', 'dog', 'horse',
    'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'hat', 'backpack', 'umbrella', 'shoes',
    'glasses', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'flying kite', 'baseball bat',
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'plate', 'wine glass', 'cup', 'fork', 'knife',
    'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hotdog', 'pizza',
    'doughnut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'mirror', 'dining table', 'window', 'table',
    'toilet', 'door', 'tv', 'laptop', 'mouse', 'remote control', 'keyboard', 'mobile phone', 'microwave oven', 'oven',
    'toaster', 'dishwasher', 'refrigerator', 'smoothie', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair dryer',
    'toothbrush', 'comb',
]

# Baseline: Mask R-CNN with its default pre-trained weights, in inference mode.
model = maskrcnn_resnet50_fpn(weights='DEFAULT')
# Alternatives: restore the fine-tuned palm tree model from disk instead.
#model = torch.load('maskrcnn_weights/mask-rcnn-palmtrees.pt')
#model.load_state_dict(torch.load('maskrcnn_weights/mask-rcnn-palmtrees_weights.pt'))
model.to(device)
model.eval()


def tensor_to_PIL(image):
    """Convert an image tensor to an RGB PIL image."""
    to_pil = T.ToPILImage()
    return to_pil(image).convert('RGB')


# Pick one random image from the first training batch.
images, targets = next(iter(data_loader_train))
image = images[random.randrange(len(images))]

# Scaled float copy on the model's device; `image` itself stays untouched
# for drawing.
tensor_image = image_to_tensor(image).to(device)

# The model takes a list of images and returns one prediction dict per image.
with torch.no_grad():
    prediction = model([tensor_image])

# Function to visualize the image and the model predictions
def show_prediction(image, prediction, threshold=0.5):
    """Plot ``image`` with the model's detections drawn on top.

    Args:
        image: image tensor accepted by ``draw_segmentation_masks`` and
            ``draw_bounding_boxes``.
        prediction: list of prediction dicts as returned by the model; only
            the first entry is used. It must provide ``'boxes'``,
            ``'masks'``, ``'labels'`` and ``'scores'``.
        threshold: detections scoring below this value are discarded.
    """
    plt.figure(figsize=(10, 10))

    pred = prediction[0]
    # Filter out predictions with scores lower than the threshold
    keep = pred['scores'] >= threshold

    # Predictions live on the model's device, the image may not: move them
    # next to the image so the drawing helpers work on a single device.
    boxes = pred['boxes'][keep].to(image.device)
    masks = pred['masks'][keep].to(image.device)
    labels = pred['labels'][keep].cpu().tolist()
    scores = pred['scores'][keep].cpu()

    # Convert predicted masks to binary (true/false) format
    binary_masks = masks.squeeze(1) > 0.5  # Assuming masks shape is [num_objects, 1, H, W]

    # Fall back to the numeric id when a label has no entry in the name list.
    label_names = [
        CLASS_NAMES91[l] if 0 <= l < len(CLASS_NAMES91) else f'class {l}'
        for l in labels
    ]

    # Draw masks and bounding boxes (skipped when nothing passed the threshold)
    if len(boxes) > 0:
        image = draw_segmentation_masks(image, binary_masks, alpha=0.5)
        image = draw_bounding_boxes(image, boxes=boxes, colors='red', width=3)

    # Display the label and score
    for i, box in enumerate(boxes):
        box = box.cpu().numpy()
        plt.text(box[0], box[1], f'{label_names[i]}: {scores[i]:.2f}', color='white', fontsize=12, backgroundcolor='red')

    plt.imshow(F.to_pil_image(image))
    plt.axis('off')
    plt.show()

show_prediction(image, prediction)


# Step 5: Build the Model

Modify the build_model function to suit the number of classes in the dataset (background and palm tree).

The COCO annotation format has a lot of information, but we only need the bounding boxes, the class labels, and the segmentation masks.


In [None]:
def build_model(num_classes):
    """Create a pre-trained Mask R-CNN whose heads predict ``num_classes`` classes.

    Both the box predictor and the mask predictor are replaced by new ones
    sized for ``num_classes``; the rest of the network keeps the default
    pre-trained weights.
    """
    net = maskrcnn_resnet50_fpn(weights='DEFAULT')
    heads = net.roi_heads

    # New box classification head with the same number of input features.
    box_in_features = heads.box_predictor.cls_score.in_features
    heads.box_predictor = FastRCNNPredictor(box_in_features, num_classes)

    # New mask head with the same input channels and 256 hidden channels.
    mask_in_channels = heads.mask_predictor.conv5_mask.in_channels
    mask_hidden_channels = 256
    heads.mask_predictor = MaskRCNNPredictor(mask_in_channels, mask_hidden_channels, num_classes)

    return net

# Two classes: background and palm tree.
model = build_model(2)

# Step 6: Train the Model

Set up the optimizer, learning rate scheduler, and training loop.

In [None]:
def visualize_predictions(images, predictions, targets=None, threshold=0.5):
    """Plot predictions (left) next to the ground truth (right), one row per image.

    Args:
        images: sequence of image tensors accepted by the torchvision drawing
            helpers.
        predictions: one prediction dict per image, providing ``'boxes'``,
            ``'masks'`` and ``'scores'``.
        targets: optional sequence with one target dict per image, providing
            ``'boxes'`` and ``'masks'``. When omitted, the right-hand column
            is left empty.
        threshold: predictions scoring below this value are not drawn.
    """
    if len(images) == 0:
        return  # plt.subplots cannot create a grid with zero rows

    # squeeze=False keeps `axs` two-dimensional even for a single image.
    fig, axs = plt.subplots(len(images), 2, figsize=(12, len(images) * 5), squeeze=False)

    # zip() needs an iterable, so substitute one placeholder per image when
    # no ground truth is given.
    per_image_targets = targets if targets is not None else [None] * len(images)

    for i, (image, prediction, target) in enumerate(zip(images, predictions, per_image_targets)):
        # Filter out predictions with scores lower than the threshold
        keep = prediction['scores'] >= threshold

        # Predictions live on the model's device, the image may not: move
        # them next to the image before drawing.
        boxes = prediction['boxes'][keep].to(image.device)
        masks = prediction['masks'][keep].to(image.device)
        scores = prediction['scores'][keep].cpu()

        # Convert predicted masks to binary (true/false) format
        binary_masks = masks.squeeze(1) > 0.5  # Assuming masks shape is [num_objects, 1, H, W]

        # Draw masks and bounding boxes (skipped when nothing passed the threshold)
        pred_image = image
        if len(boxes) > 0:
            pred_image = draw_segmentation_masks(image, binary_masks, alpha=0.5)
            pred_image = draw_bounding_boxes(pred_image, boxes=boxes, colors='red', width=3)

        axs[i, 0].imshow(F.to_pil_image(pred_image))
        axs[i, 0].set_title('Predictions')
        axs[i, 0].axis('off')

        # Display the score
        for j, box in enumerate(boxes):
            box = box.cpu().numpy()
            axs[i, 0].text(box[0], box[1], f'{scores[j]:.2f}', color='white', fontsize=12, backgroundcolor='red')

        # The right-hand cell stays blank unless ground truth is available.
        axs[i, 1].axis('off')
        if target is not None:
            gt_boxes = target["boxes"].to(image.device)
            colors = ["red"] * len(gt_boxes)
            gt_image = draw_bounding_boxes(image, boxes=gt_boxes, colors=colors, width=3)

            # display masks
            gt_masks = target["masks"].clone().detach().to(image.device)
            gt_masks = gt_masks.type(torch.bool)
            gt_image = draw_segmentation_masks(gt_image, gt_masks, alpha=0.5)

            axs[i, 1].imshow(F.to_pil_image(gt_image))
            axs[i, 1].set_title('Ground Truth')

    plt.tight_layout()
    plt.show()


In [None]:
model = model.to(device)

# Running totals of the individual training losses. They are reset at the
# start of every epoch; this initial definition only guarantees that the name
# exists even if no epoch runs.
train_loss_dict_t = {}
train_loss_dict_t['loss_classifier']=0
train_loss_dict_t['loss_box_reg']=0
train_loss_dict_t['loss_objectness']=0
train_loss_dict_t['loss_rpn_box_reg']=0

# SGD
#optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)
# Adam
optimizer = torch.optim.AdamW(params=model.parameters(), lr=1e-5)

# Learning Rate Scheduler (Optional): A learning rate scheduler can help adjust the learning rate during training to improve convergence.
# NOTE(review): the loop below steps the scheduler once per batch, so a
# step_size of 3 would mean 3 batches, not 3 epochs — confirm before enabling.
#lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
lr_scheduler = None

# Training Loop
# number of epochs
num_epochs = 10

# Per-epoch history of each loss component (training and validation).
hist_train_classifier=[]
hist_valid_classifier=[]
hist_train_box_reg=[]
hist_valid_box_reg=[]
hist_train_objectness=[]
hist_valid_objectness=[]
hist_train_rpn_box_reg=[]
hist_valid_rpn_box_reg=[]

# Early stopping: keep the weights with the lowest total validation loss and
# stop after `early_stop_tolerant` consecutive epochs without improvement.
best_loss=float('inf')
best_model_wts = copy.deepcopy(model.state_dict())
early_stop_tolerant_count=0
early_stop_tolerant=10


for epoch in range(num_epochs):
    model.train()
    train_loss_dict_t = {}
    train_loss_dict_t['loss_classifier']=0
    train_loss_dict_t['loss_box_reg']=0
    train_loss_dict_t['loss_objectness']=0
    train_loss_dict_t['loss_rpn_box_reg']=0
    train_losses_t=0
    for images, targets in data_loader_train:

        tensor_images = [image_to_tensor(image) for image in images]
        images = list(tensor_image.to(device) for tensor_image in tensor_images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # In training mode the model returns a dict of loss tensors.
        train_loss_dict = model(images, targets)

        train_losses = sum(loss for loss in train_loss_dict.values())

        # Accumulate detached values only: adding the live loss tensors would
        # chain the autograd history of every batch into the running totals
        # and keep it alive for the whole epoch.
        train_loss_dict_t={x: train_loss_dict_t.get(x, 0) + train_loss_dict[x].detach()/len(dataset_train)*BATCH_SIZE  for x in set(train_loss_dict)}
        train_losses_t=train_losses_t+train_losses.detach()

        optimizer.zero_grad()
        train_losses.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()


    #===========================================================
    # Visualize some predictions
    model.eval()
    images, targets = next(iter(data_loader_valid))
    tensor_images = []
    for image in images:
        # Move image to the same device as the model
        tensor_image = image_to_tensor(image)
        tensor_image = tensor_image.to(device)
        tensor_images.append(tensor_image)

    with torch.no_grad():
        predictions = model(tensor_images)

    visualize_predictions(images, predictions, targets)
    model.train()
    #===========================================================


    # Validation
    # The model stays in training mode so that it returns losses; no_grad
    # only disables gradient tracking.
    with torch.no_grad():        # model.eval() would give different output
        valid_loss_dict_t = {}
        valid_loss_dict_t['loss_classifier']=0
        valid_loss_dict_t['loss_box_reg']=0
        valid_loss_dict_t['loss_objectness']=0
        valid_loss_dict_t['loss_rpn_box_reg']=0
        valid_losses_t=0
        for images, targets in data_loader_valid:

            tensor_images = [image_to_tensor(image) for image in images]
            images = list(tensor_image.to(device) for tensor_image in tensor_images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            valid_loss_dict = model(images, targets)

            valid_losses = sum(loss for loss in valid_loss_dict.values())
            valid_loss_dict_t={x: valid_loss_dict_t.get(x, 0) + valid_loss_dict.get(x, 0)/len(dataset_valid)*BATCH_SIZE  for x in set(valid_loss_dict)}
            valid_losses_t=valid_losses_t+valid_losses
        # NOTE(review): this uses the loss of the last batch only
        # (valid_losses, not valid_losses_t) and is not read anywhere in
        # this cell.
        valid_losses0=valid_losses/len(dataset_valid)*BATCH_SIZE
        hist_train_classifier.append(np.array(train_loss_dict_t['loss_classifier'].cpu().detach()))
        hist_valid_classifier.append(np.array(valid_loss_dict_t['loss_classifier'].cpu().detach()))
        hist_train_box_reg.append(np.array(train_loss_dict_t['loss_box_reg'].cpu().detach()))
        hist_valid_box_reg.append(np.array(valid_loss_dict_t['loss_box_reg'].cpu().detach()))
        hist_train_objectness.append(np.array(train_loss_dict_t['loss_objectness'].cpu().detach()))
        hist_valid_objectness.append(np.array(valid_loss_dict_t['loss_objectness'].cpu().detach()))
        hist_train_rpn_box_reg.append(np.array(train_loss_dict_t['loss_rpn_box_reg'].cpu().detach()))
        hist_valid_rpn_box_reg.append(np.array(valid_loss_dict_t['loss_rpn_box_reg'].cpu().detach()))

        # Always save the current best model based on the validation data, and stop the training if no improvements happen after a certain epoch.
        early_stop_tolerant_count=early_stop_tolerant_count+1
        if valid_losses_t < best_loss:
            early_stop_tolerant_count=0
            best_loss = valid_losses_t
            best_model_wts = copy.deepcopy(model.state_dict())
        if early_stop_tolerant_count>=early_stop_tolerant:
            break
        # `epoch % 1` is always 0, so every epoch is reported; raise the
        # modulus to print less often.
        if epoch % 1 != 0:
            continue
        # Display some information along the training
        print("Epoch: ", epoch, ".")
        print("Training losses: ")
        print("Classifier loss: ", format(train_loss_dict_t['loss_classifier'].item(), ".2f"))
        print("Box regression loss: ", format(train_loss_dict_t['loss_box_reg'].item(), ".2f"))
        print("Objectness loss: ", format(train_loss_dict_t['loss_objectness'].item(), ".2f"))
        print("RPN box regression loss: ", format(train_loss_dict_t['loss_rpn_box_reg'].item(), ".2f"))
        print("Validation losses: ")
        print("Classifier loss: ", format(valid_loss_dict_t['loss_classifier'].item(), ".2f"))
        print("Box regression loss: ", format(valid_loss_dict_t['loss_box_reg'].item(), ".2f"))
        print("Objectness loss: ", format(valid_loss_dict_t['loss_objectness'].item(), ".2f"))
        print("RPN box regression loss: ", format(valid_loss_dict_t['loss_rpn_box_reg'].item(), ".2f"))



# Step 7: Save model

In [None]:
# Create the output folder on first use.
weights_dir = 'maskrcnn_weights'
if not os.path.isdir(weights_dir):
    os.mkdir(weights_dir)

# Two artefacts are written: the whole model object as it is after the last
# epoch, and the state dict of the best epoch recorded during training.
torch.save(model, 'maskrcnn_weights/mask-rcnn-palmtrees.pt')
torch.save(best_model_wts, 'maskrcnn_weights/mask-rcnn-palmtrees_weights.pt')


# Step 8: Test model

In [None]:
#Test
torch.cuda.empty_cache()

# Evaluate the weights of the best epoch, not those of the last one.
model.load_state_dict(best_model_wts)

# Running totals of the individual loss components over the test split.
test_loss_dict_t = {
    'loss_classifier': 0,
    'loss_box_reg': 0,
    'loss_objectness': 0,
    'loss_rpn_box_reg': 0,
}
test_losses_t = 0

# The model is not switched to eval mode here, so it keeps returning losses;
# no_grad only disables gradient tracking.
with torch.no_grad():        # model.eval() would give different output
    for images, targets in data_loader_test:
        tensor_images = [image_to_tensor(image) for image in images]
        images = [tensor_image.to(device) for tensor_image in tensor_images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        test_loss_dict = model(images, targets)
        test_losses = sum(test_loss_dict.values())

        # Add this batch's share to every loss component the model reported.
        updated_totals = {}
        for loss_name in set(test_loss_dict):
            updated_totals[loss_name] = test_loss_dict_t.get(loss_name, 0) + test_loss_dict.get(loss_name, 0)/len(dataset_test)*BATCH_SIZE
        test_loss_dict_t = updated_totals


print("Test losses: ")
for caption, loss_name in (
    ("Classifier loss: ", 'loss_classifier'),
    ("Box regression loss: ", 'loss_box_reg'),
    ("Objectness loss: ", 'loss_objectness'),
    ("RPN box regression loss: ", 'loss_rpn_box_reg'),
):
    print(caption, format(test_loss_dict_t[loss_name].item(), ".2f"))

In [None]:
# Reload the best weights written by the save step.
# map_location places the tensors on the device in use, so a checkpoint saved
# on a GPU also loads on a CPU-only machine. weights_only restricts
# unpickling to tensors and plain containers, which is all a state dict needs.
model.load_state_dict(
    torch.load(
        'maskrcnn_weights/mask-rcnn-palmtrees_weights.pt',
        map_location=device,
        weights_only=True,
    )
)