<center><h1>Fast Evaluate Multiple PyTorch Models Notebook</h1>
<h2>Matthias Bartolo</h2>

</center>

#### Package Imports and Constants - Remain the same

In [None]:
import json
import random
import torch
import torchvision
from torchvision.models.detection.retinanet import RetinaNetClassificationHead
from torchvision.models.detection.fcos import FCOSClassificationHead
from torchvision.models.detection.ssd import SSDClassificationHead
from torchvision.models.detection.ssdlite import SSDLiteClassificationHead
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from functools import partial

In [None]:
# Dataset root directory.
# Alternatives noted by the author: COCO-Dataset, BDW-Dataset-1, UAVVASTE-1
DIR_INPUT = "../datasets/SODA-Litter-Dataset-All-2"

# Tag inserted into every SAVE_DIR run path built further down in the notebook.
# Alternatives noted by the author: 'SODA_Dataset_Tiled_Single', 'BDW_Dataset_Test_Single',
# 'BDW_Dataset_Test2_Single', 'UAVVASTE_Dataset_Test_Single'
MODEL_NAME = "SODA_Dataset_Tiled_Multi"

# One directory per dataset split, all derived from the dataset root.
DIR_TRAIN, DIR_VALID, DIR_TEST = (
    f"{DIR_INPUT}/{split}" for split in ("train", "valid", "test")
)

# Names forwarded to evaluate.main_function.
# NOTE(review): presumably the image sub-folder and the COCO annotation file found
# inside each split directory — confirm against evaluate.
DIR_IMAGES = "images"
DIR_ANNOTATIONS = "_annotations.coco.json"

# Target image size handed to the evaluation routine.
IMG_RESIZE = (800, 800)

#### Classes - Remain the same

In [None]:
# For COCO Dataset
# # To get the classes from the annotation file
# classes_annotation_path = f'{DIR_TRAIN}/{DIR_ANNOTATIONS}'

# # Load the annotation file
# with open(classes_annotation_path, 'r') as f:
#     coco_data = json.load(f)

# # Extract class categories
# CLASSES = [
#     {
#         "id": category["id"],
#         "name": category["name"],
#         "supercategory": category.get("supercategory", "None"),
#         "color": (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)),
#     }
#     for category in coco_data["categories"]
# ]

# # Change id = 0 to Background
# CLASSES[0]['name'] = 'Background'

# For Litter Multi-Class:
# One (name, supercategory, color) row per class. The "id" of every entry is the
# row's position in the table, so the Background row has to stay first: it is the
# id-0 background class which was needed for FasterRCNN.
# NOTE(review): whether the color triplets are RGB or BGR depends on the drawing
# code, which is not visible here — confirm before attaching colour names to them.
CLASSES = [
    {"id": class_id, "name": name, "supercategory": supercategory, "color": list(color)}
    for class_id, (name, supercategory, color) in enumerate(
        [
            ("Background", "Background", (0, 0, 0)),
            ("Clear Plastic Bottle", "Bottle", (180, 240, 240)),
            ("Drink Can", "Can", (60, 60, 220)),
            ("Drink Carton", "Carton", (60, 180, 255)),
            ("Glass Bottle", "Bottle", (0, 130, 0)),
            ("Glass Jar", "Glass Jar", (100, 70, 50)),
            ("Other Plastic Bottle", "Bottle", (200, 200, 100)),
        ]
    )
]

# For Litter Binary:
# CLASSES = [
#     {
#         "id": 0,
#         "name": "Background",
#         "supercategory": "Background",
#         "color": [0, 0, 0],  # Black for background
#     },  # Background Class which was needed for FasterRCNN
#     {
#         "id": 1,
#         "name": "Litter",
#         "supercategory": "Litter",
#         "color": [80, 150, 80],  # Muted green for clear plastic bottle
#     },
# ]# Also labels.append(1)


# Number of classes in the dataset (the Background entry is counted as well)
NUM_CLASSES = len(CLASSES)

#### Image Means and Stds - Remain the same

In [None]:
# Calculating the image mean and standard deviation
"""
From Pytorch documentation:
    - mean (sequence) – Sequence of means for each channel.
    - std (sequence) – Sequence of standard deviations for each channel.
    https://pytorch.org/vision/0.9/_modules/torchvision/models/detection/faster_rcnn.html

    Varies based on the dataset used. For COCO dataset, the mean and standard deviation are:
        - mean = [0.485, 0.456, 0.406]
        - std = [0.229, 0.224, 0.225]

    IMPORTANT: CHANGE THESE FOR FINAL DATASET
"""
# NOTE(review): [0.485, 0.456, 0.406] / [0.229, 0.224, 0.225] are usually cited as the
# ImageNet statistics (torchvision's detection defaults) rather than COCO ones — confirm.
# NOTE(review): every preset below has four entries; presumably three for RGB plus one
# for the "Box_Mask" privileged channel — confirm against evaluate.

# Inactive presets, kept so the dataset can be switched by (un)commenting:

# COCO dataset
# img_means = [0.338, 0.320, 0.292, 0.077]
# img_stds = [0.314, 0.304, 0.302, 0.126]

# Pascal VOC dataset
# img_means = [0.452, 0.431, 0.399, 0.142]
# img_stds = [0.275, 0.273, 0.284, 0.216]

# BDW dataset
# img_means = [0.534, 0.554, 0.496, 0.041]
# img_stds = [0.183, 0.159, 0.205, 0.182]

# UAVVASTE dataset
# img_means = [0.522, 0.52, 0.446, 0.006]
# img_stds = [0.168, 0.169, 0.178, 0.073]

# Active preset: SODA Litter dataset
img_means = [0.467, 0.430, 0.357, 0.021]
img_stds = [0.255, 0.240, 0.233, 0.129]

# Every privileged-information directory known to the notebook; the individual
# model cells pick their own subset in PRIVILEGED_INFORMATION_DIRS.
ALL_PRIVILEGED_INFORMATION_DIRS = ["Box_Mask"]

#### Batch Size - Remain the same

In [None]:
# Settings forwarded to evaluate.main_function as BATCH_SIZE / NUM_WORKERS.
batch_size = 8  # alternative value kept by the author: 4
num_workers = 4  # alternative value kept by the author: 0

#### Specifications - Change

#### RetinaNet Baseline Model

In [None]:
# # Directory Inputs
# SAVE_DIR = f'../runs/RetinaNet/RetinaNet_{MODEL_NAME}_baseline1'

# # Privileged Information Paths
# PRIVILEGED_INFORMATION_DIRS = [

# # Special Constructed Features
#     # "Box_Mask",
# ]

# # Number of input image channels RGB + Extras
# NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# # Defining Model

# # Set the device to CUDA or CPU
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# # Load the RetinaNet model with pretrained weights
# weights = torchvision.models.detection.RetinaNet_ResNet50_FPN_Weights.DEFAULT
# pre_trained_model = torchvision.models.detection.retinanet_resnet50_fpn(weights=weights)

# model = pre_trained_model.to(device)

# # Modify the first convolutional layer for Multi-channel input
# model.backbone.body.conv1 = torch.nn.Conv2d(NUM_CHANNELS, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

# # Initialize the first convolutional layer's weights
# torch.nn.init.kaiming_normal_(model.backbone.body.conv1.weight, mode='fan_out', nonlinearity='relu')

# # Get the number of input features for the classification head
# in_features = model.head.classification_head.cls_logits.in_channels
# num_anchors = model.head.classification_head.num_anchors

# # Modify classification head to match the number of classes for your task
# # RetinaNetClassificationHead is redefined to include the correct number of classes
# model.head.classification_head = RetinaNetClassificationHead(
#     in_channels=in_features,
#     num_classes=NUM_CLASSES,
#     num_anchors=num_anchors,
#     norm_layer=partial(torch.nn.GroupNorm, 32)
# )

# # Move the model to the correct device (e.g., CUDA or CPU)
# model = model.to(device)

# # Verify the model structure
# print(model)

# import evaluate

# evaluate.main_function(
#     DIR_TEST=DIR_TEST,
#     DIR_IMAGES=DIR_IMAGES,
#     DIR_ANNOTATIONS=DIR_ANNOTATIONS,
#     IMG_RESIZE=IMG_RESIZE,
#     SAVE_DIR=SAVE_DIR,
#     CLASSES=CLASSES,
#     NUM_CLASSES=NUM_CLASSES,
#     PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
#     NUM_CHANNELS=NUM_CHANNELS,
#     img_means=img_means,
#     img_stds=img_stds,
#     ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
#     model=model,
#     BATCH_SIZE=batch_size,
#     NUM_WORKERS=num_workers,
#     model_name='RetinaNet',
# )

# del model

#### RetinaNet Teacher Model

In [None]:
# # Directory Inputs
# SAVE_DIR = f'../runs/RetinaNet/RetinaNet_{MODEL_NAME}_teacher1'

# # Privileged Information Paths
# PRIVILEGED_INFORMATION_DIRS = [

# # Special Constructed Features
#     "Box_Mask",
# ]

# # Number of input image channels RGB + Extras
# NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# # Defining Model

# # Set the device to CUDA or CPU
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# # Load the RetinaNet model with pretrained weights
# weights = torchvision.models.detection.RetinaNet_ResNet50_FPN_Weights.DEFAULT
# pre_trained_model = torchvision.models.detection.retinanet_resnet50_fpn(weights=weights)

# model = pre_trained_model.to(device)

# # Modify the first convolutional layer for Multi-channel input
# model.backbone.body.conv1 = torch.nn.Conv2d(NUM_CHANNELS, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

# # Initialize the first convolutional layer's weights
# torch.nn.init.kaiming_normal_(model.backbone.body.conv1.weight, mode='fan_out', nonlinearity='relu')

# # Get the number of input features for the classification head
# in_features = model.head.classification_head.cls_logits.in_channels
# num_anchors = model.head.classification_head.num_anchors

# # Modify classification head to match the number of classes for your task
# # RetinaNetClassificationHead is redefined to include the correct number of classes
# model.head.classification_head = RetinaNetClassificationHead(
#     in_channels=in_features,
#     num_classes=NUM_CLASSES,
#     num_anchors=num_anchors,
#     norm_layer=partial(torch.nn.GroupNorm, 32)
# )

# # Move the model to the correct device (e.g., CUDA or CPU)
# model = model.to(device)

# # Verify the model structure
# print(model)

# import evaluate

# evaluate.main_function(
#     DIR_TEST=DIR_TEST,
#     DIR_IMAGES=DIR_IMAGES,
#     DIR_ANNOTATIONS=DIR_ANNOTATIONS,
#     IMG_RESIZE=IMG_RESIZE,
#     SAVE_DIR=SAVE_DIR,
#     CLASSES=CLASSES,
#     NUM_CLASSES=NUM_CLASSES,
#     PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
#     NUM_CHANNELS=NUM_CHANNELS,
#     img_means=img_means,
#     img_stds=img_stds,
#     ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
#     model=model,
#     BATCH_SIZE=batch_size,
#     NUM_WORKERS=num_workers,
#     model_name='RetinaNet',
# )

# del model

#### RetinaNet Student Model

In [None]:
# # Directory Inputs
# SAVE_DIR = f'../runs/RetinaNet/RetinaNet_{MODEL_NAME}_student1'

# # Privileged Information Paths
# PRIVILEGED_INFORMATION_DIRS = [

# # Special Constructed Features
#     # "Box_Mask",
# ]

# # Number of input image channels RGB + Extras
# NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# # Defining Student Model
# # Set the device to CUDA or CPU
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# # Load the RetinaNet model with pretrained weights
# weights = torchvision.models.detection.RetinaNet_ResNet50_FPN_Weights.DEFAULT
# pre_trained_model = torchvision.models.detection.retinanet_resnet50_fpn(weights=weights)

# model = pre_trained_model.to(device)

# # Get the number of input features for the classification head
# in_features = model.head.classification_head.cls_logits.in_channels
# num_anchors = model.head.classification_head.num_anchors

# # Modify classification head to match the number of classes for your task
# # RetinaNetClassificationHead is redefined to include the correct number of classes
# model.head.classification_head = RetinaNetClassificationHead(
#     in_channels=in_features,
#     num_classes=NUM_CLASSES,
#     num_anchors=num_anchors,
#     norm_layer=partial(torch.nn.GroupNorm, 32)
# )

# # Move the model to the correct device (e.g., CUDA or CPU)
# model = model.to(device)

# # Verify the model structure
# print(model)

# import evaluate

# evaluate.main_function(
#     DIR_TEST=DIR_TEST,
#     DIR_IMAGES=DIR_IMAGES,
#     DIR_ANNOTATIONS=DIR_ANNOTATIONS,
#     IMG_RESIZE=IMG_RESIZE,
#     SAVE_DIR=SAVE_DIR,
#     CLASSES=CLASSES,
#     NUM_CLASSES=NUM_CLASSES,
#     PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
#     NUM_CHANNELS=NUM_CHANNELS,
#     img_means=img_means,
#     img_stds=img_stds,
#     ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
#     model=model,
#     BATCH_SIZE=batch_size,
#     NUM_WORKERS=num_workers,
#     model_name='RetinaNet',
# )

# del model

In [None]:
# # Directory Inputs
# SAVE_DIR = f'../runs/RetinaNet/RetinaNet_{MODEL_NAME}_student2'

# # Privileged Information Paths
# PRIVILEGED_INFORMATION_DIRS = [

# # Special Constructed Features
#     # "Box_Mask",
# ]

# # Number of input image channels RGB + Extras
# NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# # Defining Student Model
# # Set the device to CUDA or CPU
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# # Load the RetinaNet model with pretrained weights
# weights = torchvision.models.detection.RetinaNet_ResNet50_FPN_Weights.DEFAULT
# pre_trained_model = torchvision.models.detection.retinanet_resnet50_fpn(weights=weights)

# model = pre_trained_model.to(device)

# # Get the number of input features for the classification head
# in_features = model.head.classification_head.cls_logits.in_channels
# num_anchors = model.head.classification_head.num_anchors

# # Modify classification head to match the number of classes for your task
# # RetinaNetClassificationHead is redefined to include the correct number of classes
# model.head.classification_head = RetinaNetClassificationHead(
#     in_channels=in_features,
#     num_classes=NUM_CLASSES,
#     num_anchors=num_anchors,
#     norm_layer=partial(torch.nn.GroupNorm, 32)
# )

# # Move the model to the correct device (e.g., CUDA or CPU)
# model = model.to(device)

# # Verify the model structure
# print(model)

# import evaluate

# evaluate.main_function(
#     DIR_TEST=DIR_TEST,
#     DIR_IMAGES=DIR_IMAGES,
#     DIR_ANNOTATIONS=DIR_ANNOTATIONS,
#     IMG_RESIZE=IMG_RESIZE,
#     SAVE_DIR=SAVE_DIR,
#     CLASSES=CLASSES,
#     NUM_CLASSES=NUM_CLASSES,
#     PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
#     NUM_CHANNELS=NUM_CHANNELS,
#     img_means=img_means,
#     img_stds=img_stds,
#     ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
#     model=model,
#     BATCH_SIZE=batch_size,
#     NUM_WORKERS=num_workers,
#     model_name='RetinaNet',
# )

# del model

In [None]:
# Evaluate the RetinaNet "student3" run.
# The cell rebuilds the network architecture (pretrained RetinaNet whose
# classification head is replaced by a NUM_CLASSES-way head) and passes it to
# evaluate.main_function together with the dataset settings.
# NOTE(review): evaluate.main_function presumably restores the trained weights
# from SAVE_DIR before scoring — it is not visible here, confirm.

# Directory Inputs
SAVE_DIR = f'../runs/RetinaNet/RetinaNet_{MODEL_NAME}_student3'

# Privileged Information Paths
# No privileged channel is enabled in this cell, so the list stays empty.
PRIVILEGED_INFORMATION_DIRS = [
    # Special Constructed Features
    # "Box_Mask",
]

# Number of input image channels RGB + Extras
NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# Defining Student Model
# Set the device to CUDA or CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load the RetinaNet model with pretrained weights
weights = torchvision.models.detection.RetinaNet_ResNet50_FPN_Weights.DEFAULT
pre_trained_model = torchvision.models.detection.retinanet_resnet50_fpn(weights=weights)

# nn.Module.to() returns the module itself, so from here on `model` and
# `pre_trained_model` are two names for the very same object.
model = pre_trained_model.to(device)

# Get the number of input features for the classification head
in_features = model.head.classification_head.cls_logits.in_channels
num_anchors = model.head.classification_head.num_anchors

# Modify classification head to match the number of classes for your task
# RetinaNetClassificationHead is redefined to include the correct number of classes
model.head.classification_head = RetinaNetClassificationHead(
    in_channels=in_features,
    num_classes=NUM_CLASSES,
    num_anchors=num_anchors,
    norm_layer=partial(torch.nn.GroupNorm, 32)
)

# The replacement head is a freshly constructed module, so the model is moved
# once more to make sure every parameter lives on `device`.
model = model.to(device)

# Verify the model structure
print(model)

# NOTE(review): `evaluate` is presumably the project-local evaluation module.
import evaluate

evaluate.main_function(
    DIR_TEST=DIR_TEST,
    DIR_IMAGES=DIR_IMAGES,
    DIR_ANNOTATIONS=DIR_ANNOTATIONS,
    IMG_RESIZE=IMG_RESIZE,
    SAVE_DIR=SAVE_DIR,
    CLASSES=CLASSES,
    NUM_CLASSES=NUM_CLASSES,
    PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
    NUM_CHANNELS=NUM_CHANNELS,
    img_means=img_means,
    img_stds=img_stds,
    ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
    model=model,
    BATCH_SIZE=batch_size,
    NUM_WORKERS=num_workers,
    model_name='RetinaNet',
)

# Drop BOTH names. Deleting only `model` would keep the network alive (and its
# parameters on the device) through the `pre_trained_model` alias.
del model, pre_trained_model

In [None]:
# Evaluate the RetinaNet "student4" run.
# The cell rebuilds the network architecture (pretrained RetinaNet whose
# classification head is replaced by a NUM_CLASSES-way head) and passes it to
# evaluate.main_function together with the dataset settings.
# NOTE(review): evaluate.main_function presumably restores the trained weights
# from SAVE_DIR before scoring — it is not visible here, confirm.

# Directory Inputs
SAVE_DIR = f'../runs/RetinaNet/RetinaNet_{MODEL_NAME}_student4'

# Privileged Information Paths
# No privileged channel is enabled in this cell, so the list stays empty.
PRIVILEGED_INFORMATION_DIRS = [
    # Special Constructed Features
    # "Box_Mask",
]

# Number of input image channels RGB + Extras
NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# Defining Student Model
# Set the device to CUDA or CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load the RetinaNet model with pretrained weights
weights = torchvision.models.detection.RetinaNet_ResNet50_FPN_Weights.DEFAULT
pre_trained_model = torchvision.models.detection.retinanet_resnet50_fpn(weights=weights)

# nn.Module.to() returns the module itself, so from here on `model` and
# `pre_trained_model` are two names for the very same object.
model = pre_trained_model.to(device)

# Get the number of input features for the classification head
in_features = model.head.classification_head.cls_logits.in_channels
num_anchors = model.head.classification_head.num_anchors

# Modify classification head to match the number of classes for your task
# RetinaNetClassificationHead is redefined to include the correct number of classes
model.head.classification_head = RetinaNetClassificationHead(
    in_channels=in_features,
    num_classes=NUM_CLASSES,
    num_anchors=num_anchors,
    norm_layer=partial(torch.nn.GroupNorm, 32)
)

# The replacement head is a freshly constructed module, so the model is moved
# once more to make sure every parameter lives on `device`.
model = model.to(device)

# Verify the model structure
print(model)

# NOTE(review): `evaluate` is presumably the project-local evaluation module.
import evaluate

evaluate.main_function(
    DIR_TEST=DIR_TEST,
    DIR_IMAGES=DIR_IMAGES,
    DIR_ANNOTATIONS=DIR_ANNOTATIONS,
    IMG_RESIZE=IMG_RESIZE,
    SAVE_DIR=SAVE_DIR,
    CLASSES=CLASSES,
    NUM_CLASSES=NUM_CLASSES,
    PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
    NUM_CHANNELS=NUM_CHANNELS,
    img_means=img_means,
    img_stds=img_stds,
    ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
    model=model,
    BATCH_SIZE=batch_size,
    NUM_WORKERS=num_workers,
    model_name='RetinaNet',
)

# Drop BOTH names. Deleting only `model` would keep the network alive (and its
# parameters on the device) through the `pre_trained_model` alias.
del model, pre_trained_model

#### FCOS Baseline Model

In [None]:
# # Directory Inputs
# SAVE_DIR = f'../runs/FCOS/FCOS_{MODEL_NAME}_baseline1'

# # Privileged Information Paths
# PRIVILEGED_INFORMATION_DIRS = [

# # Special Constructed Features
#     # "Box_Mask",
# ]

# # Number of input image channels RGB + Extras
# NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# # Defining Model

# # Set the device to CUDA or CPU
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# # Load the FCOS model with pretrained weights
# weights = torchvision.models.detection.FCOS_ResNet50_FPN_Weights.DEFAULT
# pre_trained_model = torchvision.models.detection.fcos_resnet50_fpn(weights=weights)

# model = pre_trained_model.to(device)

# # Modify the first convolutional layer for 4-channel input
# model.backbone.body.conv1 = torch.nn.Conv2d(NUM_CHANNELS, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

# # Initialize the first convolutional layer's weights
# torch.nn.init.kaiming_normal_(model.backbone.body.conv1.weight, mode='fan_out', nonlinearity='relu')

# # Get the correct number of input features for the classifier
# # Get the number of input channels from the classification head
# in_features = model.head.classification_head.cls_logits.in_channels
# num_anchors = model.head.classification_head.num_anchors

# # Modify classification head to match the number of classes for your task
# # FCOSClassificationHead is redefined to include the correct number of classes
# model.head.classification_head = FCOSClassificationHead(
#     in_channels=in_features,
#     num_classes=NUM_CLASSES,
#     num_anchors=num_anchors,
#     norm_layer=partial(torch.nn.GroupNorm, 32)
# )

# # Move the model to the correct device (e.g., CUDA or CPU)
# model = model.to(device)

# # Verify the model structure
# print(model)

# import evaluate

# evaluate.main_function(
#     DIR_TEST=DIR_TEST,
#     DIR_IMAGES=DIR_IMAGES,
#     DIR_ANNOTATIONS=DIR_ANNOTATIONS,
#     IMG_RESIZE=IMG_RESIZE,
#     SAVE_DIR=SAVE_DIR,
#     CLASSES=CLASSES,
#     NUM_CLASSES=NUM_CLASSES,
#     PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
#     NUM_CHANNELS=NUM_CHANNELS,
#     img_means=img_means,
#     img_stds=img_stds,
#     ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
#     model=model,
#     BATCH_SIZE=batch_size,
#     NUM_WORKERS=num_workers,
#     model_name='FCOS',
# )

# del model

#### FCOS Teacher Model

In [None]:
# # Directory Inputs
# SAVE_DIR = f'../runs/FCOS/FCOS_{MODEL_NAME}_teacher1'

# # Privileged Information Paths
# PRIVILEGED_INFORMATION_DIRS = [

# # Special Constructed Features
#     "Box_Mask",
# ]

# # Number of input image channels RGB + Extras
# NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# # Defining Model

# # Set the device to CUDA or CPU
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# # Load the FCOS model with pretrained weights
# weights = torchvision.models.detection.FCOS_ResNet50_FPN_Weights.DEFAULT
# pre_trained_model = torchvision.models.detection.fcos_resnet50_fpn(weights=weights)

# model = pre_trained_model.to(device)

# # Modify the first convolutional layer for 4-channel input
# model.backbone.body.conv1 = torch.nn.Conv2d(NUM_CHANNELS, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

# # Initialize the first convolutional layer's weights
# torch.nn.init.kaiming_normal_(model.backbone.body.conv1.weight, mode='fan_out', nonlinearity='relu')

# # Get the correct number of input features for the classifier
# # Get the number of input channels from the classification head
# in_features = model.head.classification_head.cls_logits.in_channels
# num_anchors = model.head.classification_head.num_anchors

# # Modify classification head to match the number of classes for your task
# # FCOSClassificationHead is redefined to include the correct number of classes
# model.head.classification_head = FCOSClassificationHead(
#     in_channels=in_features,
#     num_classes=NUM_CLASSES,
#     num_anchors=num_anchors,
#     norm_layer=partial(torch.nn.GroupNorm, 32)
# )

# # Move the model to the correct device (e.g., CUDA or CPU)
# model = model.to(device)

# # Verify the model structure
# print(model)

# import evaluate

# evaluate.main_function(
#     DIR_TEST=DIR_TEST,
#     DIR_IMAGES=DIR_IMAGES,
#     DIR_ANNOTATIONS=DIR_ANNOTATIONS,
#     IMG_RESIZE=IMG_RESIZE,
#     SAVE_DIR=SAVE_DIR,
#     CLASSES=CLASSES,
#     NUM_CLASSES=NUM_CLASSES,
#     PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
#     NUM_CHANNELS=NUM_CHANNELS,
#     img_means=img_means,
#     img_stds=img_stds,
#     ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
#     model=model,
#     BATCH_SIZE=batch_size,
#     NUM_WORKERS=num_workers,
#     model_name='FCOS',
# )

# del model

#### FCOS Student Model

In [None]:
# # Directory Inputs
# SAVE_DIR = f'../runs/FCOS/FCOS_{MODEL_NAME}_student1'

# # Privileged Information Paths
# PRIVILEGED_INFORMATION_DIRS = [

# # Special Constructed Features
#     # "Box_Mask",
# ]

# # Number of input image channels RGB + Extras
# NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# # Defining Model

# # Set the device to CUDA or CPU
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# # Load the FCOS model with pretrained weights
# weights = torchvision.models.detection.FCOS_ResNet50_FPN_Weights.DEFAULT
# pre_trained_model = torchvision.models.detection.fcos_resnet50_fpn(weights=weights)

# model = pre_trained_model.to(device)

# # Modify the first convolutional layer for 4-channel input
# model.backbone.body.conv1 = torch.nn.Conv2d(NUM_CHANNELS, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

# # Initialize the first convolutional layer's weights
# torch.nn.init.kaiming_normal_(model.backbone.body.conv1.weight, mode='fan_out', nonlinearity='relu')

# # Get the correct number of input features for the classifier
# # Get the number of input channels from the classification head
# in_features = model.head.classification_head.cls_logits.in_channels
# num_anchors = model.head.classification_head.num_anchors

# # Modify classification head to match the number of classes for your task
# # FCOSClassificationHead is redefined to include the correct number of classes
# model.head.classification_head = FCOSClassificationHead(
#     in_channels=in_features,
#     num_classes=NUM_CLASSES,
#     num_anchors=num_anchors,
#     norm_layer=partial(torch.nn.GroupNorm, 32)
# )

# # Move the model to the correct device (e.g., CUDA or CPU)
# model = model.to(device)

# # Verify the model structure
# print(model)

# import evaluate

# evaluate.main_function(
#     DIR_TEST=DIR_TEST,
#     DIR_IMAGES=DIR_IMAGES,
#     DIR_ANNOTATIONS=DIR_ANNOTATIONS,
#     IMG_RESIZE=IMG_RESIZE,
#     SAVE_DIR=SAVE_DIR,
#     CLASSES=CLASSES,
#     NUM_CLASSES=NUM_CLASSES,
#     PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
#     NUM_CHANNELS=NUM_CHANNELS,
#     img_means=img_means,
#     img_stds=img_stds,
#     ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
#     model=model,
#     BATCH_SIZE=batch_size,
#     NUM_WORKERS=num_workers,
#     model_name='FCOS',
# )

# del model

In [None]:
# # Directory Inputs
# SAVE_DIR = f'../runs/FCOS/FCOS_{MODEL_NAME}_student2'

# # Privileged Information Paths
# PRIVILEGED_INFORMATION_DIRS = [

# # Special Constructed Features
#     # "Box_Mask",
# ]

# # Number of input image channels RGB + Extras
# NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# # Defining Model

# # Set the device to CUDA or CPU
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# # Load the FCOS model with pretrained weights
# weights = torchvision.models.detection.FCOS_ResNet50_FPN_Weights.DEFAULT
# pre_trained_model = torchvision.models.detection.fcos_resnet50_fpn(weights=weights)

# model = pre_trained_model.to(device)

# # Modify the first convolutional layer for 4-channel input
# model.backbone.body.conv1 = torch.nn.Conv2d(NUM_CHANNELS, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

# # Initialize the first convolutional layer's weights
# torch.nn.init.kaiming_normal_(model.backbone.body.conv1.weight, mode='fan_out', nonlinearity='relu')

# # Get the correct number of input features for the classifier
# # Get the number of input channels from the classification head
# in_features = model.head.classification_head.cls_logits.in_channels
# num_anchors = model.head.classification_head.num_anchors

# # Modify classification head to match the number of classes for your task
# # FCOSClassificationHead is redefined to include the correct number of classes
# model.head.classification_head = FCOSClassificationHead(
#     in_channels=in_features,
#     num_classes=NUM_CLASSES,
#     num_anchors=num_anchors,
#     norm_layer=partial(torch.nn.GroupNorm, 32)
# )

# # Move the model to the correct device (e.g., CUDA or CPU)
# model = model.to(device)

# # Verify the model structure
# print(model)

# import evaluate

# evaluate.main_function(
#     DIR_TEST=DIR_TEST,
#     DIR_IMAGES=DIR_IMAGES,
#     DIR_ANNOTATIONS=DIR_ANNOTATIONS,
#     IMG_RESIZE=IMG_RESIZE,
#     SAVE_DIR=SAVE_DIR,
#     CLASSES=CLASSES,
#     NUM_CLASSES=NUM_CLASSES,
#     PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
#     NUM_CHANNELS=NUM_CHANNELS,
#     img_means=img_means,
#     img_stds=img_stds,
#     ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
#     model=model,
#     BATCH_SIZE=batch_size,
#     NUM_WORKERS=num_workers,
#     model_name='FCOS',
# )

# del model

In [None]:
# Evaluate the FCOS "student3" run.
# The cell rebuilds the network architecture (pretrained FCOS with a replaced
# first convolution and a NUM_CLASSES-way classification head) and passes it to
# evaluate.main_function together with the dataset settings.
# NOTE(review): evaluate.main_function presumably restores the trained weights
# from SAVE_DIR before scoring — it is not visible here, confirm.

# Directory Inputs
SAVE_DIR = f'../runs/FCOS/FCOS_{MODEL_NAME}_student3'

# Privileged Information Paths
# No privileged channel is enabled in this cell, so the list stays empty.
PRIVILEGED_INFORMATION_DIRS = [
    # Special Constructed Features
    # "Box_Mask",
]

# Number of input image channels RGB + Extras
NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# Defining Model

# Set the device to CUDA or CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load the FCOS model with pretrained weights
weights = torchvision.models.detection.FCOS_ResNet50_FPN_Weights.DEFAULT
pre_trained_model = torchvision.models.detection.fcos_resnet50_fpn(weights=weights)

# nn.Module.to() returns the module itself, so from here on `model` and
# `pre_trained_model` are two names for the very same object.
model = pre_trained_model.to(device)

# Replace the first convolutional layer so that it accepts NUM_CHANNELS input
# channels (3 in this cell, because no privileged channel is enabled).
# NOTE(review): this discards the pretrained conv1 weights even for plain RGB
# input, whereas the RetinaNet student cells keep them; the evaluation is only
# meaningful if the trained weights are restored afterwards — confirm.
model.backbone.body.conv1 = torch.nn.Conv2d(NUM_CHANNELS, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

# Initialize the first convolutional layer's weights
torch.nn.init.kaiming_normal_(model.backbone.body.conv1.weight, mode='fan_out', nonlinearity='relu')

# Get the number of input channels and anchors from the existing classification head
in_features = model.head.classification_head.cls_logits.in_channels
num_anchors = model.head.classification_head.num_anchors

# Modify classification head to match the number of classes for your task
# FCOSClassificationHead is redefined to include the correct number of classes
model.head.classification_head = FCOSClassificationHead(
    in_channels=in_features,
    num_classes=NUM_CLASSES,
    num_anchors=num_anchors,
    norm_layer=partial(torch.nn.GroupNorm, 32)
)

# conv1 and the replacement head are freshly constructed modules, so the model
# is moved once more to make sure every parameter lives on `device`.
model = model.to(device)

# Verify the model structure
print(model)

# NOTE(review): `evaluate` is presumably the project-local evaluation module.
import evaluate

evaluate.main_function(
    DIR_TEST=DIR_TEST,
    DIR_IMAGES=DIR_IMAGES,
    DIR_ANNOTATIONS=DIR_ANNOTATIONS,
    IMG_RESIZE=IMG_RESIZE,
    SAVE_DIR=SAVE_DIR,
    CLASSES=CLASSES,
    NUM_CLASSES=NUM_CLASSES,
    PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
    NUM_CHANNELS=NUM_CHANNELS,
    img_means=img_means,
    img_stds=img_stds,
    ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
    model=model,
    BATCH_SIZE=batch_size,
    NUM_WORKERS=num_workers,
    model_name='FCOS',
)

# Drop BOTH names. Deleting only `model` would keep the network alive (and its
# parameters on the device) through the `pre_trained_model` alias.
del model, pre_trained_model

In [None]:
# Evaluate the FCOS "student4" run.
# The cell rebuilds the network architecture (pretrained FCOS with a replaced
# first convolution and a NUM_CLASSES-way classification head) and passes it to
# evaluate.main_function together with the dataset settings.
# NOTE(review): evaluate.main_function presumably restores the trained weights
# from SAVE_DIR before scoring — it is not visible here, confirm.

# Directory Inputs
SAVE_DIR = f'../runs/FCOS/FCOS_{MODEL_NAME}_student4'

# Privileged Information Paths
# No privileged channel is enabled in this cell, so the list stays empty.
PRIVILEGED_INFORMATION_DIRS = [
    # Special Constructed Features
    # "Box_Mask",
]

# Number of input image channels RGB + Extras
NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# Defining Model

# Set the device to CUDA or CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load the FCOS model with pretrained weights
weights = torchvision.models.detection.FCOS_ResNet50_FPN_Weights.DEFAULT
pre_trained_model = torchvision.models.detection.fcos_resnet50_fpn(weights=weights)

# nn.Module.to() returns the module itself, so from here on `model` and
# `pre_trained_model` are two names for the very same object.
model = pre_trained_model.to(device)

# Replace the first convolutional layer so that it accepts NUM_CHANNELS input
# channels (3 in this cell, because no privileged channel is enabled).
# NOTE(review): this discards the pretrained conv1 weights even for plain RGB
# input, whereas the RetinaNet student cells keep them; the evaluation is only
# meaningful if the trained weights are restored afterwards — confirm.
model.backbone.body.conv1 = torch.nn.Conv2d(NUM_CHANNELS, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

# Initialize the first convolutional layer's weights
torch.nn.init.kaiming_normal_(model.backbone.body.conv1.weight, mode='fan_out', nonlinearity='relu')

# Get the number of input channels and anchors from the existing classification head
in_features = model.head.classification_head.cls_logits.in_channels
num_anchors = model.head.classification_head.num_anchors

# Modify classification head to match the number of classes for your task
# FCOSClassificationHead is redefined to include the correct number of classes
model.head.classification_head = FCOSClassificationHead(
    in_channels=in_features,
    num_classes=NUM_CLASSES,
    num_anchors=num_anchors,
    norm_layer=partial(torch.nn.GroupNorm, 32)
)

# conv1 and the replacement head are freshly constructed modules, so the model
# is moved once more to make sure every parameter lives on `device`.
model = model.to(device)

# Verify the model structure
print(model)

# NOTE(review): `evaluate` is presumably the project-local evaluation module.
import evaluate

evaluate.main_function(
    DIR_TEST=DIR_TEST,
    DIR_IMAGES=DIR_IMAGES,
    DIR_ANNOTATIONS=DIR_ANNOTATIONS,
    IMG_RESIZE=IMG_RESIZE,
    SAVE_DIR=SAVE_DIR,
    CLASSES=CLASSES,
    NUM_CLASSES=NUM_CLASSES,
    PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
    NUM_CHANNELS=NUM_CHANNELS,
    img_means=img_means,
    img_stds=img_stds,
    ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
    model=model,
    BATCH_SIZE=batch_size,
    NUM_WORKERS=num_workers,
    model_name='FCOS',
)

# Drop BOTH names. Deleting only `model` would keep the network alive (and its
# parameters on the device) through the `pre_trained_model` alias.
del model, pre_trained_model

#### Faster R-CNN Baseline Model

In [None]:
# # Directory Inputs
# SAVE_DIR = f'../runs/FasterRCNN/FasterRCNN_{MODEL_NAME}_baseline1'

# # Privileged Information Paths
# PRIVILEGED_INFORMATION_DIRS = [

# # Special Constructed Features
#     # "Box_Mask",
# ]

# # Number of input image channels RGB + Extras
# NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# # Defining Model

# # Set the device to CUDA or CPU
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# # Load the FasterRCNN model with pretrained weights
# weights = torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.DEFAULT
# pre_trained_model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=weights)

# model = pre_trained_model.to(device)

# # Modify the first convolutional layer for 4-channel input
# model.backbone.body.conv1 = torch.nn.Conv2d(NUM_CHANNELS, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False).to(device)

# # Initialize the first convolutional layer's weights (was not working with the default initialization)
# torch.nn.init.kaiming_normal_(model.backbone.body.conv1.weight, mode='fan_out', nonlinearity='relu')

# # Get number of input features for the classifier
# in_features = model.roi_heads.box_predictor.cls_score.in_features

# # Replace the pre-trained head with a new one
# model.roi_heads.box_predictor = FastRCNNPredictor(in_features, NUM_CLASSES).to(device)
# # norm_layer=partial(torch.nn.GroupNorm, 32),  # Normalization layer

# # Move the model to the correct device (e.g., CUDA or CPU)
# model = model.to(device)
# # These don't seem to be necessary, but are included for completeness
# model.to(device)
# model.backbone.body.conv1.to(device)
# model.rpn.to(device)
# model.roi_heads.to(device)

# # Verify the model structure
# print(model)

# import evaluate

# evaluate.main_function(
#     DIR_TEST=DIR_TEST,
#     DIR_IMAGES=DIR_IMAGES,
#     DIR_ANNOTATIONS=DIR_ANNOTATIONS,
#     IMG_RESIZE=IMG_RESIZE,
#     SAVE_DIR=SAVE_DIR,
#     CLASSES=CLASSES,
#     NUM_CLASSES=NUM_CLASSES,
#     PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
#     NUM_CHANNELS=NUM_CHANNELS,
#     img_means=img_means,
#     img_stds=img_stds,
#     ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
#     model=model,
#     BATCH_SIZE=batch_size,
#     NUM_WORKERS=num_workers,
#     model_name='Faster R-CNN',
# )

# del model

#### Faster R-CNN Teacher Model

In [None]:
# # Directory Inputs
# SAVE_DIR = f'../runs/FasterRCNN/FasterRCNN_{MODEL_NAME}_teacher1'

# # Privileged Information Paths
# PRIVILEGED_INFORMATION_DIRS = [

# # Special Constructed Features
#     "Box_Mask",
# ]

# # Number of input image channels RGB + Extras
# NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# # Defining Model

# # Set the device to CUDA or CPU
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# # Load the FasterRCNN model with pretrained weights
# weights = torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.DEFAULT
# pre_trained_model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=weights)

# model = pre_trained_model.to(device)

# # Modify the first convolutional layer for 4-channel input
# model.backbone.body.conv1 = torch.nn.Conv2d(NUM_CHANNELS, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False).to(device)

# # Initialize the first convolutional layer's weights (was not working with the default initialization)
# torch.nn.init.kaiming_normal_(model.backbone.body.conv1.weight, mode='fan_out', nonlinearity='relu')

# # Get number of input features for the classifier
# in_features = model.roi_heads.box_predictor.cls_score.in_features

# # Replace the pre-trained head with a new one
# model.roi_heads.box_predictor = FastRCNNPredictor(in_features, NUM_CLASSES).to(device)
# # norm_layer=partial(torch.nn.GroupNorm, 32),  # Normalization layer

# # Move the model to the correct device (e.g., CUDA or CPU)
# model = model.to(device)
# # These don't seem to be necessary, but are included for completeness
# model.to(device)
# model.backbone.body.conv1.to(device)
# model.rpn.to(device)
# model.roi_heads.to(device)

# # Verify the model structure
# print(model)

# import evaluate

# evaluate.main_function(
#     DIR_TEST=DIR_TEST,
#     DIR_IMAGES=DIR_IMAGES,
#     DIR_ANNOTATIONS=DIR_ANNOTATIONS,
#     IMG_RESIZE=IMG_RESIZE,
#     SAVE_DIR=SAVE_DIR,
#     CLASSES=CLASSES,
#     NUM_CLASSES=NUM_CLASSES,
#     PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
#     NUM_CHANNELS=NUM_CHANNELS,
#     img_means=img_means,
#     img_stds=img_stds,
#     ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
#     model=model,
#     BATCH_SIZE=batch_size,
#     NUM_WORKERS=num_workers,
#     model_name='Faster R-CNN',
# )

# del model

#### Faster R-CNN Student Model

In [None]:
# # Directory Inputs
# SAVE_DIR = f'../runs/FasterRCNN/FasterRCNN_{MODEL_NAME}_student1'

# # Privileged Information Paths
# PRIVILEGED_INFORMATION_DIRS = [

# # Special Constructed Features
#     # "Box_Mask",
# ]

# # Number of input image channels RGB + Extras
# NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# # Defining Model

# # Set the device to CUDA or CPU
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# # Load the FasterRCNN model with pretrained weights
# weights = torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.DEFAULT
# pre_trained_model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=weights)

# model = pre_trained_model.to(device)

# # Modify the first convolutional layer for 4-channel input
# model.backbone.body.conv1 = torch.nn.Conv2d(NUM_CHANNELS, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False).to(device)

# # Initialize the first convolutional layer's weights (was not working with the default initialization)
# torch.nn.init.kaiming_normal_(model.backbone.body.conv1.weight, mode='fan_out', nonlinearity='relu')

# # Get number of input features for the classifier
# in_features = model.roi_heads.box_predictor.cls_score.in_features

# # Replace the pre-trained head with a new one
# model.roi_heads.box_predictor = FastRCNNPredictor(in_features, NUM_CLASSES).to(device)
# # norm_layer=partial(torch.nn.GroupNorm, 32),  # Normalization layer

# # Move the model to the correct device (e.g., CUDA or CPU)
# model = model.to(device)
# # These don't seem to be necessary, but are included for completeness
# model.to(device)
# model.backbone.body.conv1.to(device)
# model.rpn.to(device)
# model.roi_heads.to(device)

# # Verify the model structure
# print(model)

# import evaluate

# evaluate.main_function(
#     DIR_TEST=DIR_TEST,
#     DIR_IMAGES=DIR_IMAGES,
#     DIR_ANNOTATIONS=DIR_ANNOTATIONS,
#     IMG_RESIZE=IMG_RESIZE,
#     SAVE_DIR=SAVE_DIR,
#     CLASSES=CLASSES,
#     NUM_CLASSES=NUM_CLASSES,
#     PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
#     NUM_CHANNELS=NUM_CHANNELS,
#     img_means=img_means,
#     img_stds=img_stds,
#     ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
#     model=model,
#     BATCH_SIZE=batch_size,
#     NUM_WORKERS=num_workers,
#     model_name='Faster R-CNN',
# )

# del model

In [None]:
# # Directory Inputs
# SAVE_DIR = f'../runs/FasterRCNN/FasterRCNN_{MODEL_NAME}_student2'

# # Privileged Information Paths
# PRIVILEGED_INFORMATION_DIRS = [

# # Special Constructed Features
#     # "Box_Mask",
# ]

# # Number of input image channels RGB + Extras
# NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# # Defining Model

# # Set the device to CUDA or CPU
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# # Load the FasterRCNN model with pretrained weights
# weights = torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.DEFAULT
# pre_trained_model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=weights)

# model = pre_trained_model.to(device)

# # Modify the first convolutional layer for 4-channel input
# model.backbone.body.conv1 = torch.nn.Conv2d(NUM_CHANNELS, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False).to(device)

# # Initialize the first convolutional layer's weights (was not working with the default initialization)
# torch.nn.init.kaiming_normal_(model.backbone.body.conv1.weight, mode='fan_out', nonlinearity='relu')

# # Get number of input features for the classifier
# in_features = model.roi_heads.box_predictor.cls_score.in_features

# # Replace the pre-trained head with a new one
# model.roi_heads.box_predictor = FastRCNNPredictor(in_features, NUM_CLASSES).to(device)
# # norm_layer=partial(torch.nn.GroupNorm, 32),  # Normalization layer

# # Move the model to the correct device (e.g., CUDA or CPU)
# model = model.to(device)
# # These don't seem to be necessary, but are included for completeness
# model.to(device)
# model.backbone.body.conv1.to(device)
# model.rpn.to(device)
# model.roi_heads.to(device)

# # Verify the model structure
# print(model)

# import evaluate

# evaluate.main_function(
#     DIR_TEST=DIR_TEST,
#     DIR_IMAGES=DIR_IMAGES,
#     DIR_ANNOTATIONS=DIR_ANNOTATIONS,
#     IMG_RESIZE=IMG_RESIZE,
#     SAVE_DIR=SAVE_DIR,
#     CLASSES=CLASSES,
#     NUM_CLASSES=NUM_CLASSES,
#     PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
#     NUM_CHANNELS=NUM_CHANNELS,
#     img_means=img_means,
#     img_stds=img_stds,
#     ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
#     model=model,
#     BATCH_SIZE=batch_size,
#     NUM_WORKERS=num_workers,
#     model_name='Faster R-CNN',
# )

# del model

In [None]:
# Evaluate the Faster R-CNN run stored under the "student3" directory.
# NOTE(review): the freshly initialised conv1 / box predictor built below are
# presumably overwritten by a checkpoint that evaluate.main_function loads from
# SAVE_DIR -- confirm against evaluate.py.

# Directory Inputs
SAVE_DIR = f'../runs/FasterRCNN/FasterRCNN_{MODEL_NAME}_student3'

# Privileged Information Paths (every entry is commented out, so the list is empty)
PRIVILEGED_INFORMATION_DIRS = [

# Special Constructed Features
    # "Box_Mask",
]

# Number of input image channels RGB + Extras
NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# Defining Model

# Set the device to CUDA or CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load the FasterRCNN model with pretrained weights
weights = torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.DEFAULT
pre_trained_model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=weights)

# Module.to() returns the module itself, so `model` is an alias of `pre_trained_model`
model = pre_trained_model.to(device)

# Replace the first convolutional layer so it accepts NUM_CHANNELS input channels
model.backbone.body.conv1 = torch.nn.Conv2d(NUM_CHANNELS, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False).to(device)

# Initialize the first convolutional layer's weights (was not working with the default initialization)
torch.nn.init.kaiming_normal_(model.backbone.body.conv1.weight, mode='fan_out', nonlinearity='relu')

# Get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Replace the pre-trained head with a new one sized for NUM_CLASSES
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, NUM_CLASSES).to(device)

# One call moves every submodule (backbone, rpn, roi_heads), so the per-submodule
# .to(device) calls that used to follow were no-ops and have been removed.
model = model.to(device)

# Verify the model structure
print(model)

import evaluate

evaluate.main_function(
    DIR_TEST=DIR_TEST,
    DIR_IMAGES=DIR_IMAGES,
    DIR_ANNOTATIONS=DIR_ANNOTATIONS,
    IMG_RESIZE=IMG_RESIZE,
    SAVE_DIR=SAVE_DIR,
    CLASSES=CLASSES,
    NUM_CLASSES=NUM_CLASSES,
    PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
    NUM_CHANNELS=NUM_CHANNELS,
    img_means=img_means,
    img_stds=img_stds,
    ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
    model=model,
    BATCH_SIZE=batch_size,
    NUM_WORKERS=num_workers,
    model_name='Faster R-CNN',
)

# Both names refer to the same module; deleting only `model` would keep the network
# alive through `pre_trained_model` until a later cell rebinds it.
del model, pre_trained_model
if device.type == 'cuda':
    # Hand the now-unreferenced cached blocks back to the GPU before the next model loads
    torch.cuda.empty_cache()

In [None]:
# Evaluate the Faster R-CNN run stored under the "student4" directory.
# NOTE(review): the freshly initialised conv1 / box predictor built below are
# presumably overwritten by a checkpoint that evaluate.main_function loads from
# SAVE_DIR -- confirm against evaluate.py.

# Directory Inputs
SAVE_DIR = f'../runs/FasterRCNN/FasterRCNN_{MODEL_NAME}_student4'

# Privileged Information Paths (every entry is commented out, so the list is empty)
PRIVILEGED_INFORMATION_DIRS = [

# Special Constructed Features
    # "Box_Mask",
]

# Number of input image channels RGB + Extras
NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# Defining Model

# Set the device to CUDA or CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load the FasterRCNN model with pretrained weights
weights = torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.DEFAULT
pre_trained_model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=weights)

# Module.to() returns the module itself, so `model` is an alias of `pre_trained_model`
model = pre_trained_model.to(device)

# Replace the first convolutional layer so it accepts NUM_CHANNELS input channels
model.backbone.body.conv1 = torch.nn.Conv2d(NUM_CHANNELS, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False).to(device)

# Initialize the first convolutional layer's weights (was not working with the default initialization)
torch.nn.init.kaiming_normal_(model.backbone.body.conv1.weight, mode='fan_out', nonlinearity='relu')

# Get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Replace the pre-trained head with a new one sized for NUM_CLASSES
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, NUM_CLASSES).to(device)

# One call moves every submodule (backbone, rpn, roi_heads), so the per-submodule
# .to(device) calls that used to follow were no-ops and have been removed.
model = model.to(device)

# Verify the model structure
print(model)

import evaluate

evaluate.main_function(
    DIR_TEST=DIR_TEST,
    DIR_IMAGES=DIR_IMAGES,
    DIR_ANNOTATIONS=DIR_ANNOTATIONS,
    IMG_RESIZE=IMG_RESIZE,
    SAVE_DIR=SAVE_DIR,
    CLASSES=CLASSES,
    NUM_CLASSES=NUM_CLASSES,
    PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
    NUM_CHANNELS=NUM_CHANNELS,
    img_means=img_means,
    img_stds=img_stds,
    ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
    model=model,
    BATCH_SIZE=batch_size,
    NUM_WORKERS=num_workers,
    model_name='Faster R-CNN',
)

# Both names refer to the same module; deleting only `model` would keep the network
# alive through `pre_trained_model` until a later cell rebinds it.
del model, pre_trained_model
if device.type == 'cuda':
    # Hand the now-unreferenced cached blocks back to the GPU before the next model loads
    torch.cuda.empty_cache()

#### SSD Baseline Model

In [None]:
# # Directory Inputs
# SAVE_DIR = f'../runs/SSD/SSD_{MODEL_NAME}_baseline1'

# # Privileged Information Paths
# PRIVILEGED_INFORMATION_DIRS = [

# # Special Constructed Features
#     # "Box_Mask",
# ]

# # Number of input image channels RGB + Extras
# NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# # Defining Model

# # Set the device to CUDA or CPU
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# # Load the pretrained SSD model (standard SSD)
# weights = torchvision.models.detection.SSD300_VGG16_Weights.DEFAULT
# pre_trained_model = torchvision.models.detection.ssd300_vgg16(weights=weights)

# model = pre_trained_model.to(device)

# # Modify the first convolutional layer for 4-channel input
# # In SSD, the input convolution layer is part of the VGG model's backbone
# model.backbone.features[0] = torch.nn.Conv2d(NUM_CHANNELS, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

# # Initialize the first convolutional layer's weights
# torch.nn.init.kaiming_normal_(model.backbone.features[0].weight, mode='fan_out', nonlinearity='relu')

# # Modify the classification head
# # SSD uses a set of convolutional layers for the classification head, which needs to be adapted for your number of classes
# # We need to retrieve the correct number of channels for each feature map in the SSD model
# in_channels = [layer.in_channels for layer in model.head.classification_head.module_list]
# num_anchors = model.anchor_generator.num_anchors_per_location()

# # Redefine the classification head to match the number of classes
# model.head.classification_head = SSDClassificationHead(
#     in_channels=in_channels,  # List of input channels for each feature map
#     num_anchors=num_anchors,  # List of anchors per location for each feature map
#     num_classes=NUM_CLASSES,  # Number of classes (including background)
#     # norm_layer=partial(torch.nn.GroupNorm, 32)
# )

# # Move the model to the correct device (e.g., CUDA or CPU)
# model = model.to(device)

# # Verify the model structure
# print(model)

# import evaluate

# evaluate.main_function(
#     DIR_TEST=DIR_TEST,
#     DIR_IMAGES=DIR_IMAGES,
#     DIR_ANNOTATIONS=DIR_ANNOTATIONS,
#     IMG_RESIZE=IMG_RESIZE,
#     SAVE_DIR=SAVE_DIR,
#     CLASSES=CLASSES,
#     NUM_CLASSES=NUM_CLASSES,
#     PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
#     NUM_CHANNELS=NUM_CHANNELS,
#     img_means=img_means,
#     img_stds=img_stds,
#     ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
#     model=model,
#     BATCH_SIZE=batch_size,
#     NUM_WORKERS=num_workers,
#     model_name='SSD',
# )

# del model

#### SSD Teacher Model

In [None]:
# # Directory Inputs
# SAVE_DIR = f'../runs/SSD/SSD_{MODEL_NAME}_teacher1'

# # Privileged Information Paths
# PRIVILEGED_INFORMATION_DIRS = [

# # Special Constructed Features
#     "Box_Mask",
# ]

# # Number of input image channels RGB + Extras
# NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# # Defining Model

# # Set the device to CUDA or CPU
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# # Load the pretrained SSD model (standard SSD)
# weights = torchvision.models.detection.SSD300_VGG16_Weights.DEFAULT
# pre_trained_model = torchvision.models.detection.ssd300_vgg16(weights=weights)

# model = pre_trained_model.to(device)

# # Modify the first convolutional layer for 4-channel input
# # In SSD, the input convolution layer is part of the VGG model's backbone
# model.backbone.features[0] = torch.nn.Conv2d(NUM_CHANNELS, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

# # Initialize the first convolutional layer's weights
# torch.nn.init.kaiming_normal_(model.backbone.features[0].weight, mode='fan_out', nonlinearity='relu')

# # Modify the classification head
# # SSD uses a set of convolutional layers for the classification head, which needs to be adapted for your number of classes
# # We need to retrieve the correct number of channels for each feature map in the SSD model
# in_channels = [layer.in_channels for layer in model.head.classification_head.module_list]
# num_anchors = model.anchor_generator.num_anchors_per_location()

# # Redefine the classification head to match the number of classes
# model.head.classification_head = SSDClassificationHead(
#     in_channels=in_channels,  # List of input channels for each feature map
#     num_anchors=num_anchors,  # List of anchors per location for each feature map
#     num_classes=NUM_CLASSES,  # Number of classes (including background)
#     # norm_layer=partial(torch.nn.GroupNorm, 32)
# )

# # Move the model to the correct device (e.g., CUDA or CPU)
# model = model.to(device)

# # Verify the model structure
# print(model)

# import evaluate

# evaluate.main_function(
#     DIR_TEST=DIR_TEST,
#     DIR_IMAGES=DIR_IMAGES,
#     DIR_ANNOTATIONS=DIR_ANNOTATIONS,
#     IMG_RESIZE=IMG_RESIZE,
#     SAVE_DIR=SAVE_DIR,
#     CLASSES=CLASSES,
#     NUM_CLASSES=NUM_CLASSES,
#     PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
#     NUM_CHANNELS=NUM_CHANNELS,
#     img_means=img_means,
#     img_stds=img_stds,
#     ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
#     model=model,
#     BATCH_SIZE=batch_size,
#     NUM_WORKERS=num_workers,
#     model_name='SSD',
# )

# del model

#### SSD Student Model

In [None]:
# # Directory Inputs
# SAVE_DIR = f'../runs/SSD/SSD_{MODEL_NAME}_student1'

# # Privileged Information Paths
# PRIVILEGED_INFORMATION_DIRS = [

# # Special Constructed Features
#     # "Box_Mask",
# ]

# # Number of input image channels RGB + Extras
# NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# # Defining Model

# # Set the device to CUDA or CPU
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# # Load the pretrained SSD model (standard SSD)
# weights = torchvision.models.detection.SSD300_VGG16_Weights.DEFAULT
# pre_trained_model = torchvision.models.detection.ssd300_vgg16(weights=weights)

# model = pre_trained_model.to(device)

# # Modify the first convolutional layer for 4-channel input
# # In SSD, the input convolution layer is part of the VGG model's backbone
# model.backbone.features[0] = torch.nn.Conv2d(NUM_CHANNELS, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

# # Initialize the first convolutional layer's weights
# torch.nn.init.kaiming_normal_(model.backbone.features[0].weight, mode='fan_out', nonlinearity='relu')

# # Modify the classification head
# # SSD uses a set of convolutional layers for the classification head, which needs to be adapted for your number of classes
# # We need to retrieve the correct number of channels for each feature map in the SSD model
# in_channels = [layer.in_channels for layer in model.head.classification_head.module_list]
# num_anchors = model.anchor_generator.num_anchors_per_location()

# # Redefine the classification head to match the number of classes
# model.head.classification_head = SSDClassificationHead(
#     in_channels=in_channels,  # List of input channels for each feature map
#     num_anchors=num_anchors,  # List of anchors per location for each feature map
#     num_classes=NUM_CLASSES,  # Number of classes (including background)
#     # norm_layer=partial(torch.nn.GroupNorm, 32)
# )

# # Move the model to the correct device (e.g., CUDA or CPU)
# model = model.to(device)

# # Verify the model structure
# print(model)

# import evaluate

# evaluate.main_function(
#     DIR_TEST=DIR_TEST,
#     DIR_IMAGES=DIR_IMAGES,
#     DIR_ANNOTATIONS=DIR_ANNOTATIONS,
#     IMG_RESIZE=IMG_RESIZE,
#     SAVE_DIR=SAVE_DIR,
#     CLASSES=CLASSES,
#     NUM_CLASSES=NUM_CLASSES,
#     PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
#     NUM_CHANNELS=NUM_CHANNELS,
#     img_means=img_means,
#     img_stds=img_stds,
#     ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
#     model=model,
#     BATCH_SIZE=batch_size,
#     NUM_WORKERS=num_workers,
#     model_name='SSD',
# )

# del model

In [None]:
# # Directory Inputs
# SAVE_DIR = f'../runs/SSD/SSD_{MODEL_NAME}_student2'

# # Privileged Information Paths
# PRIVILEGED_INFORMATION_DIRS = [

# # Special Constructed Features
#     # "Box_Mask",
# ]

# # Number of input image channels RGB + Extras
# NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# # Defining Model

# # Set the device to CUDA or CPU
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# # Load the pretrained SSD model (standard SSD)
# weights = torchvision.models.detection.SSD300_VGG16_Weights.DEFAULT
# pre_trained_model = torchvision.models.detection.ssd300_vgg16(weights=weights)

# model = pre_trained_model.to(device)

# # Modify the first convolutional layer for 4-channel input
# # In SSD, the input convolution layer is part of the VGG model's backbone
# model.backbone.features[0] = torch.nn.Conv2d(NUM_CHANNELS, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

# # Initialize the first convolutional layer's weights
# torch.nn.init.kaiming_normal_(model.backbone.features[0].weight, mode='fan_out', nonlinearity='relu')

# # Modify the classification head
# # SSD uses a set of convolutional layers for the classification head, which needs to be adapted for your number of classes
# # We need to retrieve the correct number of channels for each feature map in the SSD model
# in_channels = [layer.in_channels for layer in model.head.classification_head.module_list]
# num_anchors = model.anchor_generator.num_anchors_per_location()

# # Redefine the classification head to match the number of classes
# model.head.classification_head = SSDClassificationHead(
#     in_channels=in_channels,  # List of input channels for each feature map
#     num_anchors=num_anchors,  # List of anchors per location for each feature map
#     num_classes=NUM_CLASSES,  # Number of classes (including background)
#     # norm_layer=partial(torch.nn.GroupNorm, 32)
# )

# # Move the model to the correct device (e.g., CUDA or CPU)
# model = model.to(device)

# # Verify the model structure
# print(model)

# import evaluate

# evaluate.main_function(
#     DIR_TEST=DIR_TEST,
#     DIR_IMAGES=DIR_IMAGES,
#     DIR_ANNOTATIONS=DIR_ANNOTATIONS,
#     IMG_RESIZE=IMG_RESIZE,
#     SAVE_DIR=SAVE_DIR,
#     CLASSES=CLASSES,
#     NUM_CLASSES=NUM_CLASSES,
#     PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
#     NUM_CHANNELS=NUM_CHANNELS,
#     img_means=img_means,
#     img_stds=img_stds,
#     ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
#     model=model,
#     BATCH_SIZE=batch_size,
#     NUM_WORKERS=num_workers,
#     model_name='SSD',
# )

# del model

In [None]:
# Evaluate the SSD run stored under the "student3" directory.
# NOTE(review): the freshly initialised first conv / classification head built below
# are presumably overwritten by a checkpoint that evaluate.main_function loads from
# SAVE_DIR -- confirm against evaluate.py.

# Directory Inputs
SAVE_DIR = f'../runs/SSD/SSD_{MODEL_NAME}_student3'

# Privileged Information Paths (every entry is commented out, so the list is empty)
PRIVILEGED_INFORMATION_DIRS = [

# Special Constructed Features
    # "Box_Mask",
]

# Number of input image channels RGB + Extras
NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# Defining Model

# Set the device to CUDA or CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load the pretrained SSD model (standard SSD)
weights = torchvision.models.detection.SSD300_VGG16_Weights.DEFAULT
pre_trained_model = torchvision.models.detection.ssd300_vgg16(weights=weights)

# Module.to() returns the module itself, so `model` is an alias of `pre_trained_model`
model = pre_trained_model.to(device)

# Replace the first convolutional layer so it accepts NUM_CHANNELS input channels
# In SSD, the input convolution layer is part of the VGG model's backbone
model.backbone.features[0] = torch.nn.Conv2d(NUM_CHANNELS, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

# Initialize the first convolutional layer's weights
torch.nn.init.kaiming_normal_(model.backbone.features[0].weight, mode='fan_out', nonlinearity='relu')

# Modify the classification head
# Collect the input channels of each per-feature-map layer in the pretrained head,
# and the number of anchors per location reported by the anchor generator
in_channels = [layer.in_channels for layer in model.head.classification_head.module_list]
num_anchors = model.anchor_generator.num_anchors_per_location()

# Redefine the classification head to match the number of classes
model.head.classification_head = SSDClassificationHead(
    in_channels=in_channels,  # List of input channels for each feature map
    num_anchors=num_anchors,  # List of anchors per location for each feature map
    num_classes=NUM_CLASSES,  # Number of classes (including background)
)

# Move the whole model (including the layers created above) to the target device
model = model.to(device)

# Verify the model structure
print(model)

import evaluate

evaluate.main_function(
    DIR_TEST=DIR_TEST,
    DIR_IMAGES=DIR_IMAGES,
    DIR_ANNOTATIONS=DIR_ANNOTATIONS,
    IMG_RESIZE=IMG_RESIZE,
    SAVE_DIR=SAVE_DIR,
    CLASSES=CLASSES,
    NUM_CLASSES=NUM_CLASSES,
    PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
    NUM_CHANNELS=NUM_CHANNELS,
    img_means=img_means,
    img_stds=img_stds,
    ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
    model=model,
    BATCH_SIZE=batch_size,
    NUM_WORKERS=num_workers,
    model_name='SSD',
)

# Both names refer to the same module; deleting only `model` would keep the network
# alive through `pre_trained_model` until a later cell rebinds it.
del model, pre_trained_model
if device.type == 'cuda':
    # Hand the now-unreferenced cached blocks back to the GPU before the next model loads
    torch.cuda.empty_cache()

In [None]:
# Evaluate the SSD run stored under the "student4" directory.
# NOTE(review): the freshly initialised first conv / classification head built below
# are presumably overwritten by a checkpoint that evaluate.main_function loads from
# SAVE_DIR -- confirm against evaluate.py.

# Directory Inputs
SAVE_DIR = f'../runs/SSD/SSD_{MODEL_NAME}_student4'

# Privileged Information Paths (every entry is commented out, so the list is empty)
PRIVILEGED_INFORMATION_DIRS = [

# Special Constructed Features
    # "Box_Mask",
]

# Number of input image channels RGB + Extras
NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# Defining Model

# Set the device to CUDA or CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load the pretrained SSD model (standard SSD)
weights = torchvision.models.detection.SSD300_VGG16_Weights.DEFAULT
pre_trained_model = torchvision.models.detection.ssd300_vgg16(weights=weights)

# Module.to() returns the module itself, so `model` is an alias of `pre_trained_model`
model = pre_trained_model.to(device)

# Replace the first convolutional layer so it accepts NUM_CHANNELS input channels
# In SSD, the input convolution layer is part of the VGG model's backbone
model.backbone.features[0] = torch.nn.Conv2d(NUM_CHANNELS, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

# Initialize the first convolutional layer's weights
torch.nn.init.kaiming_normal_(model.backbone.features[0].weight, mode='fan_out', nonlinearity='relu')

# Modify the classification head
# Collect the input channels of each per-feature-map layer in the pretrained head,
# and the number of anchors per location reported by the anchor generator
in_channels = [layer.in_channels for layer in model.head.classification_head.module_list]
num_anchors = model.anchor_generator.num_anchors_per_location()

# Redefine the classification head to match the number of classes
model.head.classification_head = SSDClassificationHead(
    in_channels=in_channels,  # List of input channels for each feature map
    num_anchors=num_anchors,  # List of anchors per location for each feature map
    num_classes=NUM_CLASSES,  # Number of classes (including background)
)

# Move the whole model (including the layers created above) to the target device
model = model.to(device)

# Verify the model structure
print(model)

import evaluate

evaluate.main_function(
    DIR_TEST=DIR_TEST,
    DIR_IMAGES=DIR_IMAGES,
    DIR_ANNOTATIONS=DIR_ANNOTATIONS,
    IMG_RESIZE=IMG_RESIZE,
    SAVE_DIR=SAVE_DIR,
    CLASSES=CLASSES,
    NUM_CLASSES=NUM_CLASSES,
    PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
    NUM_CHANNELS=NUM_CHANNELS,
    img_means=img_means,
    img_stds=img_stds,
    ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
    model=model,
    BATCH_SIZE=batch_size,
    NUM_WORKERS=num_workers,
    model_name='SSD',
)

# Both names refer to the same module; deleting only `model` would keep the network
# alive through `pre_trained_model` until a later cell rebinds it.
del model, pre_trained_model
if device.type == 'cuda':
    # Hand the now-unreferenced cached blocks back to the GPU before the next model loads
    torch.cuda.empty_cache()

#### SSDLite Baseline Model

In [None]:
# # Directory Inputs
# SAVE_DIR = f'../runs/SSDLite/SSDLite_{MODEL_NAME}_baseline1'

# # Privileged Information Paths
# PRIVILEGED_INFORMATION_DIRS = [

# # Special Constructed Features
#     # "Box_Mask",
# ]

# # Number of input image channels RGB + Extras
# NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# # Defining Model

# # Set the device to CUDA or CPU
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# # Load the pretrained SSD model (SSDLite320 MobileNetV3)
# weights = torchvision.models.detection.SSDLite320_MobileNet_V3_Large_Weights.DEFAULT
# pre_trained_model = torchvision.models.detection.ssdlite320_mobilenet_v3_large(weights=weights)

# model = pre_trained_model.to(device)

# # Modify the first convolutional layer to accept NUM_CHANNELS input channels
# # In SSDLite, the input convolution layer is part of the MobileNetV3 backbone
# model.backbone.features[0][0][0] = torch.nn.Conv2d(NUM_CHANNELS, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)

# # Initialize the first convolutional layer's weights
# torch.nn.init.kaiming_normal_(model.backbone.features[0][0][0].weight, mode='fan_out', nonlinearity='relu')

# # Modify the classification head
# # SSD uses a set of convolutional layers for the classification head, which needs to be adapted for your number of classes
# # https://stackoverflow.com/questions/71094251/fine-tuning-ssd-lite-in-torchvision
# # from torchvision.models.detection import _utils as det_utils
# # Forward a dummy image through the backbone to get output channels
# tmp_img = torch.zeros((1, NUM_CHANNELS, 640, 640), dtype=torch.float32, device=device)
# model.to(device)
# with torch.no_grad():
#     features = model.backbone(tmp_img)

# # Extract feature map channels
# if isinstance(features, torch.Tensor):
#     in_channels = [features.shape[1]]  # Single feature map
# else:
#     in_channels = [f.shape[1] for f in features.values()]  # Multiple feature maps


# num_anchors = model.anchor_generator.num_anchors_per_location()

# # Redefine the classification head to match the number of classes
# model.head.classification_head = SSDLiteClassificationHead(
#     in_channels=in_channels,  # List of input channels for each feature map
#     num_anchors=num_anchors,  # List of anchors per location for each feature map
#     num_classes=NUM_CLASSES,  # Number of classes (including background)
#     norm_layer=partial(torch.nn.GroupNorm, 32),  # Normalization layer
# )

# # Set the number of classes in the model
# model.num_classes = NUM_CLASSES
# model.head.num_classes = NUM_CLASSES

# # Move the model to the correct device (e.g., CUDA or CPU)
# model = model.to(device)

# # Verify the model structure
# print(model)

# import evaluate

# evaluate.main_function(
#     DIR_TEST=DIR_TEST,
#     DIR_IMAGES=DIR_IMAGES,
#     DIR_ANNOTATIONS=DIR_ANNOTATIONS,
#     IMG_RESIZE=IMG_RESIZE,
#     SAVE_DIR=SAVE_DIR,
#     CLASSES=CLASSES,
#     NUM_CLASSES=NUM_CLASSES,
#     PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
#     NUM_CHANNELS=NUM_CHANNELS,
#     img_means=img_means,
#     img_stds=img_stds,
#     ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
#     model=model,
#     BATCH_SIZE=batch_size,
#     NUM_WORKERS=num_workers,
#     model_name='SSDLite',
# )

# del model

#### SSDLite Teacher Model

In [None]:
# # Directory Inputs
# SAVE_DIR = f'../runs/SSDLite/SSDLite_{MODEL_NAME}_teacher1'

# # Privileged Information Paths
# PRIVILEGED_INFORMATION_DIRS = [

# # Special Constructed Features
#     "Box_Mask",
# ]

# # Number of input image channels RGB + Extras
# NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# # Defining Model

# # Set the device to CUDA or CPU
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# # Load the pretrained SSD model (SSDLite320 MobileNetV3)
# weights = torchvision.models.detection.SSDLite320_MobileNet_V3_Large_Weights.DEFAULT
# pre_trained_model = torchvision.models.detection.ssdlite320_mobilenet_v3_large(weights=weights)

# model = pre_trained_model.to(device)

# # Modify the first convolutional layer to accept NUM_CHANNELS input channels
# # In SSDLite, the input convolution layer is part of the MobileNetV3 backbone
# model.backbone.features[0][0][0] = torch.nn.Conv2d(NUM_CHANNELS, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)

# # Initialize the first convolutional layer's weights
# torch.nn.init.kaiming_normal_(model.backbone.features[0][0][0].weight, mode='fan_out', nonlinearity='relu')

# # Modify the classification head
# # SSD uses a set of convolutional layers for the classification head, which needs to be adapted for your number of classes
# # https://stackoverflow.com/questions/71094251/fine-tuning-ssd-lite-in-torchvision
# # from torchvision.models.detection import _utils as det_utils
# # Forward a dummy image through the backbone to get output channels
# tmp_img = torch.zeros((1, NUM_CHANNELS, 640, 640), dtype=torch.float32, device=device)
# model.to(device)
# with torch.no_grad():
#     features = model.backbone(tmp_img)

# # Extract feature map channels
# if isinstance(features, torch.Tensor):
#     in_channels = [features.shape[1]]  # Single feature map
# else:
#     in_channels = [f.shape[1] for f in features.values()]  # Multiple feature maps


# num_anchors = model.anchor_generator.num_anchors_per_location()

# # Redefine the classification head to match the number of classes
# model.head.classification_head = SSDLiteClassificationHead(
#     in_channels=in_channels,  # List of input channels for each feature map
#     num_anchors=num_anchors,  # List of anchors per location for each feature map
#     num_classes=NUM_CLASSES,  # Number of classes (including background)
#     norm_layer=partial(torch.nn.GroupNorm, 32),  # Normalization layer
# )

# # Set the number of classes in the model
# model.num_classes = NUM_CLASSES
# model.head.num_classes = NUM_CLASSES

# # Move the model to the correct device (e.g., CUDA or CPU)
# model = model.to(device)

# # Verify the model structure
# print(model)

# import evaluate

# evaluate.main_function(
#     DIR_TEST=DIR_TEST,
#     DIR_IMAGES=DIR_IMAGES,
#     DIR_ANNOTATIONS=DIR_ANNOTATIONS,
#     IMG_RESIZE=IMG_RESIZE,
#     SAVE_DIR=SAVE_DIR,
#     CLASSES=CLASSES,
#     NUM_CLASSES=NUM_CLASSES,
#     PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
#     NUM_CHANNELS=NUM_CHANNELS,
#     img_means=img_means,
#     img_stds=img_stds,
#     ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
#     model=model,
#     BATCH_SIZE=batch_size,
#     NUM_WORKERS=num_workers,
#     model_name='SSDLite',
# )

# del model

#### SSDLite Student Model

In [None]:
# # Directory Inputs
# SAVE_DIR = f'../runs/SSDLite/SSDLite_{MODEL_NAME}_student1'

# # Privileged Information Paths
# PRIVILEGED_INFORMATION_DIRS = [

# # Special Constructed Features
#     # "Box_Mask",
# ]

# # Number of input image channels RGB + Extras
# NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# # Defining Model

# # Set the device to CUDA or CPU
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# # Load the pretrained SSD model (SSDLite320 MobileNetV3)
# weights = torchvision.models.detection.SSDLite320_MobileNet_V3_Large_Weights.DEFAULT
# pre_trained_model = torchvision.models.detection.ssdlite320_mobilenet_v3_large(weights=weights)

# model = pre_trained_model.to(device)

# # Modify the first convolutional layer to accept NUM_CHANNELS input channels
# # In SSDLite, the input convolution layer is part of the MobileNetV3 backbone
# model.backbone.features[0][0][0] = torch.nn.Conv2d(NUM_CHANNELS, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)

# # Initialize the first convolutional layer's weights
# torch.nn.init.kaiming_normal_(model.backbone.features[0][0][0].weight, mode='fan_out', nonlinearity='relu')

# # Modify the classification head
# # SSD uses a set of convolutional layers for the classification head, which needs to be adapted for your number of classes
# # https://stackoverflow.com/questions/71094251/fine-tuning-ssd-lite-in-torchvision
# # from torchvision.models.detection import _utils as det_utils
# # Forward a dummy image through the backbone to get output channels
# tmp_img = torch.zeros((1, NUM_CHANNELS, 640, 640), dtype=torch.float32, device=device)
# model.to(device)
# with torch.no_grad():
#     features = model.backbone(tmp_img)

# # Extract feature map channels
# if isinstance(features, torch.Tensor):
#     in_channels = [features.shape[1]]  # Single feature map
# else:
#     in_channels = [f.shape[1] for f in features.values()]  # Multiple feature maps


# num_anchors = model.anchor_generator.num_anchors_per_location()

# # Redefine the classification head to match the number of classes
# model.head.classification_head = SSDLiteClassificationHead(
#     in_channels=in_channels,  # List of input channels for each feature map
#     num_anchors=num_anchors,  # List of anchors per location for each feature map
#     num_classes=NUM_CLASSES,  # Number of classes (including background)
#     norm_layer=partial(torch.nn.GroupNorm, 32),  # Normalization layer
# )

# # Set the number of classes in the model
# model.num_classes = NUM_CLASSES
# model.head.num_classes = NUM_CLASSES

# # Move the model to the correct device (e.g., CUDA or CPU)
# model = model.to(device)

# # Verify the model structure
# print(model)

# import evaluate

# evaluate.main_function(
#     DIR_TEST=DIR_TEST,
#     DIR_IMAGES=DIR_IMAGES,
#     DIR_ANNOTATIONS=DIR_ANNOTATIONS,
#     IMG_RESIZE=IMG_RESIZE,
#     SAVE_DIR=SAVE_DIR,
#     CLASSES=CLASSES,
#     NUM_CLASSES=NUM_CLASSES,
#     PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
#     NUM_CHANNELS=NUM_CHANNELS,
#     img_means=img_means,
#     img_stds=img_stds,
#     ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
#     model=model,
#     BATCH_SIZE=batch_size,
#     NUM_WORKERS=num_workers,
#     model_name='SSDLite',
# )

# del model

In [None]:
# # Directory Inputs
# SAVE_DIR = f'../runs/SSDLite/SSDLite_{MODEL_NAME}_student2'

# # Privileged Information Paths
# PRIVILEGED_INFORMATION_DIRS = [

# # Special Constructed Features
#     # "Box_Mask",
# ]

# # Number of input image channels RGB + Extras
# NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# # Defining Model

# # Set the device to CUDA or CPU
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# # Load the pretrained SSD model (SSDLite320 MobileNetV3)
# weights = torchvision.models.detection.SSDLite320_MobileNet_V3_Large_Weights.DEFAULT
# pre_trained_model = torchvision.models.detection.ssdlite320_mobilenet_v3_large(weights=weights)

# model = pre_trained_model.to(device)

# # Modify the first convolutional layer to accept NUM_CHANNELS input channels
# # In SSDLite, the input convolution layer is part of the MobileNetV3 backbone
# model.backbone.features[0][0][0] = torch.nn.Conv2d(NUM_CHANNELS, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)

# # Initialize the first convolutional layer's weights
# torch.nn.init.kaiming_normal_(model.backbone.features[0][0][0].weight, mode='fan_out', nonlinearity='relu')

# # Modify the classification head
# # SSD uses a set of convolutional layers for the classification head, which needs to be adapted for your number of classes
# # https://stackoverflow.com/questions/71094251/fine-tuning-ssd-lite-in-torchvision
# # from torchvision.models.detection import _utils as det_utils
# # Forward a dummy image through the backbone to get output channels
# tmp_img = torch.zeros((1, NUM_CHANNELS, 640, 640), dtype=torch.float32, device=device)
# model.to(device)
# with torch.no_grad():
#     features = model.backbone(tmp_img)

# # Extract feature map channels
# if isinstance(features, torch.Tensor):
#     in_channels = [features.shape[1]]  # Single feature map
# else:
#     in_channels = [f.shape[1] for f in features.values()]  # Multiple feature maps


# num_anchors = model.anchor_generator.num_anchors_per_location()

# # Redefine the classification head to match the number of classes
# model.head.classification_head = SSDLiteClassificationHead(
#     in_channels=in_channels,  # List of input channels for each feature map
#     num_anchors=num_anchors,  # List of anchors per location for each feature map
#     num_classes=NUM_CLASSES,  # Number of classes (including background)
#     norm_layer=partial(torch.nn.GroupNorm, 32),  # Normalization layer
# )

# # Set the number of classes in the model
# model.num_classes = NUM_CLASSES
# model.head.num_classes = NUM_CLASSES

# # Move the model to the correct device (e.g., CUDA or CPU)
# model = model.to(device)

# # Verify the model structure
# print(model)

# import evaluate

# evaluate.main_function(
#     DIR_TEST=DIR_TEST,
#     DIR_IMAGES=DIR_IMAGES,
#     DIR_ANNOTATIONS=DIR_ANNOTATIONS,
#     IMG_RESIZE=IMG_RESIZE,
#     SAVE_DIR=SAVE_DIR,
#     CLASSES=CLASSES,
#     NUM_CLASSES=NUM_CLASSES,
#     PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
#     NUM_CHANNELS=NUM_CHANNELS,
#     img_means=img_means,
#     img_stds=img_stds,
#     ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
#     model=model,
#     BATCH_SIZE=batch_size,
#     NUM_WORKERS=num_workers,
#     model_name='SSDLite',
# )

# del model

In [None]:
# Evaluation run: SSDLite320-MobileNetV3 "student3" model.

# Directory Inputs
SAVE_DIR = f'../runs/SSDLite/SSDLite_{MODEL_NAME}_student3'

# Privileged Information Paths (none enabled for this run)
PRIVILEGED_INFORMATION_DIRS = [
    # Special Constructed Features
    # "Box_Mask",
]

# Number of input image channels RGB + Extras
NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# Defining Model

# Set the device to CUDA or CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load the pretrained SSDLite model (SSDLite320 MobileNetV3)
weights = torchvision.models.detection.SSDLite320_MobileNet_V3_Large_Weights.DEFAULT
pre_trained_model = torchvision.models.detection.ssdlite320_mobilenet_v3_large(weights=weights)

model = pre_trained_model.to(device)

# Replace the first convolution of the MobileNetV3 backbone so it accepts NUM_CHANNELS inputs
model.backbone.features[0][0][0] = torch.nn.Conv2d(NUM_CHANNELS, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)

# Initialize the first convolutional layer's weights
torch.nn.init.kaiming_normal_(model.backbone.features[0][0][0].weight, mode='fan_out', nonlinearity='relu')

# Modify the classification head
# The head's per-feature-map input widths are discovered by forwarding a dummy image
# through the backbone and reading the channel dimension of each output.
# https://stackoverflow.com/questions/71094251/fine-tuning-ssd-lite-in-torchvision
tmp_img = torch.zeros((1, NUM_CHANNELS, 640, 640), dtype=torch.float32, device=device)

# The replacement conv above was created on the default device; move it next to tmp_img
model.to(device)

# Probe in eval mode so the all-zero dummy batch cannot update normalisation running
# statistics and train-mode batch-size constraints do not apply; the previous mode is
# restored afterwards so the rest of the cell sees the model unchanged.
backbone_was_training = model.backbone.training
model.backbone.eval()
try:
    with torch.no_grad():
        features = model.backbone(tmp_img)
finally:
    model.backbone.train(backbone_was_training)

# Extract feature map channels
if isinstance(features, torch.Tensor):
    in_channels = [features.shape[1]]  # Single feature map
else:
    in_channels = [f.shape[1] for f in features.values()]  # Multiple feature maps

num_anchors = model.anchor_generator.num_anchors_per_location()

# Redefine the classification head to match the number of classes
model.head.classification_head = SSDLiteClassificationHead(
    in_channels=in_channels,  # List of input channels for each feature map
    num_anchors=num_anchors,  # List of anchors per location for each feature map
    num_classes=NUM_CLASSES,  # Number of classes (including background)
    norm_layer=partial(torch.nn.GroupNorm, 32),  # Normalization layer
)

# Set the number of classes in the model
model.num_classes = NUM_CLASSES
model.head.num_classes = NUM_CLASSES

# Move the newly created head to the correct device (e.g., CUDA or CPU)
model = model.to(device)

# Verify the model structure
print(model)

import evaluate

# Run the shared evaluation routine on the test split for this model
evaluate.main_function(
    DIR_TEST=DIR_TEST,
    DIR_IMAGES=DIR_IMAGES,
    DIR_ANNOTATIONS=DIR_ANNOTATIONS,
    IMG_RESIZE=IMG_RESIZE,
    SAVE_DIR=SAVE_DIR,
    CLASSES=CLASSES,
    NUM_CLASSES=NUM_CLASSES,
    PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
    NUM_CHANNELS=NUM_CHANNELS,
    img_means=img_means,
    img_stds=img_stds,
    ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
    model=model,
    BATCH_SIZE=batch_size,
    NUM_WORKERS=num_workers,
    model_name='SSDLite',
)

# NOTE(review): `pre_trained_model` was bound to the same module object above, so this
# removes only the `model` name; the module stays referenced until that name is rebound.
del model

In [None]:
# Evaluation run: SSDLite320-MobileNetV3 "student4" model.

# Directory Inputs
SAVE_DIR = f'../runs/SSDLite/SSDLite_{MODEL_NAME}_student4'

# Privileged Information Paths (none enabled for this run)
PRIVILEGED_INFORMATION_DIRS = [
    # Special Constructed Features
    # "Box_Mask",
]

# Number of input image channels RGB + Extras
NUM_CHANNELS = 3 + len(PRIVILEGED_INFORMATION_DIRS)

# Defining Model

# Set the device to CUDA or CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load the pretrained SSDLite model (SSDLite320 MobileNetV3)
weights = torchvision.models.detection.SSDLite320_MobileNet_V3_Large_Weights.DEFAULT
pre_trained_model = torchvision.models.detection.ssdlite320_mobilenet_v3_large(weights=weights)

model = pre_trained_model.to(device)

# Replace the first convolution of the MobileNetV3 backbone so it accepts NUM_CHANNELS inputs
model.backbone.features[0][0][0] = torch.nn.Conv2d(NUM_CHANNELS, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)

# Initialize the first convolutional layer's weights
torch.nn.init.kaiming_normal_(model.backbone.features[0][0][0].weight, mode='fan_out', nonlinearity='relu')

# Modify the classification head
# The head's per-feature-map input widths are discovered by forwarding a dummy image
# through the backbone and reading the channel dimension of each output.
# https://stackoverflow.com/questions/71094251/fine-tuning-ssd-lite-in-torchvision
tmp_img = torch.zeros((1, NUM_CHANNELS, 640, 640), dtype=torch.float32, device=device)

# The replacement conv above was created on the default device; move it next to tmp_img
model.to(device)

# Probe in eval mode so the all-zero dummy batch cannot update normalisation running
# statistics and train-mode batch-size constraints do not apply; the previous mode is
# restored afterwards so the rest of the cell sees the model unchanged.
backbone_was_training = model.backbone.training
model.backbone.eval()
try:
    with torch.no_grad():
        features = model.backbone(tmp_img)
finally:
    model.backbone.train(backbone_was_training)

# Extract feature map channels
if isinstance(features, torch.Tensor):
    in_channels = [features.shape[1]]  # Single feature map
else:
    in_channels = [f.shape[1] for f in features.values()]  # Multiple feature maps

num_anchors = model.anchor_generator.num_anchors_per_location()

# Redefine the classification head to match the number of classes
model.head.classification_head = SSDLiteClassificationHead(
    in_channels=in_channels,  # List of input channels for each feature map
    num_anchors=num_anchors,  # List of anchors per location for each feature map
    num_classes=NUM_CLASSES,  # Number of classes (including background)
    norm_layer=partial(torch.nn.GroupNorm, 32),  # Normalization layer
)

# Set the number of classes in the model
model.num_classes = NUM_CLASSES
model.head.num_classes = NUM_CLASSES

# Move the newly created head to the correct device (e.g., CUDA or CPU)
model = model.to(device)

# Verify the model structure
print(model)

import evaluate

# Run the shared evaluation routine on the test split for this model
evaluate.main_function(
    DIR_TEST=DIR_TEST,
    DIR_IMAGES=DIR_IMAGES,
    DIR_ANNOTATIONS=DIR_ANNOTATIONS,
    IMG_RESIZE=IMG_RESIZE,
    SAVE_DIR=SAVE_DIR,
    CLASSES=CLASSES,
    NUM_CLASSES=NUM_CLASSES,
    PRIVILEGED_INFORMATION_DIRS=PRIVILEGED_INFORMATION_DIRS,
    NUM_CHANNELS=NUM_CHANNELS,
    img_means=img_means,
    img_stds=img_stds,
    ALL_PRIVILEGED_INFORMATION_DIRS=ALL_PRIVILEGED_INFORMATION_DIRS,
    model=model,
    BATCH_SIZE=batch_size,
    NUM_WORKERS=num_workers,
    model_name='SSDLite',
)

# NOTE(review): `pre_trained_model` was bound to the same module object above, so this
# removes only the `model` name; the module stays referenced until that name is rebound.
del model