# Method 1 : retrain using VEDAI + RetinaNet

## A] Acquiring data

In [1]:
import os
# from google.colab import drive
# !pip install picologging
# logging.basicConfig()
# logger = logging.getLogger()
# drive.mount('/content/drive')
# Every path below is resolved relative to the parent of the notebook folder.
DRIVE_ROOT = os.path.join("..")

# Raw VEDAI data and its cleaned copy both live under <DRIVE_ROOT>/data.
VEDAI_PATH, CORRECTED_PATH = (
    os.path.join(DRIVE_ROOT, "data", folder)
    for folder in ("vedai", "vedai_corrected")
)

# Helper scripts inside the object_detection_ign/detection folder.
ENGINE_PATH, UTILS_PATH = (
    os.path.join(DRIVE_ROOT, "object_detection_ign", "detection", script)
    for script in ("engine.py", "utils.py")
)
DETECTION_PATH = os.path.join(DRIVE_ROOT, "detection")

# Label id (starting at 1) -> human-readable class name.
CLASSES_DICT = dict(enumerate(
    [
        "car",
        "truck",
        "pickup",
        "tractor",
        "camping car",
        "boat",
        "motorcycle",
        "bus",
        "van",
        "other",
        "small plane",
        "large plane",
    ],
    start=1,
))

In [2]:
import sys
# Put the local detection helper folder on sys.path (once) so that modules
# stored there can be imported by name further down in the notebook.
module_path = os.path.abspath(os.path.join("..", "object_detection_ign", "detection"))
if module_path not in sys.path:
    sys.path.append(module_path)
import shutil
from functools import partial
from math import log10, floor
from torchvision.models.detection import retinanet_resnet50_fpn_v2, RetinaNet_ResNet50_FPN_V2_Weights, retinanet_resnet50_fpn, RetinaNet_ResNet50_FPN_Weights
from torchvision.models.detection.retinanet import RetinaNetClassificationHead
import pandas as pd
import torch
from torch import nn
from torchvision.ops import nms
import numpy as np
from tqdm import tqdm
from PIL import Image
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision.transforms import ToPILImage, ToTensor, Compose, RandomHorizontalFlip
from torchvision.io import read_image
from torchvision.utils import draw_bounding_boxes
import picologging as logging
# Default logging configuration; `logger` is the root logger shared by the notebook's helpers.
logging.basicConfig()
logger = logging.getLogger()

In [3]:
def test_collate(batch):
  imgs = [x[0] for x in batch]
  targets = [x[1] for x in batch]
  return imgs, targets

def generate_corrected_dataset(root):
  """Copy the valid part of ``<root>/vedai`` into ``<root>/vedai_corrected``.

  An annotation file is valid when it parses as space-separated values and
  contains no missing value. Valid annotation/image pairs are copied and
  renumbered 0, 1, 2, ... following the sorted order of the original
  annotation file names; invalid ones are skipped with a warning.

  Args:
    root: folder containing the ``vedai`` dataset (with ``annotations`` and
      ``images`` subfolders); ``vedai_corrected`` is created next to it.
  """
  original_folder_path = os.path.join(root, "vedai")
  corrected_folder_path = os.path.join(root, "vedai_corrected")
  annotation_folder = os.path.join(original_folder_path, "annotations")
  # splitext removes only the trailing extension, unlike str.replace.
  annotation_ids = [
      os.path.splitext(annotation_file)[0]
      for annotation_file in sorted(os.listdir(annotation_folder))
      if annotation_file.endswith('.txt')
  ]
  # exist_ok makes the function re-runnable after an interrupted run that
  # left the output folder only partially created.
  for subfolder in ("annotations", "images"):
    os.makedirs(os.path.join(corrected_folder_path, subfolder), exist_ok=True)
  new_id = 0
  for old_id in tqdm(annotation_ids):
    src_annotation_file = os.path.join(original_folder_path, "annotations", old_id+'.txt')
    try:
      is_valid_annotation = pd.read_csv(src_annotation_file, sep=' ', header=None).notna().all().all()
    except pd.errors.EmptyDataError:
      # An empty file has nothing to parse; skip it like any other invalid annotation.
      is_valid_annotation = False
    if not is_valid_annotation:
      logger.warning("Invalid annotation detected, skipping image in new dataset.")
      continue
    dest_annotation_file = os.path.join(corrected_folder_path, "annotations", str(new_id)+'.txt')
    src_image_file = os.path.join(original_folder_path, "images", old_id+'.jpg')
    dest_image_file = os.path.join(corrected_folder_path, "images", str(new_id)+'.jpg')
    shutil.copy2(src_annotation_file, dest_annotation_file)
    shutil.copy2(src_image_file, dest_image_file)
    new_id += 1

def convert_idx_to_id(idx):
  """Return ``idx`` as a string left-padded with zeros to 8 characters.

  Indices with more than 8 digits are returned unpadded.

  Raises:
    ValueError: if ``idx`` is negative.
  """
  if idx < 0:
    raise ValueError(f"idx must be non-negative, got {idx}")
  # str.zfill does the padding directly, with no floating-point digit counting.
  return str(idx).zfill(8)

def convert_id_to_idx(image_id):
  """Convert a zero-padded image id string (e.g. ``'00000012'``) to its integer index.

  ``int`` already accepts leading zeros in decimal strings, so an all-zero
  id of any length maps to 0 instead of failing on an empty string.

  Raises:
    ValueError: if ``image_id`` is not a decimal integer string.
  """
  return int(image_id)

def train_test_split(dataset, proportion=0.8, batch_size=8, pin_memory = torch.cuda.is_available()):
  """Randomly split ``dataset`` and wrap both parts in DataLoaders.

  Args:
    dataset: sized dataset to split.
    proportion: fraction of the entries assigned to the train split.
    batch_size: batch size used by both loaders.
    pin_memory: forwarded to both DataLoaders.

  Returns:
    ``(train_dataloader, test_dataloader)``; both shuffle and both collate
    with ``test_collate``.
  """
  nb_images = len(dataset)
  nb_train = floor(nb_images * proportion)
  # Give the remainder to the test split so the two lengths always add up to
  # the dataset size; no retry after a failed split is needed.
  lengths = [nb_train, nb_images - nb_train]
  # A fixed seed keeps the split identical from one run to the next.
  train, test = random_split(dataset, lengths=lengths, generator=torch.Generator().manual_seed(42))
  print(f"There are {lengths[0]} entries in the train dataset and {lengths[1]} entries in the test dataset.")
  train_dataloader = DataLoader(train, batch_size=batch_size, collate_fn=test_collate, shuffle=True, pin_memory=pin_memory)
  test_dataloader = DataLoader(test, batch_size=batch_size, collate_fn=test_collate, shuffle=True, pin_memory=pin_memory)
  return train_dataloader, test_dataloader

#V2
def build_model(mode, num_classes, model_path=None, sample=None):
  """Build a RetinaNet (ResNet50-FPN v2) with a fresh classification head.

  Args:
    mode: ``"train"`` freezes every pretrained parameter before the head is
      swapped and leaves the model in training mode; ``"inference"`` loads
      the state dict stored at ``model_path`` and switches to eval mode.
      Any other value returns the model without either step.
    num_classes: number of outputs of the new classification head.
    model_path: path of the saved state dict (read in ``"inference"`` mode).
    sample: not used.

  Returns:
    The model, moved to CUDA when available and to CPU otherwise.
  """
  satellite_model = retinanet_resnet50_fpn_v2(weights=RetinaNet_ResNet50_FPN_V2_Weights.DEFAULT)
  cuda_available = torch.cuda.is_available()
  device = torch.device('cuda') if cuda_available else torch.device('cpu')
  is_training = mode == "train"
  if is_training:
    # Freeze the pretrained weights; the head created below stays trainable.
    for weight in satellite_model.parameters():
      weight.requires_grad = False
  previous_head = satellite_model.head.classification_head
  satellite_model.head.classification_head = RetinaNetClassificationHead(
      in_channels=previous_head.cls_logits.in_channels,
      num_anchors=previous_head.num_anchors,
      num_classes=num_classes,
      norm_layer=partial(nn.GroupNorm, 32),
  )
  satellite_model.to(device)
  if is_training:
    satellite_model.train()
    requires_grad = [p.requires_grad for p in satellite_model.parameters()]
    counter = sum(1 for flag in requires_grad if flag)
    logger.info(f"{counter} parameters will be trained, {len(requires_grad) - counter} parameters won't be trained.")
  elif mode == "inference":
    # Without CUDA the checkpoint has to be remapped onto the CPU.
    kwargs = {} if cuda_available else {"map_location": torch.device('cpu')}
    satellite_model.load_state_dict(torch.load(model_path, **kwargs))
    satellite_model.eval()
  # An "export_inference" mode (load weights, eval, move to CPU, ONNX export)
  # was sketched here but is not implemented.
  return satellite_model

In [4]:
# %%bash
# if [ ! -d $VEDAI_PATH ]; then git clone https://github.com/nikitalpopov/vedai.git $DRIVE_ROOT/vedai; fi
# if [ ! -d $DETECTION_PATH ]; then apt install subversion && svn checkout https://github.com/pytorch/vision/trunk/references/detection $DETECTION_PATH; fi && cp -a $DETECTION_PATH/. .


In [5]:
# Build the cleaned dataset on first run. generate_corrected_dataset(root)
# reads <root>/vedai and writes <root>/vedai_corrected, so it must receive the
# folder that contains CORRECTED_PATH; passing DRIVE_ROOT would generate the
# data next to, not at, the path checked by this guard.
if not os.path.exists(CORRECTED_PATH):
  generate_corrected_dataset(os.path.dirname(CORRECTED_PATH))
from engine import train_one_epoch, evaluate

In [6]:
class VEDAI(Dataset):
  """Object-detection dataset over a VEDAI-style folder.

  ``root_folder`` must contain ``annotations/<idx>.txt`` and
  ``images/<idx>.jpg`` with ``idx`` running from 0 to N-1. Each annotation
  row is read as ``label x y width length`` (space separated): ``x``/``y``
  are used as the box centre and all four values are scaled by the image
  size, i.e. they are treated as fractions of it.

  Args:
    root_folder: folder holding the ``annotations`` and ``images`` subfolders.
    classes_dict: mapping from label id (starting at 1) to class name.
    transforms: optional callable applied to the PIL image in ``__getitem__``.
    image_size: ``(width, height)`` in pixels used to scale the boxes;
      defaults to the 1024x1024 that used to be hard-coded.
  """

  def __init__(self, root_folder, classes_dict, transforms=None, image_size=(1024, 1024)):
    self.classes_dict = classes_dict
    # One more than the number of named classes, whose ids start at 1
    # (presumably id 0 is kept for background - TODO confirm).
    self.num_classes = len(self.classes_dict) + 1
    self.root = root_folder
    self.annotations_dir = os.path.join(root_folder, "annotations")
    self.images_dir = os.path.join(root_folder, "images")
    self.image_size = image_size
    self.annotations = self.merge_annotation_folder()
    self.transforms = transforms

  def __len__(self):
    """Return the number of images that have annotations."""
    return len(self.annotations.index.unique())

  def __getitem__(self, idx):
    """Return ``(img, target)`` for image ``idx``.

    ``target`` holds ``labels`` (int64), ``boxes`` (float32 rows of
    x_min, y_min, x_max, y_max), ``area`` (one value per box) and
    ``image_id`` (one-element tensor containing ``idx``).
    """
    img_path = os.path.join(self.images_dir, str(idx)+'.jpg')
    img = Image.open(img_path)
    # Indexing with [idx] keeps a DataFrame even when the image has one box.
    sliced_df = self.annotations.loc[[idx], ["x_min", "y_min", "x_max", "y_max", "labels"]]
    target = {}
    target["labels"] = torch.tensor(sliced_df["labels"].values, dtype=torch.int64)
    target["boxes"] = torch.tensor(sliced_df[["x_min", "y_min", "x_max", "y_max"]].values, dtype=torch.float32)
    target["area"] = (target["boxes"][:, 3] - target["boxes"][:, 1]) * (target["boxes"][:, 2] - target["boxes"][:, 0])
    target["image_id"] = torch.tensor([idx])
    if self.transforms:
      img = self.transforms(img)
    return img, target

  def merge_annotation_folder(self):
    """Load every ``<idx>.txt`` annotation file into one DataFrame.

    Returns:
      A DataFrame indexed by image ``idx`` with the raw ``x``, ``y``,
      ``width``, ``length`` columns, the 1-based ``labels``, their
      ``labels_name`` and the pixel corners ``x_min``, ``y_min``, ``x_max``,
      ``y_max``.

    Raises:
      ValueError: if the annotations folder contains no ``.txt`` file.
    """
    abs_width, abs_length = self.image_size
    # Count annotation files only, so a stray file in the folder does not
    # produce an index whose .txt file is missing.
    nb_files = sum(1 for name in os.listdir(self.annotations_dir) if name.endswith('.txt'))
    if nb_files == 0:
      raise ValueError(f"No annotation file found in {self.annotations_dir}")
    frames = []
    for idx in range(nb_files):
      filepath = os.path.join(self.annotations_dir, str(idx) + '.txt')
      # Rows have five fields but only four names are given, so pandas uses
      # the first field (the class) as index; reset_index turns it back into
      # a column called "index".
      frame = pd.read_csv(filepath, sep=' ', names=["x", "y", "width", "length"]).reset_index(drop=False)
      frame["idx"] = idx
      frames.append(frame)
    # Concatenate once instead of growing a DataFrame inside the loop.
    annotations = pd.concat(frames).rename(columns={"index":"labels"}).set_index("idx")
    # Shift the class ids read from the files by one to get the label ids.
    annotations["labels"] = (annotations["labels"] + 1).astype(int)
    annotations["labels_name"] = annotations["labels"].replace(self.classes_dict)
    annotations["x_min"] = (annotations["x"] - annotations["width"]/2) * abs_width
    annotations["y_min"] = (annotations["y"] - annotations["length"]/2) * abs_length
    annotations["x_max"] = (annotations["x"] + annotations["width"]/2) * abs_width
    annotations["y_max"] = (annotations["y"] + annotations["length"]/2) * abs_length
    return annotations

  def show_image_bbox(self, idx):
    """Display image ``idx`` with its annotated boxes drawn in red.

    Relies on a ``display`` function being available, as in a notebook.
    """
    _, target = self[idx]
    img = read_image(os.path.join(self.images_dir, str(idx)+'.jpg'))
    labels_names = [self.classes_dict[label] for label in target["labels"].tolist()]
    img = draw_bounding_boxes(img, target["boxes"], labels=labels_names, colors="red")
    img = ToPILImage()(img)
    display(img)

  def show_prediction_bbox(self, idx, prediction):
    """Display image ``idx`` with the boxes of one prediction drawn in red.

    Args:
      idx: index of the image to load from the images folder.
      prediction: a single prediction dict with ``"boxes"`` and ``"labels"``
        tensors (one element of the list returned by the model).
    """
    img = read_image(os.path.join(self.images_dir, str(idx)+'.jpg'))
    labels_names = [self.classes_dict[label] for label in prediction["labels"].tolist()]
    img = draw_bounding_boxes(img, prediction["boxes"], labels=labels_names, colors="red")
    img = ToPILImage()(img)
    display(img)

# Images are only converted to tensors; RandomHorizontalFlip(0.5) is left disabled.
transforms = Compose([ToTensor()])
dataset = VEDAI(root_folder=CORRECTED_PATH, classes_dict=CLASSES_DICT, transforms=transforms)

In [7]:
# dataset.show_image_bbox(550)

In [8]:
# 80/20 train/test split, served in batches of two images.
train_dataloader, test_dataloader = train_test_split(dataset, proportion=0.8, batch_size=2)

Imprecise floor rounding detected, adding one to test dataset to compensate.
There are 996 entries in the train dataset and 250 entries in the test dataset.


## B] Building model

In [None]:
# Training settings: number of epochs, feature-extraction flag, compute device.
num_epochs = 10
feature_extract = True
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Build the model in training mode and optimise only the parameters that
# still require gradients.
satellite_model = build_model(mode="train", num_classes=dataset.num_classes)
params = [p for p in satellite_model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                              momentum=0.9, weight_decay=0.0005)
# Learning rate scheduler: multiply the learning rate by 0.1 every 3 epochs.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=3,
                                                gamma=0.1)

for epoch in range(num_epochs):
    # train for one epoch (print_freq=5)
    train_one_epoch(satellite_model, optimizer, train_dataloader, device, epoch, print_freq=5)
    # update the learning rate
    lr_scheduler.step()
    
    # evaluation on the test dataset is currently disabled
    # evaluate(satellite_model, test_dataloader, device=device)
# Save the trained weights (state dict only) under DRIVE_ROOT.
torch.save(satellite_model.state_dict(), os.path.join(DRIVE_ROOT, 'model_export_v3.pth'))

In [None]:
# Take one (images, targets) batch from the training loader to use as a sample input.
images, targets = next(iter(train_dataloader))

In [None]:
# Export the model to ONNX from the CPU, using the sampled batch as example input.
# NOTE(review): the model was built with mode="train" and is never switched to
# eval here, and the example input is the (images, targets) pair - confirm the
# exporter handles this as intended.
torch.onnx.export(satellite_model.cpu(), (images, targets), os.path.join(DRIVE_ROOT,"model_export_v3.onnx"), verbose=True, opset_version=12)

## C] Export model

In [9]:
from pytorch2keras import pytorch_to_keras
# we should specify shape of the input tensor


In [10]:
def convert_model_to_onnx(model, sample_input, export_path, mode='train'):
    """Export ``model`` to an ONNX file.

    Args:
        model: the torch module to export; it is moved to the CPU first.
        sample_input: example input handed to ``torch.onnx.export``.
        export_path: destination of the ONNX file.
        mode: ``'train'`` puts the model in training mode before exporting;
            any other value puts it in eval mode.
    """
    model = model.cpu()
    # Operate on the model passed in, not on a notebook-level global.
    if mode == 'train':
        model.train()
    else:
        model.eval()
    torch.onnx.export(model, sample_input, export_path, verbose=True, opset_version=12)

def convert_onnx_to_tflite():
    """Placeholder for the ONNX-to-TFLite conversion step; it does nothing yet."""
    return None

In [11]:
# One batch from the training loader: an (images, targets) tuple.
sample_input = next(iter(train_dataloader))

In [15]:
sample_input

([tensor([[[0.7608, 0.7608, 0.7294,  ..., 0.3255, 0.3569, 0.3686],
           [0.7529, 0.7373, 0.6980,  ..., 0.3373, 0.3843, 0.3882],
           [0.7333, 0.7333, 0.7098,  ..., 0.3529, 0.3922, 0.4000],
           ...,
           [0.4784, 0.4784, 0.4863,  ..., 0.4157, 0.3725, 0.3882],
           [0.4784, 0.4784, 0.4824,  ..., 0.3843, 0.3294, 0.3569],
           [0.4706, 0.4745, 0.4784,  ..., 0.3608, 0.3098, 0.3412]],
  
          [[0.7098, 0.7098, 0.6824,  ..., 0.3412, 0.3843, 0.3882],
           [0.7020, 0.7059, 0.6667,  ..., 0.3686, 0.3961, 0.4000],
           [0.6863, 0.6863, 0.6667,  ..., 0.3725, 0.4039, 0.3922],
           ...,
           [0.5137, 0.5176, 0.5137,  ..., 0.3922, 0.3608, 0.3922],
           [0.5059, 0.5059, 0.5020,  ..., 0.3765, 0.3333, 0.3647],
           [0.4980, 0.4941, 0.4980,  ..., 0.3922, 0.3255, 0.3529]],
  
          [[0.6431, 0.6353, 0.5961,  ..., 0.2745, 0.3137, 0.3098],
           [0.6392, 0.6314, 0.5922,  ..., 0.2863, 0.3216, 0.3333],
           [0.6235, 0.

In [16]:
import onnx
from onnx_tf.backend import prepare

# Location of the exported ONNX model.
# NOTE(review): the torch.onnx.export cell writes to
# DRIVE_ROOT/model_export_v3.onnx, not to a "models" subfolder - confirm the
# file was moved there.
ONNX_MODEL_PATH = os.path.join(DRIVE_ROOT, "models", "model_export_v3.onnx")
# A freshly built (untrained-head) model; it is not used by the conversion below.
satellite_model = build_model(mode="train", num_classes=dataset.num_classes)
# convert_model_to_onnx(satellite_model, sample_input, )

# Load the ONNX graph, wrap it with the onnx-tf backend and export a TensorFlow graph.
# NOTE(review): the recorded run fails inside onnx_tf's Pad handler with KeyError: ''.
onnx_model = onnx.load(ONNX_MODEL_PATH)
tf_rep = prepare(onnx_model)
tf_rep.export_graph("model.pb")

 The versions of TensorFlow you are currently using is 2.11.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons
[I 221127 18:53:03 1822606608:77] 14 parameters will be trained, 189 parameters won't be trained.


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


KeyError: in user code:

    File "c:\Users\berto\mambaforge\envs\satellite\lib\site-packages\onnx_tf\backend_tf_module.py", line 99, in __call__  *
        output_ops = self.backend._onnx_node_to_tensorflow_op(onnx_node,
    File "c:\Users\berto\mambaforge\envs\satellite\lib\site-packages\onnx_tf\backend.py", line 347, in _onnx_node_to_tensorflow_op  *
        return handler.handle(node, tensor_dict=tensor_dict, strict=strict)
    File "c:\Users\berto\mambaforge\envs\satellite\lib\site-packages\onnx_tf\handlers\handler.py", line 59, in handle  *
        return ver_handle(node, **kwargs)
    File "c:\Users\berto\mambaforge\envs\satellite\lib\site-packages\onnx_tf\handlers\backend\pad.py", line 91, in version_11  *
        return cls._common(node, **kwargs)
    File "c:\Users\berto\mambaforge\envs\satellite\lib\site-packages\onnx_tf\handlers\backend\pad.py", line 73, in _common  *
        constant_values = tensor_dict[node.inputs[2]] if len(

    KeyError: ''


In [14]:
# Attempt a direct PyTorch -> Keras conversion.
# NOTE(review): the recorded run fails with "targets should not be none when in
# training mode"; the model was built with mode="train" - presumably it needs
# to be in eval mode for this call, to be confirmed.
k_model = pytorch_to_keras(satellite_model, sample_input, verbose=True)

INFO:pytorch2keras:Converter is called.


AssertionError: targets should not be none when in training mode

## D] Inference

In [None]:
# State dict file loaded for inference (same file name as the one saved after training).
MODEL_PATH = os.path.join(DRIVE_ROOT, "model_export_v3.pth")

In [None]:
import torch
from torchvision.models.detection import retinanet_resnet50_fpn_v2, RetinaNet_ResNet50_FPN_V2_Weights, retinanet_resnet50_fpn, RetinaNet_ResNet50_FPN_Weights
from torchvision.models.detection.retinanet import RetinaNetClassificationHead

In [None]:
# The class count lives on the dataset object (as in the training cell); no
# standalone `num_classes` variable is defined in this notebook.
satellite_model = build_model("inference", num_classes=dataset.num_classes, model_path=MODEL_PATH)

In [None]:
# Index of the dataset image used for the inference demo below.
IDX = 12
def apply_nms(orig_prediction, iou_thresh=0.2):
    """Filter each prediction in place with non-maximum suppression.

    Args:
        orig_prediction: iterable of prediction dicts holding 'boxes',
            'scores' and 'labels'.
        iou_thresh: IoU threshold handed to ``nms``.

    Returns:
        The same ``orig_prediction`` object, with every dict reduced to the
        entries kept by ``nms``.
    """
    for single in orig_prediction:
        # nms returns the indices of the boxes to keep
        kept = nms(single['boxes'], single['scores'], iou_thresh)
        for field in ('boxes', 'scores', 'labels'):
            single[field] = single[field][kept]
    return orig_prediction

img, target = dataset[IDX]
# The model may live on the GPU while the dataset yields CPU tensors: run the
# forward pass on the model's device, without tracking gradients, then bring
# the results back to the CPU for NMS and drawing.
model_device = next(satellite_model.parameters()).device
with torch.no_grad():
    predictions = satellite_model([img.to(model_device)])
predictions = [{key: value.cpu() for key, value in prediction.items()} for prediction in predictions]
nms_predictions = apply_nms(predictions, iou_thresh=0.01)
# The model returns one dict per input image; show_prediction_bbox expects a single dict.
dataset.show_prediction_bbox(IDX, nms_predictions[0])

In [None]:
IMG_PATH = "test_img.jpg"
img = ToTensor()(Image.open(IMG_PATH))
# Run on the model's device without tracking gradients, then move the
# predictions back to the CPU for NMS and drawing.
model_device = next(satellite_model.parameters()).device
with torch.no_grad():
    predictions = satellite_model([img.to(model_device)])
predictions = [{key: value.cpu() for key, value in prediction.items()} for prediction in predictions]
nms_predictions = apply_nms(predictions, iou_thresh=0.01)
# Only one image was passed in, so only the first prediction dict is drawn.
labels_names = [CLASSES_DICT[label] for label in nms_predictions[0]["labels"].tolist()]
# Reload the picture as a uint8 tensor from the same path for drawing.
img = read_image(IMG_PATH)
img = draw_bounding_boxes(img, nms_predictions[0]["boxes"], labels=labels_names, colors="red")
img = ToPILImage()(img)
display(img)

# show_image_bbox(IDX, os.path.join(CORRECTED_PATH, "images"), nms_predictions, CLASSES_DICT)

# Method 4 : using keras-retinanet github

In [None]:
! git clone https://github.com/fizyr/keras-retinanet.git
%cd keras-retinanet
!pip install .

# Method 6 : using the SSD7 training tutorial

In [None]:
!git clone https://github.com/pierluigiferrari/ssd_keras.git
%cd ssd_keras

In [None]:
!pip install keras==2.10

In [None]:
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, TerminateOnNaN, CSVLogger
from keras import backend as K
from keras.models import load_model
from math import ceil
import numpy as np
from matplotlib import pyplot as plt

from models.keras_ssd7 import build_model
from keras_loss_function.keras_ssd_loss import SSDLoss
from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes
from keras_layers.keras_layer_DecodeDetections import DecodeDetections
from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast

from ssd_encoder_decoder.ssd_input_encoder import SSDInputEncoder
from ssd_encoder_decoder.ssd_output_decoder import decode_detections, decode_detections_fast

from data_generator.object_detection_2d_data_generator import DataGenerator
from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms
from data_generator.data_augmentation_chain_variable_input_size import DataAugmentationVariableInputSize
from data_generator.data_augmentation_chain_constant_input_size import DataAugmentationConstantInputSize
from data_generator.data_augmentation_chain_original_ssd import SSDDataAugmentation

%matplotlib inline

In [None]:
# --- Input images ---
# Height, width and number of colour channels of the input images.
img_height = 300
img_width = 480
img_channels = 3

# Pixel normalisation. Set these to your preference (maybe `None`); the
# current settings transform the input pixel values to the interval `[-1,1]`.
intensity_mean = 127.5
intensity_range = 127.5

# Number of positive classes.
n_classes = 5

# --- Anchor boxes ---
# An explicit list of anchor box scaling factors. If this is passed, it will
# override `min_scale` and `max_scale`.
scales = [0.08, 0.16, 0.32, 0.64, 0.96]
# The list of aspect ratios for the anchor boxes.
aspect_ratios = [0.5, 1.0, 2.0]
# Whether or not you want to generate two anchor boxes for aspect ratio 1.
two_boxes_for_ar1 = True
# Step sizes and offsets of the anchor box grids, in case you'd like to set
# them manually; not recommended.
steps = None
offsets = None
# Whether or not to clip the anchor boxes to lie entirely within the image boundaries.
clip_boxes = False
# The list of variances by which the encoded target coordinates are scaled.
variances = [1.0, 1.0, 1.0, 1.0]
# Whether or not the model is supposed to use coordinates relative to the image size.
normalize_coords = True