# Method 1 : retrain using VEDAI + RetinaNet

## A] Acquiring data

In [1]:
!git clone https://github.com/nikitalpopov/vedai.git

Cloning into 'vedai'...
remote: Enumerating objects: 2539, done.[K
remote: Counting objects: 100% (3/3), done.[K
remote: Compressing objects: 100% (3/3), done.[K
remote: Total 2539 (delta 0), reused 0 (delta 0), pack-reused 2536[K
Receiving objects: 100% (2539/2539), 1.21 GiB | 34.70 MiB/s, done.
Resolving deltas: 100% (4/4), done.
Checking out files: 100% (2521/2521), done.


In [9]:
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToPILImage
from torchvision.io import read_image
from torchvision.utils import draw_bounding_boxes
import pandas as pd
import torch
import tensorflow as tf
import numpy as np

In [3]:
TEST_IMAGE_ID = '00000127'
ANNOTATIONS_FOLDER = os.path.join("vedai","annotations")
TEST_IMAGE_PATH = os.path.join("vedai","images", TEST_IMAGE_ID + '.jpg')
TEST_ANNOTATION_PATH = os.path.join(ANNOTATIONS_FOLDER, TEST_IMAGE_ID + '.txt')

In [4]:
CLASSES_DICT = {1: "car",
                2: "truck",
                3: "pickup",
                4: "tractor",
                5: "camping car",
                6: "boat",
                7: "motorcycle",
                8: "bus",
                9: "van",
                10: "other",
                11: "small plane",
                12: "large plane"}

In [5]:
# image_frame = pd.read_csv(TEST_ANNOTATION_PATH, sep=' ', names=["x", "y", "width", "length"]).reset_index()
# image_frame = image_frame.rename(columns={"index":"label"})
# image_frame["label"] = (image_frame["label"] + 1).replace(CLASSES_DICT)
# abs_width, abs_length = 1024, 1024
# image_frame["x_min"] = (image_frame["x"] - image_frame["width"]/2) * abs_width
# image_frame["y_min"] = (image_frame["y"] - image_frame["length"]/2) * abs_length
# image_frame["x_max"] = (image_frame["x"] + image_frame["width"]/2) * abs_width
# image_frame["y_max"] = (image_frame["y"] + image_frame["length"]/2) * abs_length
# sliced_df = image_frame.loc[:, ["x_min", "y_min", "x_max", "y_max", ]].values
# bbox = torch.tensor(sliced_df, dtype=torch.int)
# bbox

In [6]:
# img = read_image(TEST_IMAGE_PATH)
# print(img.shape)
# print(bbox.shape)
# img = draw_bounding_boxes(img, bbox, labels=image_frame["label"], colors="red")
# img = ToPILImage()(img)
# display(img)

In [6]:
class VEDAI(Dataset):
  def __init__(self, root_folder, classes_dict, transform=None):
    self.classes_dict = classes_dict
    self.root_folder = root_folder
    self.annotations_dir = os.path.join(root_folder, "annotations")
    self.images_dir = os.path.join(root_folder, "images")
    self.images = list(sorted(os.listdir(self.images_dir)))
    self.annotations = self.merge_annotation_folder()
    self.transform = transform
    
  def __len__(self):
    return len(self.annotations)

  def __getitem__(self, idx):
    img_path = os.path.join(self.images_dir, self.images[idx])
    img = read_image(img_path)
    sliced_df = self.annotations.loc[[idx], ["x_min", "y_min", "x_max", "y_max", "label", "label_name"]]
    target = {}
    target["label_name"] = sliced_df["label_name"].tolist()
    target["label"] = torch.tensor(sliced_df["label"].values, dtype=torch.int32)
    target["bbox"] = torch.from_numpy(sliced_df[["x_min", "y_min", "x_max", "y_max"]].values)
    target["image_id"] = torch.tensor([idx])
    return img, target

  
  def merge_annotation_folder(self):
    # TO-DO: turn into dynamic variables
    abs_width, abs_length = 1024, 1024
    files = list(sorted(os.listdir(self.annotations_dir)))
    abs_filepaths = [os.path.join(self.annotations_dir, img_file) for img_file in files]
    annotations = pd.DataFrame(columns=["x", "y", "width", "length", "image_id"])
    # for img_file, filepath in zip(files, abs_filepaths):
    for idx, img_file in enumerate(abs_filepaths):
      temp_annotation = pd.read_csv(img_file, sep=' ', names=["x", "y", "width", "length"]).reset_index(drop=False)
      # temp_annotation["image_id"] = img_file.split('.')[0]
      temp_annotation["image_id"] = idx
      annotations = pd.concat([annotations, temp_annotation])
    annotations = annotations.rename(columns={"index":"label"}).set_index("image_id")
    annotations["label_name"] = (annotations["label"] + 1).replace(self.classes_dict)
    annotations["x_min"] = (annotations["x"] - annotations["width"]/2) * abs_width
    annotations["y_min"] = (annotations["y"] - annotations["length"]/2) * abs_length
    annotations["x_max"] = (annotations["x"] + annotations["width"]/2) * abs_width
    annotations["y_max"] = (annotations["y"] + annotations["length"]/2) * abs_length
    return annotations


  def show_image_bbox(self, idx):
    img, target = self.__getitem__(idx) 
    img = draw_bounding_boxes(img, target["bbox"], labels=target["label_name"], colors="red")
    img = ToPILImage()(img)
    display(img)

dataset = VEDAI("vedai", CLASSES_DICT)

In [30]:
print(dataset[1])

(tensor([[[139, 151, 158,  ..., 171, 177, 177],
         [136, 148, 159,  ..., 169, 185, 159],
         [145, 147, 155,  ..., 156, 163, 140],
         ...,
         [ 93,  90, 125,  ...,  92,  74,  71],
         [ 96,  90, 111,  ..., 103,  84,  71],
         [115, 102, 102,  ..., 115, 107,  96]],

        [[129, 143, 150,  ..., 157, 165, 169],
         [124, 136, 147,  ..., 159, 173, 148],
         [130, 134, 143,  ..., 148, 151, 126],
         ...,
         [101,  94, 127,  ...,  94,  75,  74],
         [108, 101, 115,  ..., 104,  85,  72],
         [113, 103, 105,  ..., 116, 106,  96]],

        [[117, 130, 137,  ..., 144, 153, 156],
         [108, 122, 133,  ..., 150, 157, 126],
         [111, 115, 129,  ..., 137, 139, 113],
         ...,
         [ 90,  79, 113,  ...,  83,  67,  67],
         [ 96,  85, 101,  ...,  96,  79,  67],
         [100,  98,  98,  ..., 108, 101,  88]]], dtype=torch.uint8), {'label_name': ['car', 'van'], 'label': tensor([0, 8], dtype=torch.int32), 'bbox': te

In [None]:
def train_test_split(dataset, split=0.8):
  print(len(dataset))
  

In [26]:
from math import floor
lengths = [floor(len(dataset)*(0.8)), floor(len(dataset)*(0.2))+1]
train, test = torch.utils.data.random_split(dataset, lengths=lengths)
train_dataloader = DataLoader(train, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test, batch_size=64, shuffle=True)

In [27]:
images,targets = next(iter(train_dataloader))

IndexError: ignored

## B] Building model

In [7]:
from torchvision.models.detection import retinanet_resnet50_fpn_v2, RetinaNet_ResNet50_FPN_V2_Weights

In [8]:
satellite_model = retinanet_resnet50_fpn_v2(weights=RetinaNet_ResNet50_FPN_V2_Weights.COCO_V1, progress=True, trainable_backbone_layers=1)

Downloading: "https://download.pytorch.org/models/retinanet_resnet50_fpn_v2_coco-5905b1c5.pth" to /root/.cache/torch/hub/checkpoints/retinanet_resnet50_fpn_v2_coco-5905b1c5.pth


  0%|          | 0.00/146M [00:00<?, ?B/s]

In [None]:
satellite_model

RetinaNet(
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      

# Method 4 : using keras-retinanet github

In [None]:
! git clone https://github.com/fizyr/keras-retinanet.git
%cd keras-retinanet
!pip install .

Cloning into 'keras-retinanet'...
remote: Enumerating objects: 6220, done.[K
remote: Counting objects: 100% (15/15), done.[K
remote: Compressing objects: 100% (13/13), done.[K
remote: Total 6220 (delta 5), reused 6 (delta 2), pack-reused 6205[K
Receiving objects: 100% (6220/6220), 13.48 MiB | 10.73 MiB/s, done.
Resolving deltas: 100% (4205/4205), done.
/content/keras-retinanet
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Processing /content/keras-retinanet
[33m  DEPRECATION: A future pip version will change local packages to be built in-place without first copying to a temporary directory. We recommend you use --use-feature=in-tree-build to test your packages with this new behavior before it becomes the default.
   pip 21.3 will remove support for this functionality. You can find discussion regarding this at https://github.com/pypa/pip/issues/7555.[0m
Collecting keras-resnet==0.2.0
  Downloading keras-resnet-0.2.0.tar.gz (9.3 

# Method 5 : using pytorch-vedai

In [None]:
!git clone https://github.com/MichelHalmes/pytorch-vedai.git
!git clone https://github.com/nikitalpopov/vedai
%cd pytorch-vedai
!pip install .

Cloning into 'pytorch-vedai'...
remote: Enumerating objects: 380, done.[K
remote: Counting objects: 100% (380/380), done.[K
remote: Compressing objects: 100% (235/235), done.[K
remote: Total 380 (delta 231), reused 283 (delta 134), pack-reused 0[K
Receiving objects: 100% (380/380), 3.57 MiB | 33.56 MiB/s, done.
Resolving deltas: 100% (231/231), done.
fatal: destination path 'vedai' already exists and is not an empty directory.
/content/pytorch-vedai
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Processing /content/pytorch-vedai
[33m  DEPRECATION: A future pip version will change local packages to be built in-place without first copying to a temporary directory. We recommend you use --use-feature=in-tree-build to test your packages with this new behavior before it becomes the default.
   pip 21.3 will remove support for this functionality. You can find discussion regarding this at https://github.com/pypa/pip/issues/7555.[0m
Coll

In [None]:
class VEDAI(Dataset):

  def __getitem__(self, key):
    return self.data[key]

In [None]:
!train_distr

Traceback (most recent call last):
  File "/usr/local/bin/train_distr", line 5, in <module>
    from src.entrypoints.train_distributed import main
ModuleNotFoundError: No module named 'src.entrypoints'


# Method 6 : using the SSD7 training tutorial

In [None]:
!git clone https://github.com/pierluigiferrari/ssd_keras.git
%cd ssd_keras

fatal: destination path 'ssd_keras' already exists and is not an empty directory.
/content/ssd_keras


In [None]:
!pip install keras==2.10

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, TerminateOnNaN, CSVLogger
from keras import backend as K
from keras.models import load_model
from math import ceil
import numpy as np
from matplotlib import pyplot as plt

from models.keras_ssd7 import build_model
from keras_loss_function.keras_ssd_loss import SSDLoss
from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes
from keras_layers.keras_layer_DecodeDetections import DecodeDetections
from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast

from ssd_encoder_decoder.ssd_input_encoder import SSDInputEncoder
from ssd_encoder_decoder.ssd_output_decoder import decode_detections, decode_detections_fast

from data_generator.object_detection_2d_data_generator import DataGenerator
from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms
from data_generator.data_augmentation_chain_variable_input_size import DataAugmentationVariableInputSize
from data_generator.data_augmentation_chain_constant_input_size import DataAugmentationConstantInputSize
from data_generator.data_augmentation_chain_original_ssd import SSDDataAugmentation

%matplotlib inline

ModuleNotFoundError: ignored

In [None]:
img_height = 300 # Height of the input images
img_width = 480 # Width of the input images
img_channels = 3 # Number of color channels of the input images
intensity_mean = 127.5 # Set this to your preference (maybe `None`). The current settings transform the input pixel values to the interval `[-1,1]`.
intensity_range = 127.5 # Set this to your preference (maybe `None`). The current settings transform the input pixel values to the interval `[-1,1]`.
n_classes = 5 # Number of positive classes
scales = [0.08, 0.16, 0.32, 0.64, 0.96] # An explicit list of anchor box scaling factors. If this is passed, it will override `min_scale` and `max_scale`.
aspect_ratios = [0.5, 1.0, 2.0] # The list of aspect ratios for the anchor boxes
two_boxes_for_ar1 = True # Whether or not you want to generate two anchor boxes for aspect ratio 1
steps = None # In case you'd like to set the step sizes for the anchor box grids manually; not recommended
offsets = None # In case you'd like to set the offsets for the anchor box grids manually; not recommended
clip_boxes = False # Whether or not to clip the anchor boxes to lie entirely within the image boundaries
variances = [1.0, 1.0, 1.0, 1.0] # The list of variances by which the encoded target coordinates are scaled
normalize_coords = True # Whether or not the model is supposed to use coordinates relative to the image size