# FCOS Model

We will use the FCOS model from the [FCOS](./documentation/fcos.pdf) paper. This model does not use anchor boxes: it is an anchor-free detector.

## Imports

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.io import read_image
from torchvision.transforms import (
    Compose,
    Normalize,
    Resize,
    ToPILImage,
    ToTensor,
)
import torchvision

from tqdm import tqdm


## Build the Dataset Class

We will use a subset of the COCO dataset to train this model.



### Custom Dataset Class

This part is not yet working.

In [15]:
def map_to_class(cat) -> int:
    """Return the integer class id for a category name.

    Args:
        cat: Category name as it appears in the annotations
            (for example ``"triton"``).

    Returns:
        The class id (1 to 6) of a known category, or ``-1`` when the
        category is not in the table.
    """
    # Named ``class_ids`` rather than ``map`` so the builtin is not shadowed.
    class_ids = {
        "triton": 1,
        "grenouille-crapaud": 2,
        "planche": 3,
        "feuille": 4,
        "souris": 5,
        "insecte": 6,
    }
    return class_ids.get(cat, -1)

class CrapaudDataset(Dataset):
    """Dataset that loads images and their labels from a CSV file.

    The images live in the directory ``img_dir`` and the labels in the CSV
    file ``annotations_file``. The first CSV column is read as the image
    file name (relative to ``img_dir``) and the second column as the label.
    According to the original note, labels are integers in [0, 1] that
    indicate the presence of a "planche".

    Args:
        annotations_file: Path (or file-like object) of the CSV file,
            passed to ``pandas.read_csv``.
        img_dir: Directory that contains the image files.
        transform: Optional callable applied to each loaded image.
        target_transform: Optional callable applied to each label.
    """

    def __init__(self,
                 annotations_file,
                 img_dir,
                 transform=None,
                 target_transform=None) -> None:
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self) -> int:
        """Return the number of rows in the annotations table."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at row ``idx``."""
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = read_image(img_path)
        label = self.img_labels.iloc[idx, 1]
        # Compare against None explicitly: a callable that defines __len__
        # or __bool__ could be falsy and would then be skipped silently.
        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            label = self.target_transform(label)
        return image, label

### Coco Vision Dataset

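This is the official COCO dataset class from torchvision, copied here. `CocoDetection` returns the full list of annotation dictionaries for each image, whereas `CocoCaptions` subclasses it and returns only the caption strings of those annotations.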

In [16]:
import os.path
from typing import Any, Callable, List, Optional, Tuple

from PIL import Image

from torchvision.datasets import VisionDataset


class CocoDetection(VisionDataset):
    """Dataset over `MS COCO detection <https://cocodataset.org/#detection-2016>`_ annotations.

    The `COCO API <https://github.com/pdollar/coco/tree/master/PythonAPI>`_
    (``pycocotools``) must be installed.

    Args:
        root (string): Directory that holds the image files.
        annFile (string): Path to the JSON annotation file.
        transform (callable, optional): Applied to the PIL image only,
            e.g. ``transforms.PILToTensor``.
        target_transform (callable, optional): Applied to the target only.
        transforms (callable, optional): Applied to the ``(image, target)``
            pair and returns the transformed pair.
    """

    def __init__(
        self,
        root: str,
        annFile: str,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        transforms: Optional[Callable] = None,
    ) -> None:
        super().__init__(root, transforms, transform, target_transform)
        # Imported here so pycocotools is only needed once a dataset is built.
        from pycocotools.coco import COCO

        self.coco = COCO(annFile)
        # Sorted ids give a stable index -> image id mapping.
        self.ids = sorted(self.coco.imgs.keys())

    def __len__(self) -> int:
        """Return the number of images listed in the annotation file."""
        return len(self.ids)

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        """Return the ``(image, target)`` pair at position ``index``."""
        image_id = self.ids[index]
        image, target = self._load_image(image_id), self._load_target(image_id)

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target

    def _load_image(self, id: int) -> Image.Image:
        """Open the image with the given COCO id and convert it to RGB."""
        image_info = self.coco.loadImgs(id)[0]
        image_path = os.path.join(self.root, image_info["file_name"])
        return Image.open(image_path).convert("RGB")

    def _load_target(self, id: int) -> List[Any]:
        """Return every annotation attached to the image with the given id."""
        annotation_ids = self.coco.getAnnIds(id)
        return self.coco.loadAnns(annotation_ids)


class CocoCaptions(CocoDetection):
    """Dataset over `MS COCO captions <https://cocodataset.org/#captions-2015>`_ annotations.

    Behaves like :class:`CocoDetection`, except that the target of each
    sample is the list of caption strings instead of the raw annotations.

    The `COCO API <https://github.com/pdollar/coco/tree/master/PythonAPI>`_
    (``pycocotools``) must be installed.

    Args:
        root (string): Directory that holds the image files.
        annFile (string): Path to the JSON annotation file.
        transform (callable, optional): Applied to the PIL image only,
            e.g. ``transforms.PILToTensor``.
        target_transform (callable, optional): Applied to the target only.
        transforms (callable, optional): Applied to the ``(image, target)``
            pair and returns the transformed pair.

    Example:

        .. code:: python

            import torchvision.datasets as dset
            import torchvision.transforms as transforms
            cap = dset.CocoCaptions(root = 'dir where images are',
                                    annFile = 'json annotation file',
                                    transform=transforms.PILToTensor())

            print('Number of samples: ', len(cap))
            img, target = cap[3] # load 4th sample

            print("Image Size: ", img.size())
            print(target)

        Output: ::

            Number of samples: 82783
            Image Size: (3, 427, 640)
            ['A plane emitting smoke stream flying over a mountain.',
            'A plane darts across a bright blue sky behind a mountain covered in snow',
            'A plane leaves a contrail above the snowy mountain top.',
            'A mountain that has a plane flying overheard in the distance.',
            'A mountain view with a plume of smoke in the background']

    """

    def _load_target(self, id: int) -> List[str]:
        """Return only the ``"caption"`` field of each annotation."""
        annotations = super()._load_target(id)
        return [annotation["caption"] for annotation in annotations]

## Data Importation

We instantiate the dataset and the dataloader.

In [38]:
# Image transformation pipeline: convert to PIL, resize to 256x256, then
# convert back to a tensor.
# NOTE(review): the pipeline is defined but not handed to the datasets
# below (both use transform=None) — confirm whether that is intended.
img_pipeline = Compose([ToPILImage(), Resize((256, 256)), ToTensor()])

# Load the data: images and annotation files live under subset/coco_subset.
coco_root = os.path.join("subset", "coco_subset")
ds_train = CocoDetection(
    root=os.path.join(coco_root, "train"),
    annFile=os.path.join(coco_root, "train.json"),
    transform=None,
)
ds_val = CocoDetection(
    root=os.path.join(coco_root, "val"),
    annFile=os.path.join(coco_root, "val.json"),
    transform=None,
)

# Hyperparameters
batch_size = 4
validation_split = 0.2
shuffle_dataset = True
random_seed = 42
hp = {"num_epochs": 3, "learning_rate": 0.001, "momentum": 0.9}

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


## Train/Validation Data Loaders

In [40]:
def detection_collate(batch):
    """Group detection samples into a batch without stacking them.

    Each sample is an ``(image, target)`` pair whose target is a list of
    annotations of varying length, so the default collate function cannot
    stack the samples into tensors.

    Args:
        batch: List of ``(image, target)`` pairs produced by the dataset.

    Returns:
        A tuple ``(images, targets)`` where each element is itself a tuple
        with one entry per sample.
    """
    return tuple(zip(*batch))


train_loader = torch.utils.data.DataLoader(
    ds_train, batch_size=batch_size, shuffle=True, collate_fn=detection_collate
)
val_loader = torch.utils.data.DataLoader(
    ds_val, batch_size=batch_size, shuffle=True, collate_fn=detection_collate
)

In [44]:
# Inspect which iterator class the training DataLoader hands out.
type(iter(train_loader))

torch.utils.data.dataloader._SingleProcessDataLoaderIter

In [27]:
def imshow(img):
    """Display an image tensor with matplotlib.

    ``img`` is converted with ``.numpy()`` and its axes are reordered from
    (0, 1, 2) to (1, 2, 0) before plotting — presumably channels-first to
    channels-last; confirm against the caller.
    """
    reordered = np.transpose(img.numpy(), axes=(1, 2, 0))
    plt.imshow(reordered)
    plt.show()


# Create an iterator over the training loader. Despite its name, `batch`
# is the iterator itself; batches are fetched from it later with next().
batch = iter(train_loader)


In [26]:
# Check the type of `batch`.
type(batch)

torch.utils.data.dataloader._SingleProcessDataLoaderIter

In [None]:
# Pull the next batch from the iterator.
# NOTE(review): the training dataset is created with transform=None, so
# the samples presumably still hold PIL images at this point, while
# make_grid works on tensors — confirm before relying on this cell.
images, labels = next(batch)

# Tile the images of the batch into one grid and display it.
imshow(torchvision.utils.make_grid(images))

# Print the labels on a single line, separated by spaces.
print(" ".join(format(label) for label in labels))

### Import Json

This is a proof of concept to import a json file.

import json

# Proof of concept: read the training annotation file and tabulate its
# "images" records.
path = os.path.join("subset", "coco_subset", "train.json")
# JSON text is UTF-8 by specification, so do not depend on the platform's
# default encoding when opening the file.
with open(path, encoding="utf-8") as f:
    data = json.load(f)

# One row per entry of the top-level "images" list.
df2 = pd.json_normalize(data, record_path=["images"])
df2

In [45]:
import torch
import torchvision
from torchvision.models import MobileNet_V2_Weights
from torchvision.models.detection import FCOS
from torchvision.models.detection.anchor_utils import AnchorGenerator


# Load a pre-trained classification network and keep only its
# feature-extraction layers to serve as the detection backbone.
backbone = torchvision.models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).features
# FCOS needs to know the number of output channels of the backbone.
# For mobilenet_v2 it is 1280, so it is set explicitly here.
backbone.out_channels = 1280
# FCOS is anchor-free, so the generator declares a single size per entry
# and a single aspect ratio rather than several box shapes per location.
# The arguments are tuples of tuples because each feature map could
# potentially have its own sizes and aspect ratios.
anchor_generator = AnchorGenerator(
    sizes=((8,), (16,), (32,), (64,), (128,)),
    aspect_ratios=((1.0,),),
)
# Put the pieces together inside a FCOS model.
model = FCOS(
    backbone,
    num_classes=80,
    anchor_generator=anchor_generator,
)
# Smoke test: run two random images of different sizes through the model.
model.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
# Inference only: no gradients are needed, so skip autograd bookkeeping.
with torch.no_grad():
    predictions = model(x)

Downloading: "https://download.pytorch.org/models/mobilenet_v2-7ebf99e0.pth" to /home/olivier/.cache/torch/hub/checkpoints/mobilenet_v2-7ebf99e0.pth


  0%|          | 0.00/13.6M [00:00<?, ?B/s]