In [None]:
# Torch - Torchvision - IceVision - IceData - MMDetection - YOLOv5 - EfficientDet Installation
!wget https://raw.githubusercontent.com/airctic/icevision/master/icevision_install.sh

# Choose your installation target: cuda11 or cuda10 or cpu
!bash icevision_install.sh cuda11 master

In [None]:
pip install wandb -qq

In [None]:
# Restart kernel after installation
import IPython
IPython.Application.instance().kernel.do_shutdown(True)

In [6]:
from icevision.all import *

In [7]:
WANDB_PROJECT = "mlops-assignment-001"
ENTITY = None # set this to team name if working in a team
# BDD_CLASSES = {i:c for i,c in enumerate(['background', 'road', 'traffic light', 'traffic sign', 'person', 'vehicle', 'bicycle'])}
RAW_DATA_AT = 'kaggle_obj_det'
PROCESSED_DATA_AT = 'kaggle_obj_det_split'

In [8]:
import wandb
import pandas as pd
from PIL import Image

In [9]:
train_config = SimpleNamespace(
    framework="fastai",
    img_size=384,
    batch_size=8,
    augment=True, # use data augmentation
    epochs=1, 
    lr=0.00145,
    pretrained=True,  # whether to use pretrained encoder
    seed=42,
)

In [10]:
run = wandb.init(project=WANDB_PROJECT, entity=ENTITY, job_type="training", config=train_config)

04/17/2023 14:15:32 - ERROR - wandb.jupyter -   Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mpmon[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [16]:
processed_data_at = run.use_artifact(f'{PROCESSED_DATA_AT}:latest')
processed_dataset_dir = Path(processed_data_at.download())
#processed_dataset_dir = Path('artifacts/kaggle_obj_det_split:v0')
splits = pd.read_csv(processed_dataset_dir / 'data_split.csv')

[34m[1mwandb[0m: Downloading large artifact kaggle_obj_det_split:latest, 12833.47MB. 43676 files... 
[34m[1mwandb[0m:   43676 of 43676 files downloaded.  
Done. 0:0:39.7


In [17]:
processed_dataset_dir

Path('artifacts/kaggle_obj_det_split:v0')

In [18]:
annot_dict = json.load(open(processed_dataset_dir / "train_sample.json"))

In [19]:
images_corr = pd.DataFrame(annot_dict['images'])

In [20]:
classes = annot_dict["categories"]
class_map = {i: c["name"] for i,c in enumerate(classes)}
idx_map = {c["id"]: i for i,c in enumerate(classes)}

In [21]:
class_map

{0: 'chair', 1: 'couch', 2: 'tv', 3: 'remote', 4: 'book', 5: 'vase'}

In [22]:
df = pd.DataFrame(annot_dict["annotations"])
df = df.merge(images_corr, how='left', left_on="image_id", right_on="id")

In [23]:
df.drop(columns="id", inplace=True)

In [24]:
df

Unnamed: 0,image_id,bbox,category_id,file_name
0,318219,"[505.24, 0.0, 47.86, 309.25]",72,000000318219.jpg
1,318219,"[470.68, 0.0, 45.93, 191.86]",72,000000318219.jpg
2,318219,"[442.51, 0.0, 43.39, 119.87]",72,000000318219.jpg
3,554625,"[380.74, 112.85, 40.62, 248.82]",72,000000554625.jpg
4,554625,"[339.13, 32.99, 32.99, 175.5]",72,000000554625.jpg
...,...,...,...,...
87101,516168,"[402.35, 174.64, 41.92, 5.3]",84,000000516168.jpg
87102,516168,"[392.48, 176.97, 54.12, 9.48]",84,000000516168.jpg
87103,516168,"[338.2, 115.55, 49.63, 57.9]",84,000000516168.jpg
87104,516168,"[429.18, 221.54, 22.58, 14.09]",84,000000516168.jpg


In [25]:
class ClassMapCustom(ClassMap):
    def __init__(
        self,
        classes: Dict = None,
        background: Optional[str] = BACKGROUND,
    ):
        self._lock = True   
        self._background = background
        # self._id2class = {c["id"]: c["name"] for c in classes}
        self._id2class = {i+1: c["name"] for i,c in enumerate(classes)}
        self._id2class[0] = self._background
        self._class2id = {value: key for key, value in self._id2class.items()}

In [26]:
template_record = ObjectDetectionRecord()

In [27]:
class CustomCocoParser(Parser):
    def __init__(self, template_record, data_dir):
        super().__init__(template_record=template_record)
        self.data_dir = data_dir
        annot_dict = json.load(open(data_dir / "train_sample.json"))
        df = pd.DataFrame(annot_dict["annotations"])
        images_corr = pd.DataFrame(annot_dict["images"])
        df = df.merge(images_corr, how='left', left_on="image_id", right_on="id")
        df.drop(columns="id", inplace=True)
        self.df = df
        self.add_size()
        classes = annot_dict["categories"]
        class_map = {c["id"]: c["name"] for c in classes}
        self.df["category"] = self.df["category_id"].replace(class_map)
        idx_map = {c["id"]: i+1 for i,c in enumerate(classes)}
        self.df["category_id"] = self.df["category_id"].replace(idx_map)
        # Test to fix issue ====================================================
        classes = [c["name"] for c in classes]
        # ======================================================================
        # self.class_map = ClassMapCustom(classes)
        self.class_map = ClassMap(classes)

    def __iter__(self) -> Any:
        for o in self.df.itertuples():
            yield o

    def __len__(self) -> int:
        return len(self.df)

    def add_size(self):
      image_height = []
      image_width = []
      for i in self.df.file_name:
        image = Image.open(processed_dataset_dir / "images" / i)
        width, height = image.size
        image_height.append(height)
        image_width.append(width)
      self.df["height"] = image_height
      self.df["width"] = image_width

    def record_id(self, o) -> Hashable:
        return o.file_name

    def parse_fields(self, o, record, is_new):
        if is_new:
            record.set_filepath(self.data_dir / 'images' / o.file_name)
            record.set_img_size(ImgSize(width=o.width, height=o.height))
            record.detection.set_class_map(self.class_map)

        record.detection.add_bboxes([BBox.from_xywh(o.bbox[0], o.bbox[1], o.bbox[2], o.bbox[3])])
        record.detection.add_labels([o.category])

In [28]:
parser = CustomCocoParser(template_record=template_record, data_dir = processed_dataset_dir)

In [29]:
split_train = splits[splits['Stage']=="train"]["File_Name"].tolist()
split_val = splits[splits['Stage']=="val"]["File_Name"].tolist()
split_test = splits[splits['Stage']=="test"]["File_Name"].tolist()

In [30]:
splitter_list = []
splitter_list.append(split_train)
splitter_list.append(split_val)
splitter_list.append(split_test)

In [31]:
splitter = FixedSplitter(splitter_list)

In [32]:
train_records, valid_records, test_records = parser.parse(data_splitter=splitter)

100%|██████████| 87106/87106 [00:03<00:00, 21962.70it/s]
[1m[1mINFO    [0m[1m[0m - [1m[34m[1mAutofixing records[0m[1m[34m[0m[1m[0m | [36micevision.parsers.parser[0m:[36mparse[0m:[36m122[0m
100%|██████████| 21837/21837 [00:23<00:00, 929.15it/s]


In [33]:
# Transforms
# size is set to 384 because EfficientDet requires its inputs to be divisible by 128
image_size = 384
train_tfms = tfms.A.Adapter([*tfms.A.aug_tfms(size=image_size, presize=512), tfms.A.Normalize()])
valid_tfms = tfms.A.Adapter([*tfms.A.resize_and_pad(image_size), tfms.A.Normalize()])

In [34]:
# Datasets
train_ds = Dataset(train_records, train_tfms)
valid_ds = Dataset(valid_records, valid_tfms)

In [35]:
# Show an element of the train_ds with augmentation transformations applied
samples = [valid_ds[2100] for _ in range(3)]
show_samples(samples, ncols=3)

In [36]:
samples

[BaseRecord
 
 common: 
 	- Image size ImgSize(width=384, height=384)
 	- Record ID: 000000275644.jpg
 	- Filepath: artifacts/kaggle_obj_det_split:v0/images/000000275644.jpg
 	- Img: 384x384x3 <np.ndarray> Image
 detection: 
 	- BBoxes: [<BBox (xmin:73.18199999999999, ymin:214.38600000000002, xmax:383.14200000000005, ymax:384.0)>]
 	- Class Map: <ClassMap: {'background': 0, 'chair': 1, 'couch': 2, 'tv': 3, 'remote': 4, 'book': 5, 'vase': 6}>
 	- Labels: [5],
 BaseRecord
 
 common: 
 	- Image size ImgSize(width=384, height=384)
 	- Record ID: 000000275644.jpg
 	- Filepath: artifacts/kaggle_obj_det_split:v0/images/000000275644.jpg
 	- Img: 384x384x3 <np.ndarray> Image
 detection: 
 	- BBoxes: [<BBox (xmin:73.18199999999999, ymin:214.38600000000002, xmax:383.14200000000005, ymax:384.0)>]
 	- Class Map: <ClassMap: {'background': 0, 'chair': 1, 'couch': 2, 'tv': 3, 'remote': 4, 'book': 5, 'vase': 6}>
 	- Labels: [5],
 BaseRecord
 
 common: 
 	- Image size ImgSize(width=384, height=384)
 	- 

### Creating a model
Selections only take two simple lines of code. For example, to try the mmdet library using the retinanet model and the resnet50_fpn_1x backbone  could be specified by:
```
model_type = models.mmdet.retinanet
backbone = model_type.backbones.resnet50_fpn_1x(pretrained=True)
```
As pretrained models are used by default, we typically leave this out of the backbone creation step.

We've selected a few of the many options below. You can easily pick which option you want to try by setting the value of `selection`. This shows you how easy it is to try new libraries, models, and backbones.


In [37]:
# Just change the value of selection to try another model

selection = 12

extra_args = {}

if selection == 0:
  model_type = models.mmdet.vfnet
  backbone = model_type.backbones.resnet50_fpn_mstrain_2x

if selection == 1:
  model_type = models.mmdet.retinanet
  backbone = model_type.backbones.resnet50_fpn_1x
  # extra_args['cfg_options'] = { 
  #   'model.bbox_head.loss_bbox.loss_weight': 2,
  #   'model.bbox_head.loss_cls.loss_weight': 0.8,
  #    }

if selection == 2:
  model_type = models.mmdet.faster_rcnn
  backbone = model_type.backbones.resnet101_fpn_2x
  # extra_args['cfg_options'] = { 
  #   'model.roi_head.bbox_head.loss_bbox.loss_weight': 2,
  #   'model.roi_head.bbox_head.loss_cls.loss_weight': 0.8,
  #    }

if selection == 3:
  model_type = models.mmdet.ssd
  backbone = model_type.backbones.ssd300

if selection == 4:
  model_type = models.mmdet.yolox
  backbone = model_type.backbones.yolox_s_8x8

if selection == 5:
  model_type = models.mmdet.yolof
  backbone = model_type.backbones.yolof_r50_c5_8x8_1x_coco

if selection == 6:
  model_type = models.mmdet.detr
  backbone = model_type.backbones.r50_8x2_150e_coco

if selection == 7:
  model_type = models.mmdet.deformable_detr
  backbone = model_type.backbones.twostage_refine_r50_16x2_50e_coco

if selection == 8:
  model_type = models.mmdet.fsaf
  backbone = model_type.backbones.x101_64x4d_fpn_1x_coco

if selection == 9:
  model_type = models.mmdet.sabl
  backbone = model_type.backbones.r101_fpn_gn_2x_ms_640_800_coco

if selection == 10:
  model_type = models.mmdet.centripetalnet
  backbone = model_type.backbones.hourglass104_mstest_16x6_210e_coco

elif selection == 11:
  # The Retinanet model is also implemented in the torchvision library
  model_type = models.torchvision.retinanet
  backbone = model_type.backbones.resnet50_fpn

elif selection == 12:
  model_type = models.ross.efficientdet
  backbone = model_type.backbones.tf_lite0
  # The efficientdet model requires an img_size parameter
  extra_args['img_size'] = image_size

elif selection == 13:
  model_type = models.ultralytics.yolov5
  backbone = model_type.backbones.small
  # The yolov5 model requires an img_size parameter
  extra_args['img_size'] = image_size

model_type, extra_args

(<module 'icevision.models.ross.efficientdet' from '/home/pmonniot/anaconda3/envs/wandb-ice/lib/python3.9/site-packages/icevision/models/ross/efficientdet/__init__.py'>,
 {'img_size': 384})

In [38]:
backbone.__dict__

{'model_name': 'tf_efficientdet_lite0'}

In [39]:
# Instantiate the model
model = model_type.model(backbone=backbone(pretrained=True), num_classes=len(parser.class_map), **extra_args) 

In [40]:
# Data Loaders
train_dl = model_type.train_dl(train_ds, batch_size=16, num_workers=10, shuffle=True)
valid_dl = model_type.valid_dl(valid_ds, batch_size=8, num_workers=4, shuffle=False)

In [41]:
# show batch
model_type.show_batch(first(valid_dl), ncols=4)

In [42]:
metrics = [COCOMetric(metric_type=COCOMetricType.bbox)]


In [43]:
class COCOMetric_perclass(COCOMetric):
   def finalize(self) -> Dict[str, float]:
    with CaptureStdout():
        coco_eval = create_coco_eval(
            records=self._records,
            preds=self._preds,
            metric_type=self.metric_type.value,
            iou_thresholds=self.iou_thresholds,
            show_pbar=self.show_pbar,
        )
        coco_eval.params.catIds = self.class_ids #<== Add this row!!!!!!!!!!
        coco_eval.evaluate()
        coco_eval.accumulate()


    with CaptureStdout(propagate_stdout=self.print_summary):
        coco_eval.summarize()
    stats = coco_eval.stats
    logs = {
        "AP (IoU=0.50:0.95) area=all": stats[0],
        "AP (IoU=0.50) area=all": stats[1],
        "AP (IoU=0.75) area=all": stats[2],
        "AP (IoU=0.50:0.95) area=small": stats[3],
        "AP (IoU=0.50:0.95) area=medium": stats[4],
        "AP (IoU=0.50:0.95) area=large": stats[5],
        "AR (IoU=0.50:0.95) area=all maxDets=1": stats[6],
        "AR (IoU=0.50:0.95) area=all maxDets=10": stats[7],
        "AR (IoU=0.50:0.95) area=all maxDets=100": stats[8],
        "AR (IoU=0.50:0.95) area=small maxDets=100": stats[9],
        "AR (IoU=0.50:0.95) area=medium maxDets=100": stats[10],
        "AR (IoU=0.50:0.95) area=large maxDets=100": stats[11],
    }
    self._reset()
    return logs
   
class COCOMetric_Chair(COCOMetric_perclass):class_ids = [1] 
class COCOMetric_Couch(COCOMetric_perclass):class_ids = [2] 
class COCOMetric_TV(COCOMetric_perclass):class_ids = [3] 
class COCOMetric_Remote(COCOMetric_perclass):class_ids = [4] 
class COCOMetric_Book(COCOMetric_perclass):class_ids = [5] 
class COCOMetric_Vase(COCOMetric_perclass):class_ids = [6] 


In [44]:
metrics = [COCOMetric_Chair(), COCOMetric_Couch(), \
               COCOMetric_TV(), COCOMetric_Remote(), COCOMetric_Book(), COCOMetric_Vase()]

In [45]:
from fastai.callback.wandb import *
from fastai.callback.tracker import SaveModelCallback
from fastai.torch_core import set_seed

In [46]:
set_seed(train_config.seed, reproducible=True)

In [47]:
learn = model_type.fastai.learner(dls=[train_dl, valid_dl], model=model, metrics=metrics, cbs=[WandbCallback(log_dataset=True, log_model=True), SaveModelCallback()])

In [None]:
learn.lr_find()

# For Sparse-RCNN, use lower `end_lr`
# learn.lr_find(end_lr=0.005)

In [48]:
learn.fine_tune(1, 0.00145, freeze_epochs=1)

Could not gather input dimensions
WandbCallback could not retrieve the dataset path, please provide it explicitly to "log_dataset"
WandbCallback was not able to prepare a DataLoader for logging prediction samples -> 'Dataset' object has no attribute 'items'


epoch,train_loss,valid_loss,COCOMetric_Chair,COCOMetric_Couch,COCOMetric_TV,COCOMetric_Remote,COCOMetric_Book,COCOMetric_Vase,time


RuntimeError: CUDA out of memory. Tried to allocate 12.00 MiB (GPU 0; 4.00 GiB total capacity; 3.39 GiB already allocated; 0 bytes free; 3.46 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
learn.save_model

In [None]:
infer_dl = model_type.infer_dl(valid_ds, batch_size=8, shuffle=False)
preds = model_type.predict_from_dl(model, infer_dl, keep_images=True)

In [None]:
samples
preds[0].pred.as_dict()

In [None]:
preds["boxes"]

In [None]:
show_preds(preds=preds[:4])

In [None]:
# This crashes
# def get_predictions(learner, test_dl=None, max_n=None):
#     """Return the samples = (x,y) and outputs (model predictions decoded), and predictions (raw preds)"""
#     test_dl = learner.dls.valid if test_dl is None else test_dl
#     inputs, predictions, targets, outputs = learner.get_preds(
#         dl=test_dl, with_input=True, with_decoded=True
#     )
#     x, y, samples, outputs = learner.dls.valid.show_results(
#         tuplify(inputs) + tuplify(targets), outputs, show=False, max_n=max_n
#     )
#     return samples, outputs, predictions

In [None]:
# samples, outputs, predictions = get_predictions(learn)

In [None]:
wandb_images = wandb_img_preds(preds, add_ground_truth=True) 
wandb.log({"Predicted images": wandb_images})


In [None]:
preds[0].pred

In [None]:
# table = create_iou_table(samples, outputs, predictions, )
# wandb.log({"pred_table":table})
wandb.join()