In [1]:
# !python -m pip install -r ../requirements.txt
# !python -m pip install -e ../libs

# What is this notebook for?

Before the training data is annotated manually, this notebook performs the following pre-annotation tasks:
1. Filter the training data split prepared in the `owlv2_datasetprep_1.ipynb` notebook.
2. Annotate the images with the bounding boxes predicted by the OWLv2 model.
3. Prepare the dataset and upload it to Roboflow.
4. (In Roboflow) Adjust the bounding boxes and labels.

## Validate the train/val split before pre-annotating

In [2]:
from binsense.config import BIN_DATA_DIR
import pandas as pd
import os

# Load the item-level split file. The two text columns are read as str
# explicitly so pandas does not infer another dtype for them.
split_csv_path = os.path.join(BIN_DATA_DIR, 'train_test_val_split_item.csv')
kept_columns = ["image_name", "tag", "bbox_label", "bbox_count"]
item_df = pd.read_csv(
    split_csv_path,
    dtype={'image_name': str, 'bbox_label': str},
).loc[:, kept_columns]

# Keep every row whose tag is not "test", then reduce to one row per
# distinct (image_name, tag) pair.
is_not_test = item_df["tag"].ne("test")
train_item_df = item_df.loc[is_not_test]
train_bin_df = (
    train_item_df
    .loc[:, ["image_name", "tag"]]
    .drop_duplicates(ignore_index=True)
)

# An image_name that appears under two different tags survives the
# de-duplication above, so the row count would exceed the unique count.
bin_row_count = len(train_bin_df)
unique_image_count = train_bin_df["image_name"].nunique()
print('check(No duplicate bins):', bin_row_count == unique_image_count)

check(No duplicate bins): True


## Pre-annotate the images with the bounding boxes predicted by OWLv2 model
Any other model could be used here. OWLv2 (based on CLIP, trained on social-network images) seems to be good enough for now; I haven't tried other models.

In [4]:
from binsense.cli.owlv2.run_preannotator import run_annotate

import logging
# Configure the root logger to emit INFO-and-above records, each prefixed
# with a timestamp and the level name.
LOG_FORMAT = '%(asctime)s %(levelname)s : %(message)s'
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

# The call is kept as the cell's last expression so its return value is
# displayed below the log output.
# NOTE(review): test_run=True presumably limits the run to a small sample -
# confirm in run_preannotator before relying on the result.
annotate_options = {"batch_size": 2, "test_run": True}
run_annotate(**annotate_options)

2024-03-30 19:12:14,520 INFO : backed up /Users/n0c09jf/code/github/binsense/_data/bin/pre-annotate_labels.cpt as /Users/n0c09jf/code/github/binsense/_data/bin/pre-annotate_labels.cpt.11.bkp
2024-03-30 19:12:14,525 INFO : backed up /Users/n0c09jf/code/github/binsense/_data/bin/pre-annotate_bboxes.cpt as /Users/n0c09jf/code/github/binsense/_data/bin/pre-annotate_bboxes.cpt.12.bkp
2024-03-30 19:12:45,531 INFO : predicting bboxes:   0%|          | 0/5 [00:30<?, ?it/s]
2024-03-30 19:13:27,643 INFO : predicting bboxes:  20%|██        | 1/5 [01:12<02:03, 30.80s/it]
2024-03-30 19:14:16,297 INFO : predicting bboxes:  40%|████      | 2/5 [02:01<01:52, 37.45s/it]
2024-03-30 19:15:04,142 INFO : predicting bboxes:  60%|██████    | 3/5 [02:49<01:25, 42.57s/it]
2024-03-30 19:15:50,592 INFO : predicting bboxes:  80%|████████  | 4/5 [03:35<00:44, 44.65s/it]


('/Users/n0c09jf/code/github/binsense/_data/bin/pre-annotate_labels.cpt',
 '/Users/n0c09jf/code/github/binsense/_data/bin/pre-annotate_bboxes.cpt')

## Prepare the folder in YOLOv8 format to be uploaded to Roboflow
Roboflow supports only the YOLOv8 format for uploads through the browser. I tried the COCO and VOC formats, but the annotations did not show up after uploading.

In [1]:
from binsense.cli.owlv2.run_preannotator import run_create_dataset

# Build the folder to upload to Roboflow; the function prints the directory
# it prepared.
# NOTE(review): presumably this consumes the pre-annotation checkpoint files
# written by run_annotate - confirm in run_preannotator.
run_create_dataset()

upload dir (/Users/n0c09jf/code/github/binsense/_data/bin/robo_upload) prepared in yolov8 format


## Manually upload the prepared directory to app.roboflow.com
1. Create an object detection project with 'bins' as the annotation group.
2. Select the project and go to the 'Upload Data' section.
3. Drag and drop the folder prepared by the cell above.

## Prepare a CSV file to be shared with the annotators

In [2]:

from binsense.config import BIN_DATA_DIR
import binsense.dataprep.metadata as m
import os, logging

# Emit INFO-and-above log records, each prefixed with a timestamp and level.
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s : %(message)s')

# m.load() returns a pair; only its second element (the item-level frame)
# is used here.
_, loaded_item_df = m.load()

# Derive the image file name from the bin id, order the rows by bin and then
# item, and drop bin_id, which image_name now stands in for. This is built as
# a non-mutating chain so the frame returned by m.load() is left untouched
# (the previous version sorted and dropped with inplace=True).
all_item_df = (
    loaded_item_df
    .assign(image_name=lambda frame: frame["bin_id"] + '.jpg')
    .sort_values(by=["bin_id", "item_id"])
    .drop(columns=["bin_id"])
)

# Move image_name to the first column; the other columns keep their order.
leading_columns = ["image_name"]
remaining_columns = [col for col in all_item_df.columns if col not in leading_columns]
all_item_df = all_item_df[leading_columns + remaining_columns]

# Write the sheet without the index.
all_item_df.to_csv(
    os.path.join(BIN_DATA_DIR, 'data_for_annotators.csv'),
    index=False
)

loading bin-metadata: 100%|██████████| 3875/3875 [01:23<00:00, 46.19it/s]
