Performs bounding-box (bb) annotation correction on the existing labels provided in the dataset.

In [0]:
# Mount Google Drive at /content/drive (the first run prompts for an authorization code).
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
import cv2
import os
import tarfile
import pandas as pd

from skimage import io, draw
from tqdm import tqdm

In [0]:
# Archive path inside the mounted Google Drive, and the local folder it is unpacked into
dataset_tar_loc = 'drive/My Drive/Project STation/detection.tar'
dataset_extract_dst = 'detection/'

# Unpack every member of the archive into the destination folder.
# NOTE(review): extractall() trusts the member paths stored in the archive;
# only run this on an archive from a trusted source.
with tarfile.open(name=dataset_tar_loc, mode='r') as archive:
    archive.extractall(path=dataset_extract_dst)

## Dataset loading and preprocessing
One-time setup that generates the CSV file of annotations for the training images:

1) Bounding box correction in the images (the bounding boxes provided in the dataset are not rectangular and are tilted in orientation, so each one is replaced with an upright, axis-aligned rectangle).

2) Create a CSV file containing the following columns: image, coords, height, width, c_coords, label

In [0]:
def get_box_attrs(annotation_file_path):
    """Parse one annotation file into a list of per-box attribute dicts.

    Every non-blank line describes one box: its first eight comma-separated
    fields are integer coordinates laid out as (x1, x2, x3, x4, y1, y2, y3, y4),
    and the label is the text after the last ``::`` in the last
    comma-separated field. The coordinates are passed through
    ``_correct_gt_bb`` before the derived values are computed.

    Args:
        annotation_file_path: Path of the annotation text file to read.

    Returns:
        A list with one dict per kept box, holding the keys ``coords``
        (corrected coordinates), ``height``, ``width``, ``c_coords``
        ([centre x, centre y]) and ``label``. Boxes labelled ``ENGLISH`` are
        left out, so the list may be empty.

    Raises:
        ValueError: If one of the first eight fields of a non-blank line is
            not an integer.
    """
    box_attrs = list()
    with open(annotation_file_path, 'r') as annotation:
        for box in annotation:
            # A blank (or whitespace-only) line carries no box; without this
            # guard int() would raise on the empty field.
            if not box.strip():
                continue

            fields = box.split(',')
            coords = _correct_gt_bb([int(coord) for coord in fields[:8]])

            attrs = {}
            attrs['coords'] = coords
            attrs['height'] = abs(coords[7] - coords[4])  # height = abs(y4 - y1)
            attrs['width'] = abs(coords[1] - coords[0])  # width = abs(x2 - x1)
            attrs['c_coords'] = [(coords[0] + coords[1]) / 2, (coords[4] + coords[7]) / 2]

            # Get the label for the annotation
            label = fields[-1].split('::')[-1].rstrip('\n')

            # We don't count boxes labeled as `ENGLISH`
            if label == 'ENGLISH':
                continue
            attrs['label'] = label
            box_attrs.append(attrs)
    return box_attrs

In [0]:
def _correct_gt_bb(coord):
    x1, x2, x3, x4, y1, y2, y3, y4 = coord[0],coord[1],coord[2],coord[3],coord[4],coord[5],coord[6],coord[7]
    x1 = x4 = min(x1, x4)
    x2 = x3 = max(x2, x3)
    y1 = y2 = min(y1, y2)
    y3 = y4 = max(y3, y4)
    return (x1, x2, x3, x4, y1, y2, y3, y4)

In [0]:
# Build the ground-truth table: one row per image, holding the corrected boxes
# and their derived attributes as parallel lists.
image_loc = os.path.join(dataset_extract_dst, 'real_Image_dataset_Detection/Image/')
annotation_loc = os.path.join(dataset_extract_dst, 'real_Image_dataset_Detection/Annotation/')
image_list = [os.path.join(image_loc, name) for name in os.listdir(image_loc)]

cols = ['image', 'coords', 'height', 'width', 'c_coords', 'label']

# Collect the rows in a plain list and build the frame once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
records = []
for image_path in tqdm(image_list):
    # The annotation file shares the image's base name, with a .txt extension
    image_name = os.path.splitext(os.path.basename(image_path))[0]
    annotation_name = f"{image_name}.txt"
    annotation_path = os.path.join(annotation_loc, annotation_name)

    box_attrs = get_box_attrs(annotation_path)
    # Images with no kept boxes get no row
    if len(box_attrs) == 0:
        continue

    data_record = {'image': image_path}
    for key in ('coords', 'c_coords', 'height', 'width', 'label'):
        data_record[key] = [box[key] for box in box_attrs]
    records.append(data_record)

# `columns=cols` fixes the column order and keeps the columns even when no rows were collected
ground_truth = pd.DataFrame(records, columns=cols)

100%|██████████| 428/428 [00:01<00:00, 295.32it/s]


In [0]:
# Preview the first rows to check the corrected boxes and derived columns
ground_truth.head()

Unnamed: 0,image,coords,height,width,c_coords,label
0,detection/real_Image_dataset_Detection/Image/1...,"[(286, 683, 683, 286, 25, 25, 204, 204), (268,...","[179, 155, 110]","[397, 412, 402]","[[484.5, 114.5], [474.0, 271.5], [460.0, 404.0]]","[HINDI, HINDI, HINDI]"
1,detection/real_Image_dataset_Detection/Image/3...,"[(59, 157, 157, 59, 201, 201, 261, 261), (159,...","[60, 74, 73, 85, 39, 47, 49, 57, 49]","[98, 169, 149, 167, 185, 175, 134, 54, 31]","[[108.0, 231.0], [243.5, 222.0], [407.5, 218.5...","[HINDI, HINDI, HINDI, HINDI, HINDI, HINDI, HIN..."
2,detection/real_Image_dataset_Detection/Image/1...,"[(5, 272, 272, 5, 183, 183, 400, 400), (257, 4...","[217, 196, 225]","[267, 175, 277]","[[138.5, 291.5], [344.5, 245.0], [571.5, 286.5]]","[HINDI, HINDI, HINDI]"
3,detection/real_Image_dataset_Detection/Image/1...,"[(240, 310, 310, 240, 28, 28, 70, 70), (304, 3...","[42, 56, 46, 172, 137, 96]","[70, 50, 64, 273, 198, 167]","[[275.0, 49.0], [329.0, 50.0], [389.0, 49.0], ...","[HINDI, HINDI, HINDI, HINDI, HINDI, HINDI]"
4,detection/real_Image_dataset_Detection/Image/2...,"[(321, 487, 487, 321, 160, 160, 203, 203), (42...","[43, 70, 65, 80, 48, 46, 41, 34, 38, 46]","[166, 190, 160, 208, 73, 54, 107, 72, 59, 143]","[[404.0, 181.5], [137.0, 253.0], [310.0, 236.5...","[HINDI, HINDI, HINDI, HINDI, HINDI, HINDI, HIN..."


In [0]:
# Export the dataframe to a CSV file, without the row index.
# NOTE(review): the list-valued columns are presumably written as their string
# form, so a reader would have to parse them back — confirm before consuming.
ground_truth.to_csv('annotations.csv', index=False)