In [1]:
import json
import matplotlib.pyplot as plt
import numpy as np
from os import makedirs
from os.path import isfile, join
from pathlib import Path, PosixPath
import cv2
import bs4
from bs4 import BeautifulSoup
from shutil import copy2
from tqdm import tqdm
from tqdm.notebook import tqdm as ntqdm
from random import shuffle

from numpy.typing import ArrayLike
from typing import Any, Iterable, List, Tuple

plt.rcParams['figure.figsize'] = [20, 15]

In [29]:
"""
Yolo format 

dataset.yaml
├── images
│   ├── test
│   ├── train
│   └── val
└── labels
    ├── test
    ├── train
    └── val
    

"""


class Bbox:
    def __init__(self, xyxy: list | tuple, label: int):
        self.x1 = int(xyxy[0])
        self.y1 = int(xyxy[1])
        self.x2 = int(xyxy[2])
        self.y2 = int(xyxy[3])
        self.label = label
        # Fixme: Adapt label to COCO format in future.
        self.pt1 = (self.x1, self.y1)
        self.pt2 = (self.x2, self.y2)
        self.width = abs(self.x2 - self.x1)
        self.height = abs(self.y2 - self.y1)

    def to_coco(self) -> List[int]:
        return [self.x1, self.y1, self.width, self.height]

    def to_yolo(self, img_width: int, img_height: int) -> List[float]:
        x_cen = self.x1 + self.width/2
        y_cen = self.y1 + self.height/2
        x_cen = x_cen / img_width
        y_cen = y_cen / img_height
        width = self.width / img_width
        height = self.height / img_height
        return [x_cen, y_cen, width, height]
    
    def draw(self, img: ArrayLike, color: tuple = (255, 0, 0)) -> ArrayLike:
        img = cv2.rectangle(img, self.pt1, self.pt2, color, 2)
        return img
    
class Segment:
    def __init__(self, input_annot: List[float | int], label: int):
        """
        Args:
            input_annot(list): Segmentation mask points in form of "x1 y1 x2 y2 ... xn yn"
            label(int): The digit denoting the label of the item.
        """
        self.input_annot_type = input_type
        self.input_annot = input_annot
        self.pts = self.chunk(input_annot)
        self.label = label
    
    def chunk(self, arr: List) -> List[ArrayLike]:
        return [np.array(arr[i:i + 2]).reshape((-1,1,2)).astype(np.int32) for i in range(0, len(arr), 2)]

    def segment_to_yolo(self, img_w, img_h):
        text = ""
        for i, p in enumerate(self.input_annot):
            if i % 2 == 0: # x points / img_w
                text += f" {p/img_w:.3f}"
            else: # y points / img_h
                text += f" {p/img_h:.3f}"
        return text
    
    def get_bbox(self) -> Bbox:
        Xs = [int(item) for i, item in enumerate(self.input_annot) if i%2 == 0]
        Ys = [int(item) for i, item in enumerate(self.input_annot) if i%2 == 1]
        x1, y1 = max(Xs), max(Ys)
        x2, y2 = min(Xs), min(Ys)
        bbox = Bbox([x1, y1, x2, y2], self.label)
        return bbox
    
    def draw(self, img: ArrayLike, color: tuple = (0, 255, 0), draw_bbox = True) -> ArrayLike:
        img = cv2.drawContours(img, self.pts, -1, color, 3)
        bbox: Bbox = self.get_bbox()
        if draw_bbox:
            img = bbox.draw(img, color=color)
        return img

    
class ImageAnnot:
    # Fixme: Adapt the framework for training bbox annotated project. right now it is decent for segmentation project.
    # Decouple Segment with Bbox.
    def __init__(self, img_path: str | PosixPath):
        """
        img_path(str | pathlib.PosixPath): image path
        
        """
        self.img_path = Path(img_path)
        assert self.img_path.is_file()
        self.name = self.img_path.name
        self.segments = []
        self.img_w, self.img_h = self.get_img_size()
    
    def get_img_size(self):
        img = cv2.imread(self.img_path.as_posix())
        return img.shape[1], img.shape[0]
        
    def add_segment(self, segment: Segment) -> None:
        self.segments.append(segment)

    def get_yolo_format(self, only_bboxes=True):
        """
        the first five numbers still encode the class index and 
        bounding box information. The rest of the numbers encode 
        the boundary of the object that we are trying to segment. 
        Starting from the 6th number, we have space-separated 
        x-y coordinates for each point on the boundary of the 
        object for the segmentation mask.
        
        """
        temp = ""
        for i, item in enumerate(self.segments):
            label = item.label
            x_cen, y_cen, w, h = item.get_bbox().to_yolo(self.img_w, self.img_h)
            temp = f"{label} {x_cen} {y_cen} {w} {h}"
            
            if not only_bboxes:
                segment_txt = item.segment_to_yolo(self.img_w, self.img_h)
                temp += segment_txt

            if i != len(self.segments)-1:
                temp += '\n'
        return temp

    def save_labels(self, save_path: str = "base_dir", train: bool = True, only_bboxes: bool = True):
        output = self.get_yolo_format(only_bboxes=only_bboxes)
        file_name = self.img_path.stem
        
        if train:
            img_path = join(save_path, 'images', 'train')
            label_path = join(save_path, 'labels', 'train')
        else:
            img_path = join(save_path, 'images', 'val')
            label_path = join(save_path, 'labels', 'val')

        makedirs(label_path, exist_ok=True)
        makedirs(img_path, exist_ok=True)
        
        label_path = join(label_path, file_name + '.txt')
        copy2(self.img_path.as_posix(), img_path)
        with open(label_path, 'w') as file:
            file.write(output)
        
    def get_coco_format(self):
        pass

    def img_show(self, color: tuple = (0, 255, 0)):
        img = cv2.imread(self.img_path.as_posix())
        img = cv2.cvtColor(img, 4)
        for segment in self.segments:
            img = segment.draw(img, color=color, draw_bbox=True)
        return img
    
    
    

In [18]:
# Generate the images before any process for annotations.
# Demonstrate for both segmentation and detection at once



class AnnotationPreprocessor:
    def __init__(self, data_dict: dict):
        """
        data_dict (dict): it's a dictionary that contains the format_type as the key, and 
                          all depending items of the data format.
        
        # TODO: convert data into a middle standard format, then convert it to each one if requested.
        
        Supports yolo.
        {'format': 'yolo', 'task': segmentation/detection, 'annotations': 'path/to/yaml_file.yaml', data_path: 'path/to/images'}
        {'format': 'coco', 'task': 'segmentation/detection', 'annotations': 'path/to/annotations.json', data_path: 'path/to/images'}
        {'format': 'cvat', 'task': 'segmentation/detection', 'annotations': 'path/to/cvat.xml', data_path: 'path/to/video_file.mp4'}
        """
        self.format = data_dict['format']
        self.task = data_dict['format'] # segmentation or detection
        self.data_file = data_dict['annotations']
        self.resource_path = data_dict['data_path']
        self.generate_images = data_dict['generate_images']
        self.images, self.annotations = self._preprocess(self)

    def _preprocess(self):
        """
        convert the input format to a middle format.
        """
        match self.format:
            case 'yolo':
                pass
            case 'coco':
                pass
            case 'cvat':
                self.cap = cv2.VideoCapture(self.resource_path)
                assert self.cap.isOpened(), "video file is not readable!"
                image_tags: List[bs4.element.Tag] = cvat_read_data(self.data_file)
                images, annotations = self._preprocess_cvat_data(image_tags, generate_images=True)


    def cvat_read_data(self, data: str) -> List[bs4.element.Tag]:
        with open(xml, 'r') as f:
            data = f.read()
        bs_data = BeautifulSoup(data, "xml")
        image_tags = bs_data.find_all('image')
        annots_tags = [i for i in images if i.polygon is not None]
        return annots_tags
    
    @staticmethod
    def process_tag(image_tag: bs4.element.Tag) -> List[Tuple[str, int, int, List[int]]]:
        items = self.find_children(image_tag, 'polygon')
        # TODO: add bbox and segment together support.
        # generate the images in the initialization before initializing the annotations.
        # Search for the image with name in path/to/images + frame_{id}.png and add its path.
        
        # img_annot = ImageAnnot(img_path)
        for item in items:
            _, frame_no, label, points = item
            img_annot.add_annot()
            
        
        
        
        return items
    
    @staticmethod
    def filter_segment_pts(text_points) -> List[int]:
        return [int(float(item)) for item in [text_points.replace(';', ' ').replace(',', ' ').split(' ')]]
    
    def find_children(self, tag) -> List[Tuple[str, int, int, List[int]]]:
        items = []
        frame_no = int(tag['id'])
        for child in tag.children:
            if self.task == 'segmentation':
                output = ('segmentation', frame_no, child['label'], self.filter_segment_pts(child['points']))
                items.append(output)
            # TODO: implement the bbox too.
            # TODO: add support for having both segmentation and bboxes.
        return items
    
    def _preprocess_yolo(self):
        pass
    
    def _preprocess_coco(self):
        pass
    
    def generate_image(self, )
    
    def _preprocess_cvat_data(self, image_tags, generate_images=True):
        """
        TODO: 
            - Fetch multiple bboxes with beautiful soup
            - Fetch multiple segmentations with beautiful soup
            - Write images
        
        """
        segments = []
        for image_tag in image_tags:
            label, points, frame_no = self.process_tag(image_tag)
            if generate_images:
                self.cap.set(1, frame_no)
                status, frame = self.cap.read()
                save_path: PosixPath = Path(self.resource_path).parent / 'images'
                save_path.mkdir(exist_ok=True)
                save_img = save_path / f'frame_{frame_no}.png'
                img_path = save_img.as_posix()
                cv2.imwrite(img_path, frame)

            img_annot = ImageAnnot(img_path)
            
    
    
    def to_yolo(self):
        pass
    
    def to_coco(self):
        pass



SyntaxError: expected ':' (3078298455.py, line 208)

In [19]:
f = Path('images')

In [4]:
xml = '../input/videos/8.xml'
vid = '../input/videos/8.mp4'

with open(xml, 'r') as f:
    data = f.read()

# the beautifulsoup parser, storing
# the returned object 
bs_data = BeautifulSoup(data, "xml")
 
# Finding all instances of tag 
# `unique`
images = bs_data.find_all('image')
annotated_images = [i for i in images if i.polygon is not None]
print(len(images))
print(len(annotated_images))


t = annotated_images[0]
width = t['width']
height = t['height']
frame_id = t.id
points = t.polygon['points']


5001
319


In [5]:
t

<image height="1080" id="15050" name="frame_015050" width="1920">
<polygon label="ball" occluded="0" points="525.30,589.54;524.71,579.28;524.71,574.58;525.59,569.89;527.65,565.78;530.87,562.26;533.81,558.16;535.86,554.05;540.55,552.88;544.95,551.70;549.36,550.82;554.05,550.82;558.74,551.12;561.68,554.64;565.49,557.57;568.72,560.80;570.18,565.78;571.06,570.48;572.24,574.88;571.94,579.57;571.65,584.26;570.77,589.25;569.30,593.94;568.13,598.34;566.37,602.74;562.56,605.38;558.16,606.85;553.46,608.61;549.06,610.08;544.07,611.25;539.38,611.54;534.69,610.37;531.75,606.85;529.99,602.74;527.65,598.64" source="manual" z_order="0">
</polygon>
</image>

In [35]:
width

'1920'

In [36]:
t.width

In [37]:
'ff.ff'.split('.')

['ff', 'ff']

In [3]:
with open('../input/annotations.xml') as file:
    data = file.read()

bs_data = BeautifulSoup(data, "xml")

images = bs_data.find_all('image')
annotated_images = [i for i in images if i.polygon is not None]
print(len(images))
print(len(annotated_images))

3955
25


In [4]:
annotated_images

[<image height="720" id="99" name="frame_000099" width="1280">
 <polygon label="ball" occluded="0" points="806.30,87.50;804.30,86.70;802.70,86.40;801.10,85.80;799.90,84.80;799.20,83.30;798.60,81.70;797.50,80.60;796.10,79.90;794.60,79.30;792.90,78.90;791.70,77.90;791.30,76.40;791.30,74.60;791.60,73.00;792.40,71.30;793.50,69.70;794.60,68.10;796.00,66.70;796.90,65.30;797.90,63.90;798.90,62.70;800.30,62.10;801.80,61.70;803.30,61.40;805.10,61.10;806.50,60.50;808.30,60.30;810.10,60.50;811.60,60.70;813.10,60.80;814.60,61.20;816.00,61.90;817.40,62.70;818.30,64.10;819.60,65.10;820.30,66.80;820.90,68.50;821.50,70.30;821.80,71.90;821.10,73.50;819.90,74.60;819.70,76.40;819.70,78.00;819.70,79.60;819.60,81.50;819.10,83.10;818.00,84.20;816.70,85.00;815.30,86.00;813.80,86.50;812.30,87.20;810.80,87.80;809.30,88.00;807.70,88.10" source="manual" z_order="0">
 </polygon>
 </image>,
 <image height="720" id="107" name="frame_000107" width="1280">
 <polygon label="ball" occluded="0" points="801.06,100.80;797

In [26]:
text2pts = lambda text_points: [int(float(item)) for item in [text_points.replace(';', ' ').replace(',', ' ').split(' ')]] 
find_children = lambda tag, name: [(name, child['label'], child['points']) for child in tag.children if child.name == name]
find_children(annotated_images[-1], 'polygon')
boxes = find_children(image, 'box')

[<polygon label="ball" occluded="0" points="466.10,131.80;463.98,129.67;461.85,127.73;460.79,125.25;460.61,122.60;461.68,120.12;463.09,117.82;465.21,116.05;467.51,114.46;470.52,113.58;473.53,114.29;476.00,115.52;478.13,117.65;480.42,119.42;481.49,121.89;481.13,124.54;479.19,126.67;476.36,127.73;473.53,127.55;470.87,127.73;470.17,130.38;468.04,132.33;465.39,132.50" source="manual" z_order="0">
 </polygon>,
 <polygon label="ball" occluded="0" points="467.52,186.20;460.19,184.37;453.58,182.90;445.15,181.80;437.81,179.23;432.31,175.93;427.54,171.17;426.81,164.56;430.11,159.43;435.98,155.76;442.58,154.29;450.65,152.46;456.15,152.09;463.49,152.09;470.09,153.19;476.32,153.19;482.93,152.83;490.63,152.46;496.50,152.46;504.20,152.83;509.70,155.03;515.20,157.96;519.24,162.00;521.80,167.86;522.17,174.83;521.07,180.70;515.94,186.20;508.23,189.50;502.00,192.07;495.76,193.91;490.26,194.64;484.03,196.11;478.16,196.11;472.29,193.17" source="manual" z_order="0">
 </polygon>]

In [28]:
annotated_images[-1]

<image height="720" id="2651" name="frame_002651" width="1280">
<polygon label="ball" occluded="0" points="466.10,131.80;463.98,129.67;461.85,127.73;460.79,125.25;460.61,122.60;461.68,120.12;463.09,117.82;465.21,116.05;467.51,114.46;470.52,113.58;473.53,114.29;476.00,115.52;478.13,117.65;480.42,119.42;481.49,121.89;481.13,124.54;479.19,126.67;476.36,127.73;473.53,127.55;470.87,127.73;470.17,130.38;468.04,132.33;465.39,132.50" source="manual" z_order="0">
</polygon>
<polygon label="ball" occluded="0" points="467.52,186.20;460.19,184.37;453.58,182.90;445.15,181.80;437.81,179.23;432.31,175.93;427.54,171.17;426.81,164.56;430.11,159.43;435.98,155.76;442.58,154.29;450.65,152.46;456.15,152.09;463.49,152.09;470.09,153.19;476.32,153.19;482.93,152.83;490.63,152.46;496.50,152.46;504.20,152.83;509.70,155.03;515.20,157.96;519.24,162.00;521.80,167.86;522.17,174.83;521.07,180.70;515.94,186.20;508.23,189.50;502.00,192.07;495.76,193.91;490.26,194.64;484.03,196.11;478.16,196.11;472.29,193.17" source="ma

In [27]:
image

NameError: name 'image' is not defined

In [14]:
xys = t.polygon['points'].replace(';', ' ').replace(',', ' ').split(' ')
xys = [int(float(item)) for item in xys]
xys

[525,
 589,
 524,
 579,
 524,
 574,
 525,
 569,
 527,
 565,
 530,
 562,
 533,
 558,
 535,
 554,
 540,
 552,
 544,
 551,
 549,
 550,
 554,
 550,
 558,
 551,
 561,
 554,
 565,
 557,
 568,
 560,
 570,
 565,
 571,
 570,
 572,
 574,
 571,
 579,
 571,
 584,
 570,
 589,
 569,
 593,
 568,
 598,
 566,
 602,
 562,
 605,
 558,
 606,
 553,
 608,
 549,
 610,
 544,
 611,
 539,
 611,
 534,
 610,
 531,
 606,
 529,
 602,
 527,
 598]