In [1]:
import json
import matplotlib.pyplot as plt
import numpy as np
import os
from os import makedirs
from os.path import isfile, join
from pathlib import Path, PosixPath
import cv2
import bs4
from bs4 import BeautifulSoup
from shutil import copy2
from tqdm import tqdm
from tqdm.notebook import tqdm as ntqdm
from random import shuffle
from utils import Bbox, Segment, ImageAnnot
from numpy.typing import ArrayLike
from typing import Any, Iterable, List, Tuple

plt.rcParams['figure.figsize'] = [20, 15]

In [5]:
# Generate the images before any process for annotations.
# Demonstrate for both segmentation and detection at once

"""
json file:
{
    '/path/to/image_name.png':
        [
            {
                'bbox': [x1, y1, x2, y2],
                'segmentation': [x1 y1 x2 y2 ... xn yn],
                'label': 'ball'
            },
            {
                'bbox': [x1, y1, x2, y2],
                'segmentation': [x1 y1 x2 y2 ... xn yn],
                'label': 'ball'
            }
        ]
}



"""

def cvat_prepare_images_annots(data_file: str, video_file: str, output_path: str, only_bboxes: bool = False):
    assert os.path.isfile(data_file), f"the file {data_file} does not exist"
    data = open(data_file).read()
    cap = cv2.VideoCapture(video_file)
    assert cap.isOpened(), "video is not accessible"
    output_path = join(output_path, 'images', 'train')
    makedirs(output_path, exist_ok=True)
    
    # Read tags and extract the annotated frames.
    bs_data = BeautifulSoup(data, "xml")
    # image_tags = bs_data.find_all('track')
    image_tags = bs_data.find_all('image')
    annots_tags = [i for i in image_tags if i.polygon]
    img_annots = []
    
    for img in tqdm(annots_tags, desc='generating the frames'):
        fno = int(img['id'])
        file_name = f'{Path(video_file).stem}_frame_{fno}'
        file_path = join(output_path, file_name+'.png')
        if not Path(file_path).is_file():
            cap.set(1, fno)
            status, frame = cap.read()
            cv2.imwrite(file_path, frame)
            
        img_annot = ImageAnnot(file_path)
        # Convert to yolo
        polygons = [tag for tag in img.children if tag.name == 'polygon']
        for poly in polygons:
            points = poly['points']
            points = [int(float(item)) for item in points.replace(';', ' ').replace(',', ' ').split(' ')]
            segment = Segment(points, label=0)
            img_annot.add_segment(segment)

        img_annot.save_labels(save_path=output_path, train=True, only_bboxes=only_bboxes)

data_file = '../input/videos/train/annotations/8.xml'
video_file = '../input/videos/train/8.mp4'
output_path = '../new/new'

cvat_prepare_images_annots(data_file=data_file , video_file=video_file, output_path= output_path, only_bboxes=False)


generating the frames: 100%|██████████████████| 319/319 [00:35<00:00,  9.04it/s]


In [4]:
xml = '../input/videos/8.xml'
vid = '../input/videos/8.mp4'

with open(xml, 'r') as f:
    data = f.read()

# the beautifulsoup parser, storing
# the returned object 
bs_data = BeautifulSoup(data, "xml")

# Finding all instances of tag
# `unique`
images = bs_data.find_all('image')
annotated_images = [i for i in images if i.polygon is not None]
print(len(images))
print(len(annotated_images))


t = annotated_images[0]
width = t['width']
height = t['height']
frame_id = t.id
points = t.polygon['points']


5001
319


In [5]:
t

<image height="1080" id="15050" name="frame_015050" width="1920">
<polygon label="ball" occluded="0" points="525.30,589.54;524.71,579.28;524.71,574.58;525.59,569.89;527.65,565.78;530.87,562.26;533.81,558.16;535.86,554.05;540.55,552.88;544.95,551.70;549.36,550.82;554.05,550.82;558.74,551.12;561.68,554.64;565.49,557.57;568.72,560.80;570.18,565.78;571.06,570.48;572.24,574.88;571.94,579.57;571.65,584.26;570.77,589.25;569.30,593.94;568.13,598.34;566.37,602.74;562.56,605.38;558.16,606.85;553.46,608.61;549.06,610.08;544.07,611.25;539.38,611.54;534.69,610.37;531.75,606.85;529.99,602.74;527.65,598.64" source="manual" z_order="0">
</polygon>
</image>

In [6]:
with open('../input/annotations.xml') as file:
    data = file.read()

bs_data = BeautifulSoup(data, "xml")

images = bs_data.find_all('image')
annotated_images = [i for i in images if i.polygon is not None]
print(len(images))
print(len(annotated_images))

3955
25


In [26]:
text2pts = lambda text_points: [int(float(item)) for item in [text_points.replace(';', ' ').replace(',', ' ').split(' ')]] 
find_children = lambda tag, name: [(name, child['label'], child['points']) for child in tag.children if child.name == name]
find_children(annotated_images[-1], 'polygon')
boxes = find_children(image, 'box')

[<polygon label="ball" occluded="0" points="466.10,131.80;463.98,129.67;461.85,127.73;460.79,125.25;460.61,122.60;461.68,120.12;463.09,117.82;465.21,116.05;467.51,114.46;470.52,113.58;473.53,114.29;476.00,115.52;478.13,117.65;480.42,119.42;481.49,121.89;481.13,124.54;479.19,126.67;476.36,127.73;473.53,127.55;470.87,127.73;470.17,130.38;468.04,132.33;465.39,132.50" source="manual" z_order="0">
 </polygon>,
 <polygon label="ball" occluded="0" points="467.52,186.20;460.19,184.37;453.58,182.90;445.15,181.80;437.81,179.23;432.31,175.93;427.54,171.17;426.81,164.56;430.11,159.43;435.98,155.76;442.58,154.29;450.65,152.46;456.15,152.09;463.49,152.09;470.09,153.19;476.32,153.19;482.93,152.83;490.63,152.46;496.50,152.46;504.20,152.83;509.70,155.03;515.20,157.96;519.24,162.00;521.80,167.86;522.17,174.83;521.07,180.70;515.94,186.20;508.23,189.50;502.00,192.07;495.76,193.91;490.26,194.64;484.03,196.11;478.16,196.11;472.29,193.17" source="manual" z_order="0">
 </polygon>]

In [28]:
[i for i in annotated_images[-1].find_all('polygon')]

<image height="720" id="2651" name="frame_002651" width="1280">
<polygon label="ball" occluded="0" points="466.10,131.80;463.98,129.67;461.85,127.73;460.79,125.25;460.61,122.60;461.68,120.12;463.09,117.82;465.21,116.05;467.51,114.46;470.52,113.58;473.53,114.29;476.00,115.52;478.13,117.65;480.42,119.42;481.49,121.89;481.13,124.54;479.19,126.67;476.36,127.73;473.53,127.55;470.87,127.73;470.17,130.38;468.04,132.33;465.39,132.50" source="manual" z_order="0">
</polygon>
<polygon label="ball" occluded="0" points="467.52,186.20;460.19,184.37;453.58,182.90;445.15,181.80;437.81,179.23;432.31,175.93;427.54,171.17;426.81,164.56;430.11,159.43;435.98,155.76;442.58,154.29;450.65,152.46;456.15,152.09;463.49,152.09;470.09,153.19;476.32,153.19;482.93,152.83;490.63,152.46;496.50,152.46;504.20,152.83;509.70,155.03;515.20,157.96;519.24,162.00;521.80,167.86;522.17,174.83;521.07,180.70;515.94,186.20;508.23,189.50;502.00,192.07;495.76,193.91;490.26,194.64;484.03,196.11;478.16,196.11;472.29,193.17" source="ma

In [14]:
xys = t.polygon['points'].replace(';', ' ').replace(',', ' ').split(' ')
xys = [int(float(item)) for item in xys]
xys

[525,
 589,
 524,
 579,
 524,
 574,
 525,
 569,
 527,
 565,
 530,
 562,
 533,
 558,
 535,
 554,
 540,
 552,
 544,
 551,
 549,
 550,
 554,
 550,
 558,
 551,
 561,
 554,
 565,
 557,
 568,
 560,
 570,
 565,
 571,
 570,
 572,
 574,
 571,
 579,
 571,
 584,
 570,
 589,
 569,
 593,
 568,
 598,
 566,
 602,
 562,
 605,
 558,
 606,
 553,
 608,
 549,
 610,
 544,
 611,
 539,
 611,
 534,
 610,
 531,
 606,
 529,
 602,
 527,
 598]