In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup

Lets parse annotations from XML file, use all attributes saved by CVAT annotation tool and check which attributes can be ommited.

In [2]:
def parse_annotations_tracks(soup: object):
    frame_annotation_data = list()
    tasks_data = list()
    for task in soup.find_all('task'):
        tasks_data.append({
            'task_id': task.find('id').text,
            'name': task.find('name').text,
        })
    tasks_df = pd.DataFrame(tasks_data)
    tasks_df['camera_id'] = tasks_df['name'].apply(lambda x: x.split('_')[3])
    tasks_df['datetime'] = tasks_df['name'].apply(lambda x: x.split('_')[6].replace('.mkv', ''))
    tasks_df['datetime'] = pd.to_datetime(tasks_df['datetime'], format='%Y%m%dT%H%M%S')
    tasks_df.set_index('task_id', inplace=True)
    for track in soup.findAll('track'):
        track_annotation_dict = {}
        for attribute_name in ['id', 'label', 'source', 'task_id', 'subset']:
            track_annotation_dict[attribute_name] = track[attribute_name]
        label = track['label']
        for box in track.findAll('box'):
            frame_annotation_dict = track_annotation_dict.copy()
            for attribute_name in ['frame', 'keyframe', 'outside', 'occluded', 'xtl', 'ytl', 'xbr', 'ybr', 'z_order']:
                frame_annotation_dict[attribute_name] = box[attribute_name]

            if label == 'spz':
                frame_annotation_dict['text_spz'] = box.find('attribute', {'name': 'text_spz'}).text
            if label == 'spz_alt':
                frame_annotation_dict['text_spz'] = box.find('attribute', {'name': 'spz_alt_text'}).text
            elif label == 'text':
                frame_annotation_dict['unspecified_text'] = box.find('attribute', {'name': 'unspecified_text'}).text

            frame_annotation_dict['name'] = tasks_df.loc[frame_annotation_dict['task_id'], 'name']
            frame_annotation_dict['camera_id'] = tasks_df.loc[frame_annotation_dict['task_id'],'camera_id']
            frame_annotation_dict['datetime'] = tasks_df.loc[frame_annotation_dict['task_id'],'datetime']
            frame_annotation_data.append(frame_annotation_dict)

    frame_annotation_df = pd.DataFrame(frame_annotation_data)
    return frame_annotation_df


with open('project_camera_01_09_to_11_06_23-2023_07_20_12_42_13-cvat for video 1.1.xml', 'r', encoding='utf-8') as f:
	xml_annotations = f.read() 
soup = BeautifulSoup(xml_annotations, 'xml')
frame_annotation_df_1 = parse_annotations_tracks(soup)

with open('project_camera_02_09_to_11_06_23-2023_07_17_13_14_54-cvat for video 1.1.xml', 'r', encoding='utf-8') as f:
    xml_annotations = f.read() 
soup = BeautifulSoup(xml_annotations, 'xml')
frame_annotation_df_2 = parse_annotations_tracks(soup)

frame_annotation_df = pd.concat([frame_annotation_df_1, frame_annotation_df_2])
frame_annotation_df.rename({'id': 'track_id'}, axis=1, inplace=True)
frame_annotation_df.reset_index(drop=True, inplace=True)
frame_annotation_df.shape

(43437, 19)

In [3]:
frame_annotation_df

Unnamed: 0,track_id,label,source,task_id,subset,frame,keyframe,outside,occluded,xtl,ytl,xbr,ybr,z_order,name,camera_id,datetime,text_spz,unspecified_text
0,0,boat,semi-auto,1,default,1087,1,0,0,1238.46,503.65,1425.55,585.23,0,cfg_raw_cam_01_fhd_h265_20230609T050002.mkv,01,2023-06-09 05:00:02,,
1,0,boat,semi-auto,1,default,1088,1,0,0,1238.00,502.00,1425.00,583.00,0,cfg_raw_cam_01_fhd_h265_20230609T050002.mkv,01,2023-06-09 05:00:02,,
2,0,boat,semi-auto,1,default,1089,1,0,0,1239.00,503.00,1426.00,584.00,0,cfg_raw_cam_01_fhd_h265_20230609T050002.mkv,01,2023-06-09 05:00:02,,
3,0,boat,semi-auto,1,default,1090,1,0,0,1239.00,503.00,1426.00,584.00,0,cfg_raw_cam_01_fhd_h265_20230609T050002.mkv,01,2023-06-09 05:00:02,,
4,0,boat,semi-auto,1,default,1091,1,0,0,1239.00,503.00,1426.00,584.00,0,cfg_raw_cam_01_fhd_h265_20230609T050002.mkv,01,2023-06-09 05:00:02,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43432,627,spz_alt,semi-auto,699,default,933691,1,0,0,115.36,527.69,226.60,579.69,0,cfg_raw_cam_02_fhd_h265_20230707T190001.mkv,02,2023-07-07 19:00:01,Martina,
43433,627,spz_alt,semi-auto,699,default,933692,1,0,0,60.00,530.67,171.00,582.67,0,cfg_raw_cam_02_fhd_h265_20230707T190001.mkv,02,2023-07-07 19:00:01,Martina,
43434,627,spz_alt,semi-auto,699,default,933693,1,0,0,11.33,542.00,122.33,594.00,0,cfg_raw_cam_02_fhd_h265_20230707T190001.mkv,02,2023-07-07 19:00:01,Martina,
43435,627,spz_alt,semi-auto,699,default,933694,1,1,0,0.00,543.00,72.33,595.00,0,cfg_raw_cam_02_fhd_h265_20230707T190001.mkv,02,2023-07-07 19:00:01,Martina,


In [4]:
frame_annotation_df.describe()

Unnamed: 0,datetime
count,43437
mean,2023-06-21 15:13:26.134263552
min,2023-06-09 05:00:02
25%,2023-06-09 14:10:03
50%,2023-06-10 13:00:00
75%,2023-07-07 15:10:01
max,2023-07-08 20:00:03


In [5]:
for attribute_name in ['track_id', 'frame']:
    frame_annotation_df[attribute_name] = frame_annotation_df[attribute_name].astype(int)
for attribute_name in ['xtl', 'ytl', 'xbr', 'ybr']:
    frame_annotation_df[attribute_name] = frame_annotation_df[attribute_name].astype(float)

# Boat transit aggregation with boat identification

- Form a boat area transit dataframe with aggregation of boat passing in camera view and boat identification values.
- Fill missing annotation data

In [6]:
def decode_usable_spz(row:pd.Series):
    text_spz = row['text_spz']
    if pd.isna(text_spz):
        return '', 0
    text_spz = text_spz.replace('!', '').replace('#', '')    
    text_spz = text_spz.replace(' ', '')
    if text_spz == '???':
        return '', 0
    
    text_spz_known = text_spz.replace('?', '')
    return text_spz, len(text_spz_known)

frame_annotation_df['decoded_spz'], frame_annotation_df['usable_spz_len'] = zip(*frame_annotation_df.apply(decode_usable_spz, axis=1))

In [7]:
frame_annotation_df

Unnamed: 0,track_id,label,source,task_id,subset,frame,keyframe,outside,occluded,xtl,...,xbr,ybr,z_order,name,camera_id,datetime,text_spz,unspecified_text,decoded_spz,usable_spz_len
0,0,boat,semi-auto,1,default,1087,1,0,0,1238.46,...,1425.55,585.23,0,cfg_raw_cam_01_fhd_h265_20230609T050002.mkv,01,2023-06-09 05:00:02,,,,0
1,0,boat,semi-auto,1,default,1088,1,0,0,1238.00,...,1425.00,583.00,0,cfg_raw_cam_01_fhd_h265_20230609T050002.mkv,01,2023-06-09 05:00:02,,,,0
2,0,boat,semi-auto,1,default,1089,1,0,0,1239.00,...,1426.00,584.00,0,cfg_raw_cam_01_fhd_h265_20230609T050002.mkv,01,2023-06-09 05:00:02,,,,0
3,0,boat,semi-auto,1,default,1090,1,0,0,1239.00,...,1426.00,584.00,0,cfg_raw_cam_01_fhd_h265_20230609T050002.mkv,01,2023-06-09 05:00:02,,,,0
4,0,boat,semi-auto,1,default,1091,1,0,0,1239.00,...,1426.00,584.00,0,cfg_raw_cam_01_fhd_h265_20230609T050002.mkv,01,2023-06-09 05:00:02,,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43432,627,spz_alt,semi-auto,699,default,933691,1,0,0,115.36,...,226.60,579.69,0,cfg_raw_cam_02_fhd_h265_20230707T190001.mkv,02,2023-07-07 19:00:01,Martina,,Martina,7
43433,627,spz_alt,semi-auto,699,default,933692,1,0,0,60.00,...,171.00,582.67,0,cfg_raw_cam_02_fhd_h265_20230707T190001.mkv,02,2023-07-07 19:00:01,Martina,,Martina,7
43434,627,spz_alt,semi-auto,699,default,933693,1,0,0,11.33,...,122.33,594.00,0,cfg_raw_cam_02_fhd_h265_20230707T190001.mkv,02,2023-07-07 19:00:01,Martina,,Martina,7
43435,627,spz_alt,semi-auto,699,default,933694,1,1,0,0.00,...,72.33,595.00,0,cfg_raw_cam_02_fhd_h265_20230707T190001.mkv,02,2023-07-07 19:00:01,Martina,,Martina,7


Attributes with unique value count equal 1 can be droped from dataframe.

In [8]:
frame_annotation_df.label.value_counts()

label
boat        24769
spz          7895
spz_alt      3415
anomaly      3384
text         3300
boat_alt      674
Name: count, dtype: int64

In [9]:
frame_annotation_df.groupby(['camera_id','label']).track_id.unique()

camera_id  label   
01         anomaly                     [3, 101, 108, 113, 118, 123, 182]
           boat        [0, 1, 4, 8, 13, 15, 19, 24, 27, 30, 35, 39, 4...
           boat_alt                                      [184, 202, 231]
           spz         [2, 5, 11, 16, 20, 25, 28, 31, 36, 40, 47, 52,...
           spz_alt                                  [187, 232, 310, 349]
           text        [6, 7, 9, 10, 12, 14, 17, 18, 21, 22, 23, 26, ...
02         anomaly                [54, 55, 126, 423, 437, 494, 585, 628]
           boat        [0, 3, 6, 8, 10, 13, 16, 19, 21, 24, 27, 30, 3...
           boat_alt         [53, 183, 187, 194, 199, 278, 317, 478, 571]
           spz         [1, 4, 7, 11, 14, 17, 20, 22, 25, 28, 32, 34, ...
           spz_alt     [2, 5, 9, 12, 15, 18, 23, 26, 29, 31, 41, 43, ...
Name: track_id, dtype: object

In [10]:
frame_annotation_df.groupby(['camera_id','label']).track_id.unique().apply(len)

camera_id  label   
01         anomaly       7
           boat        126
           boat_alt      3
           spz          93
           spz_alt       4
           text        119
02         anomaly       8
           boat        275
           boat_alt      9
           spz         210
           spz_alt     127
Name: track_id, dtype: int64

Parse table of boat traversals. Find a tracks connected to each other. The ones that are relevant for boat traversal record.

In [11]:
tracks_frame_range_df = frame_annotation_df.groupby(['camera_id', 'track_id']).frame.agg(['min', 'max']).sort_index().join(frame_annotation_df.groupby('track_id').label.first())
tracks_frame_range_df

Unnamed: 0_level_0,Unnamed: 1_level_0,min,max,label
camera_id,track_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
01,0,1087,1108,boat
01,1,25048,25143,boat
01,2,25090,25129,spz
01,3,37492,37530,anomaly
01,4,58362,58428,boat
...,...,...,...,...
02,624,935635,935663,spz
02,625,933659,933704,boat
02,626,933659,933695,spz
02,627,933659,933694,spz_alt


In [12]:
def select_representative_text_from_frames_series(text_series: pd.Series):
    text_series = text_series.dropna()
    if text_series.empty:
        return np.nan
    text_series = text_series.str.replace('#', '').replace('!', '')

    text_df = text_series.reset_index()
    text_df.columns = ['frame', 'text']
    text_df['len'] = text_df.text.str.len()
    return text_df.sort_values('len', ascending=False).iloc[0]['text']

In [13]:
boats_traversal_df = tracks_frame_range_df[tracks_frame_range_df.label.isin(['boat', 'boat_alt'])].copy().reset_index()
boats_traversal_df.columns = ['camera_id', 'boat_track_id', 'frame_id_min', 'frame_id_max', 'label']
boats_traversal_df['decoded_spz'] = np.nan
boats_traversal_df['decoded_text'] = np.nan

for id, row in list(boats_traversal_df.iterrows()):
    camera_tracks_frame_range_df = tracks_frame_range_df.loc[row.camera_id]
    additional_boat_info_df = camera_tracks_frame_range_df[(camera_tracks_frame_range_df['min'] >= row.frame_id_min) & (camera_tracks_frame_range_df['max'] <= row.frame_id_max) & (camera_tracks_frame_range_df['label'] != 'boat')]
    # print(additional_boat_info_df)
    decoded_info_msg = f'For boat track_id={row.boat_track_id}:'
    if additional_boat_info_df.size > 0:
        spz_info = additional_boat_info_df[additional_boat_info_df.label == ('spz_alt' if row.label == 'boat_alt' else 'spz')] 
        # print(spz_info.iloc[0].name)
        if spz_info.size > 0:
            decoded_spz = frame_annotation_df[(frame_annotation_df.track_id == spz_info.iloc[0].name)][['text_spz', 'decoded_spz', 'usable_spz_len']].sort_values('usable_spz_len', ascending=False).iloc[0]['decoded_spz']
            boats_traversal_df.loc[id, 'decoded_spz'] = decoded_spz
            decoded_info_msg += f' decoded_spz={decoded_spz},'
        else:
            decoded_info_msg += f' no spz found,'

        text_info = additional_boat_info_df[additional_boat_info_df.label == 'text']
        if text_info.size > 0:    
            decoded_text = ';'.join([select_representative_text_from_frames_series(frame_annotation_df[(frame_annotation_df.track_id == text_track_id)]['unspecified_text']) for text_track_id in text_info.index])
            boats_traversal_df.loc[id, 'decoded_text'] = decoded_text
            decoded_info_msg += f' decoded_text={decoded_text},'
        else:
            decoded_info_msg += f' no text found,'

        other_info = additional_boat_info_df[~additional_boat_info_df.label.isin(['spz', 'text'])]
        if other_info.size > 0:
            decoded_info_msg += f' other_info={other_info.to_dict()},'
    else:
        decoded_info_msg += f' no additional info was found.'

    print(decoded_info_msg)
        
boats_traversal_df

For boat track_id=0: no additional info was found.
For boat track_id=1: decoded_spz=www.hausbotvilma.cz, no text found,
For boat track_id=4: decoded_spz=303451-P, decoded_text=www.housbotvilma.cz;Max. 10 Osob,
For boat track_id=8: decoded_spz=303768-P, decoded_text=BLATOUCH;PŮJČOVNA-3;pronajemhousebotu.cz,
For boat track_id=13: no spz found, decoded_text=LINKA STRÁŽNICE - PETROC A ZPĚT Tel.: 603 371 350,
For boat track_id=15: decoded_spz=3043?4?, decoded_text=Amálka;???,
For boat track_id=19: decoded_spz=600711-P, decoded_text=SASANKA;PŮJČOVNA-4;pronajemhousbotu.cz,
For boat track_id=24: decoded_spz=600042-P, decoded_text=MAX. 8 osob,
For boat track_id=27: decoded_spz=P-600752, decoded_text=???,
For boat track_id=30: decoded_spz=BRITA, decoded_text=???;???;???,
For boat track_id=35: decoded_spz=, decoded_text=???;???,
For boat track_id=39: decoded_spz=600409, decoded_text=???;???;???,
For boat track_id=44: no spz found, decoded_text=???,
For boat track_id=46: decoded_spz=, decoded_text

For boat track_id=451: no spz found, no text found, other_info={'min': {452: 842587}, 'max': {452: 842596}, 'label': {452: 'spz_alt'}},
For boat track_id=453: decoded_spz=600239-P, no text found, other_info={'min': {455: 843205}, 'max': {455: 843228}, 'label': {455: 'spz_alt'}},
For boat track_id=456: decoded_spz=600042-P, no text found,
For boat track_id=458: decoded_spz=601302-P, no text found,
For boat track_id=460: decoded_spz=60029f?, no text found, other_info={'min': {461: 846474}, 'max': {461: 846505}, 'label': {461: 'spz_alt'}},
For boat track_id=463: decoded_spz=500954, no text found,
For boat track_id=465: decoded_spz=601723-P, no text found,
For boat track_id=467: no spz found, no text found, other_info={'min': {468: 850760}, 'max': {468: 850783}, 'label': {468: 'spz_alt'}},
For boat track_id=469: decoded_spz=?00f792, no text found, other_info={'min': {470: 851512}, 'max': {470: 851533}, 'label': {470: 'spz_alt'}},
For boat track_id=472: decoded_spz=, no text found,
For boat

Unnamed: 0,camera_id,boat_track_id,frame_id_min,frame_id_max,label,decoded_spz,decoded_text
0,01,0,1087,1108,boat,,
1,01,1,25048,25143,boat,www.hausbotvilma.cz,
2,01,4,58362,58428,boat,303451-P,www.housbotvilma.cz;Max. 10 Osob
3,01,8,60451,60527,boat,303768-P,BLATOUCH;PŮJČOVNA-3;pronajemhousebotu.cz
4,01,13,61683,61760,boat,,LINKA STRÁŽNICE - PETROC A ZPĚT Tel.: 603 371 350
...,...,...,...,...,...,...,...
379,02,616,926478,926503,boat,600042-P,
380,02,618,928791,928829,boat,302254,
381,02,620,933649,933658,boat,6002??,
382,02,623,935625,935677,boat,303331,


In [14]:
boats_traversal_df.to_csv('decoded_boat_traversals.csv')