# Preprocessing of the annotations of the i-LIDS datasets

In [1]:
from __future__ import annotations

from collections import OrderedDict
from datetime import time
from functools import reduce

from glob import glob

from json import dump
import os
from pathlib import Path

import subprocess

from typing import Any, Dict, Union, List, Optional

from chardet.universaldetector import UniversalDetector
import datamodel_code_generator
from pydantic import BaseModel, validator
import pandas as pd
import xmltodict

from ffprobe_models import Video

## SZTE

The **SZTE** subfolder has a structure as following:

```
SZTE/
├── calibration/        # Don't care, old .tif files
├── index-files/        # Html etc. files for their web interface, to browser and
│                       #   filter the clips
├── video/              # Holds the videos in pairs with their xml files holding
│   │                   #   video metadata
│   ├── SZTEA101a.mov
│   ├── SZTEA101a.xml       # Meta data for file SZTEA101a.mov.
│   │                       #   Holds only minor meta on the video format itself
│   ├── ... 64 pairs later
│   ├── SZTEN203a.mov
│   └── SZTEN203a.xml
├── i-LIDS Flyer.pdf        # General purpose 1 page flyer describing the different
│                           #   i-LIDS datasets
├── index.xml               # Holds the perturbation annotations for the sequences
│                           #   in the video/folder
├── Sterile Zone.pdf        # Short pdf describing the dataset and the structure of
│                           #   the index.xml file
└── User Guide.pdf          # Guide for the licensing, distribution, web app and more
```

### Metadata extraction
Extracting the metadata in a more portable/PyTorch friendly format

#### Individual xml files inside the 'video' folder

In [2]:
xml_files = glob('../SZTE/video/*.xml')
xml_files

['../SZTE/video/SZTEN201d.xml',
 '../SZTE/video/SZTEN201e.xml',
 '../SZTE/video/SZTEN201b.xml',
 '../SZTE/video/SZTEA101a.xml',
 '../SZTE/video/SZTEA204a.xml',
 '../SZTE/video/SZTEN201c.xml',
 '../SZTE/video/SZTEN201a.xml',
 '../SZTE/video/SZTEA101b.xml',
 '../SZTE/video/SZTEN103b.xml',
 '../SZTE/video/SZTEA203a.xml',
 '../SZTE/video/SZTEN103a.xml',
 '../SZTE/video/SZTEN102d.xml',
 '../SZTE/video/SZTEA202b.xml',
 '../SZTE/video/SZTEN102a.xml',
 '../SZTE/video/SZTEN102c.xml',
 '../SZTE/video/SZTEA202a.xml',
 '../SZTE/video/SZTEN102b.xml',
 '../SZTE/video/SZTEN101c.xml',
 '../SZTE/video/SZTEA104a.xml',
 '../SZTE/video/SZTEA201a.xml',
 '../SZTE/video/SZTEN101b.xml',
 '../SZTE/video/SZTEA201b.xml',
 '../SZTE/video/SZTEN101a.xml',
 '../SZTE/video/SZTEN101d.xml',
 '../SZTE/video/SZTEA105a.xml',
 '../SZTE/video/SZTEN202a.xml',
 '../SZTE/video/SZTEA102b.xml',
 '../SZTE/video/SZTEN202b.xml',
 '../SZTE/video/SZTEA102a.xml',
 '../SZTE/video/SZTEN202c.xml',
 '../SZTE/video/SZTEA103a.xml',
 '../SZT

In [3]:
def get_file_encoding(file_path: Path, detector: UniversalDetector = UniversalDetector()) -> str:
    detector.reset()

    for line in open(file_path, "rb"):
        detector.feed(line)
        if detector.done:
            return detector.close().get("encoding")

    return "utf-8"


def read_xml(file_path: Path) -> OrderedDict:
    # by default, those xml files are encoded in utf-16, therefore, we can't rely on the default
    # encoding of python 'open' function
    with open(file_path, "r", encoding=get_file_encoding(file_path)) as f:
        return xmltodict.parse(f.read())


# Remove the root element called GeeVSClip
parsed_xml = [read_xml(Path(f))['GeeVSClip'] for f in xml_files]
parsed_xml[0]


{'DatabaseID': '0',
 'UniqueID': '00000000-0000-0000-0000-000000000000',
 'FolderID': '34',
 'VideoPath': 'E:\\video\\SZTEN201d.mov',
 'VideoLength': '45000',
 'VideoTrack': '0',
 'KeyPath': None,
 'KeyLength': '0',
 'KeyTrack': '0',
 'A1Path': None,
 'A1Length': '0',
 'A1Slip': '0',
 'A1Track': '0',
 'A2Path': None,
 'A2Length': '0',
 'A2Slip': '0',
 'A2Track': '0',
 'A3Path': None,
 'A3Length': '0',
 'A3Slip': '0',
 'A3Track': '0',
 'A4Path': None,
 'A4Length': '0',
 'A4Slip': '0',
 'A4Track': '0',
 'VBIPath': None,
 'VBILength': '0',
 'VBITrack': '0',
 'Name': 'SZTEN201d',
 'Description': 'New Clip',
 'Category': None,
 'Agency': None,
 'UsageAvailable': '1',
 'Usage': '1',
 'MiniPicture': '0',
 'CueGPI': '0',
 'GPIBehavior': '0',
 'SourceIn': '00:00:00:00',
 'SourceTape': None,
 'SourceMisc': None,
 'Parent': '-1',
 'Owner': '-1',
 'Permissions': '255',
 'Captured': '2008-11-12T14:19:40',
 'Modified': '2005-12-15T14:38:02',
 'Expires': '2008-12-12T14:19:40',
 'TrimIn': '0',
 'TrimO

In [4]:
df_initial_meta_videos = pd.json_normalize(parsed_xml)  #to flatten the keys as a xml requires a unique root element
df_initial_meta_videos

Unnamed: 0,DatabaseID,UniqueID,FolderID,VideoPath,VideoLength,VideoTrack,KeyPath,KeyLength,KeyTrack,A1Path,...,VideoStandard,Compression,HomeServer,LastPlayed,ChannelMask,CapturedBy,ModifiedBy,AudioType,PlayCount,GOPLength
0,0,00000000-0000-0000-0000-000000000000,34,E:\video\SZTEN201d.mov,45000,0,,0,0,,...,Pal,MJPEG VFW,VSERVER,1899-12-30T00:00:00,65535,SYSTEM,SYSTEM,Embedded_Stereo,0,1
1,0,00000000-0000-0000-0000-000000000000,34,E:\video\SZTEN201e.mov,22500,0,,0,0,,...,Pal,MJPEG VFW,VSERVER,1899-12-30T00:00:00,65535,SYSTEM,SYSTEM,Embedded_Stereo,0,1
2,0,00000000-0000-0000-0000-000000000000,34,E:\video\SZTEN201b.mov,45000,0,,0,0,,...,Pal,MJPEG VFW,VSERVER,1899-12-30T00:00:00,65535,SYSTEM,SYSTEM,Embedded_Stereo,0,1
3,0,00000000-0000-0000-0000-000000000000,34,E:\video\SZTEA101a.mov,55789,0,,0,0,,...,Pal,MJPEG VFW,VSERVER,1899-12-30T00:00:00,65535,SYSTEM,SYSTEM,Embedded_Stereo,0,1
4,0,00000000-0000-0000-0000-000000000000,34,E:\video\SZTEA204a.mov,138313,0,,0,0,,...,Pal,MJPEG VFW,VSERVER,1899-12-30T00:00:00,65535,SYSTEM,SYSTEM,Embedded_Stereo,0,1
5,0,00000000-0000-0000-0000-000000000000,34,E:\video\SZTEN201c.mov,45000,0,,0,0,,...,Pal,MJPEG VFW,VSERVER,1899-12-30T00:00:00,65535,SYSTEM,SYSTEM,Embedded_Stereo,0,1
6,0,00000000-0000-0000-0000-000000000000,34,E:\video\SZTEN201a.mov,22500,0,,0,0,,...,Pal,MJPEG VFW,VSERVER,1899-12-30T00:00:00,65535,SYSTEM,SYSTEM,Embedded_Stereo,0,1
7,0,00000000-0000-0000-0000-000000000000,34,E:\video\SZTEA101b.mov,74656,0,,0,0,,...,Pal,MJPEG VFW,VSERVER,1899-12-30T00:00:00,65535,SYSTEM,SYSTEM,Embedded_Stereo,0,1
8,0,00000000-0000-0000-0000-000000000000,34,E:\video\SZTEN103b.mov,45000,0,,0,0,,...,Pal,MJPEG VFW,VSERVER,1899-12-30T00:00:00,65535,SYSTEM,SYSTEM,Embedded_Stereo,0,1
9,0,00000000-0000-0000-0000-000000000000,34,E:\video\SZTEA203a.mov,70765,0,,0,0,,...,Pal,MJPEG VFW,VSERVER,1899-12-30T00:00:00,65535,SYSTEM,SYSTEM,Embedded_Stereo,0,1


In [5]:
# Remove unnecessary keys and clean up a few values
# df.drop(['DatabaseID', 'FolderID', 'VideoTrack', 'KeyPath', 'KeyLength', 'A1Path', 'A1Length', 'A1Slip', 'A1Track', 'A2Path', 'A2Length', 'A2Slip', 'A2Track', 'A3Path', 'A3Length', 'A3Slip', 'A3Track', 'A4Path', 'A4Length', 'A4Slip', 'A4Track', 'VBIPath', 'VBILength', 'VBITrack', 'Description', 'Category', 'Agency', 'UsageAvailable', 'Usage', '...'])  # better select to ones to keep

df_initial_meta_videos = df_initial_meta_videos[['VideoPath', 'VideoLength', 'Name', 'TrimOut', 'Datarate', 'AspectRatio']]
df_initial_meta_videos['VideoPath'] = df_initial_meta_videos['VideoPath'].str.replace("E:\\\\video\\\\", "./video/", regex=True)
df_initial_meta_videos

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_initial_meta_videos['VideoPath'] = df_initial_meta_videos['VideoPath'].str.replace("E:\\\\video\\\\", "./video/", regex=True)


Unnamed: 0,VideoPath,VideoLength,Name,TrimOut,Datarate,AspectRatio
0,./video/SZTEN201d.mov,45000,SZTEN201d,45000,29139,4:3
1,./video/SZTEN201e.mov,22500,SZTEN201e,22500,41525,4:3
2,./video/SZTEN201b.mov,45000,SZTEN201b,45000,28847,4:3
3,./video/SZTEA101a.mov,55789,SZTEA101a,55789,54900,4:3
4,./video/SZTEA204a.mov,138313,SZTEA204a,138313,26997,4:3
5,./video/SZTEN201c.mov,45000,SZTEN201c,45000,30330,4:3
6,./video/SZTEN201a.mov,22500,SZTEN201a,22500,26659,4:3
7,./video/SZTEA101b.mov,74656,SZTEA101b,74656,34206,4:3
8,./video/SZTEN103b.mov,45000,SZTEN103b,45000,32783,4:3
9,./video/SZTEA203a.mov,70765,SZTEA203a,70765,27216,4:3


In [6]:
assert any((df_initial_meta_videos['VideoLength'] == df_initial_meta_videos['TrimOut']).tolist())

In [7]:
# also getting rid of TrimOut as it is equal to VideoLength
df_initial_meta_videos = df_initial_meta_videos.drop(columns=['TrimOut'])
df_initial_meta_videos

Unnamed: 0,VideoPath,VideoLength,Name,Datarate,AspectRatio
0,./video/SZTEN201d.mov,45000,SZTEN201d,29139,4:3
1,./video/SZTEN201e.mov,22500,SZTEN201e,41525,4:3
2,./video/SZTEN201b.mov,45000,SZTEN201b,28847,4:3
3,./video/SZTEA101a.mov,55789,SZTEA101a,54900,4:3
4,./video/SZTEA204a.mov,138313,SZTEA204a,26997,4:3
5,./video/SZTEN201c.mov,45000,SZTEN201c,30330,4:3
6,./video/SZTEN201a.mov,22500,SZTEN201a,26659,4:3
7,./video/SZTEA101b.mov,74656,SZTEA101b,34206,4:3
8,./video/SZTEN103b.mov,45000,SZTEN103b,32783,4:3
9,./video/SZTEA203a.mov,70765,SZTEA203a,27216,4:3


In [8]:
# add information from ffprobe (inspiration: https://gist.github.com/nrk/2286511)
def get_stream_info_with_ffprobe(video_path: Path) -> str:
    """Execute ffprobe on the given file and returns the unparsed json str"""
    command = ['ffprobe',
               '-v', 'quiet',  # loglevel quiet: Show nothing at all; be silent.
               '-print_format', 'json',
               '-show_format',
               '-show_streams',
               os.path.basename(video_path)
               ]

    ffprobe_process = subprocess.Popen(command, cwd=video_path.parent, stdout=subprocess.PIPE)
    ffprobe_process.wait()

    stdout, stderr = ffprobe_process.communicate()

    assert ffprobe_process.returncode == 0, f"ffprobe didn't run successfully {stderr}"

    return stdout.decode()

# Execute ffprobe on the 5 first video to produce their json output
# concatenate them inside a json array with a root element,
# and generate their pydantic models from it using 'datamodel-code-generator'
raw_json_output = [
    get_stream_info_with_ffprobe(Path("../SZTE") / path)
    for path in df_initial_meta_videos.iloc[:min(5, len(df_initial_meta_videos))]['VideoPath']
]

raw_ffprobe_json = f'{{"videos": [{",".join(raw_json_output)}]}}'
datamodel_code_generator.generate(raw_ffprobe_json, input_file_type=datamodel_code_generator.InputFileType.Json, output=Path(".") / "ffprobe_models.py", target_python_version=datamodel_code_generator.PythonVersion.PY_310, class_name="VideosRoot")

In [9]:
from ffprobe_models import Video

def dict_and_flatten_video_streams(video: Video) -> Dict[str, Any]:
    dic: Dict[str, Any] = video.dict()
    streams = dic.pop("streams")

    assert len(streams) == 1, "Expected a unique stream in the given video"

    dic["stream"] = streams[0]

    return dic

relative_video_paths: List[Path] = df_initial_meta_videos['VideoPath'].map(lambda partial_video_path: Path("../SZTE") / partial_video_path).tolist()
ffprobe_video_properties = [dict_and_flatten_video_streams(Video.parse_raw(get_stream_info_with_ffprobe(path))) for path in relative_video_paths]
ffprobe_video_properties[:2]

[{'format': {'filename': 'SZTEN201d.mov',
   'nb_streams': 1,
   'nb_programs': 0,
   'format_name': 'mov,mp4,m4a,3gp,3g2,mj2',
   'format_long_name': 'QuickTime / MOV',
   'start_time': '0.000000',
   'duration': '1800.000000',
   'size': '6279587110',
   'bit_rate': '27909276',
   'probe_score': 100,
   'tags': {'creation_time': '2005-12-15T14:02:12.000000Z',
    'premiere_version': 'Created with Adobe Premiere 6.0'}},
  'stream': {'index': 0,
   'codec_name': 'mjpeg',
   'codec_long_name': 'Motion JPEG',
   'profile': 'Baseline',
   'codec_type': 'video',
   'codec_tag_string': 'mjpa',
   'codec_tag': '0x61706a6d',
   'width': 720,
   'height': 576,
   'coded_width': 720,
   'coded_height': 576,
   'closed_captions': 0,
   'film_grain': 0,
   'has_b_frames': 0,
   'sample_aspect_ratio': '15:16',
   'display_aspect_ratio': '75:64',
   'pix_fmt': 'yuvj422p',
   'level': -99,
   'color_range': 'pc',
   'color_space': 'bt470bg',
   'chroma_location': 'center',
   'field_order': 'tt',
  

In [10]:
df_ffprobe = pd.json_normalize(ffprobe_video_properties)
# Add a column to *join* the 2 DataFrames
df_ffprobe['Name'] = df_ffprobe['format.filename'].str.removesuffix(".mov")
df_ffprobe

Unnamed: 0,format.filename,format.nb_streams,format.nb_programs,format.format_name,format.format_long_name,format.start_time,format.duration,format.size,format.bit_rate,format.probe_score,...,stream.disposition.descriptions,stream.disposition.metadata,stream.disposition.dependent,stream.disposition.still_image,stream.tags.creation_time,stream.tags.language,stream.tags.handler_name,stream.tags.vendor_id,stream.tags.encoder,Name
0,SZTEN201d.mov,1,0,"mov,mp4,m4a,3gp,3g2,mj2",QuickTime / MOV,0.0,1800.0,6279587110,27909276,100,...,0,0,0,0,2005-12-15T14:02:12.000000Z,eng,Apple Video Media Handler,appl,Motion JPEG A,SZTEN201d
1,SZTEN201e.mov,1,0,"mov,mp4,m4a,3gp,3g2,mj2",QuickTime / MOV,0.0,900.0,4790607670,42583179,100,...,0,0,0,0,2006-05-23T14:01:33.000000Z,eng,Apple Video Media Handler,appl,Motion JPEG A,SZTEN201e
2,SZTEN201b.mov,1,0,"mov,mp4,m4a,3gp,3g2,mj2",QuickTime / MOV,0.0,1800.0,6632899078,29479551,100,...,0,0,0,0,2005-12-09T12:59:42.000000Z,eng,Apple Video Media Handler,appl,Motion JPEG A,SZTEN201b
3,SZTEA101a.mov,1,0,"mov,mp4,m4a,3gp,3g2,mj2",QuickTime / MOV,0.0,2231.56,14605606929,52360167,100,...,0,0,0,0,2005-11-08T01:11:19.000000Z,eng,Apple Video Media Handler,appl,Motion JPEG A,SZTEA101a
4,SZTEA204a.mov,1,0,"mov,mp4,m4a,3gp,3g2,mj2",QuickTime / MOV,0.0,5532.52,19532330934,28243666,100,...,0,0,0,0,2005-11-04T02:57:56.000000Z,eng,Apple Video Media Handler,appl,Motion JPEG A,SZTEA204a
5,SZTEN201c.mov,1,0,"mov,mp4,m4a,3gp,3g2,mj2",QuickTime / MOV,0.0,1800.0,6297246998,27987764,100,...,0,0,0,0,2005-12-13T12:22:17.000000Z,eng,Apple Video Media Handler,appl,Motion JPEG A,SZTEN201c
6,SZTEN201a.mov,1,0,"mov,mp4,m4a,3gp,3g2,mj2",QuickTime / MOV,0.0,900.0,3007251014,26731120,100,...,0,0,0,0,2005-12-05T09:26:26.000000Z,eng,Apple Video Media Handler,appl,Motion JPEG A,SZTEN201a
7,SZTEA101b.mov,1,0,"mov,mp4,m4a,3gp,3g2,mj2",QuickTime / MOV,0.0,2986.24,15163913029,40623427,100,...,0,0,0,0,2005-11-07T23:35:34.000000Z,eng,Apple Video Media Handler,appl,Motion JPEG A,SZTEA101b
8,SZTEN103b.mov,1,0,"mov,mp4,m4a,3gp,3g2,mj2",QuickTime / MOV,0.0,1800.0,7524978582,33444349,100,...,0,0,0,0,2006-06-06T11:44:29.000000Z,eng,Apple Video Media Handler,appl,Motion JPEG A,SZTEN103b
9,SZTEA203a.mov,1,0,"mov,mp4,m4a,3gp,3g2,mj2",QuickTime / MOV,0.0,2830.6,10185628902,28787193,100,...,0,0,0,0,2005-11-02T04:41:45.000000Z,eng,Apple Video Media Handler,appl,Motion JPEG A,SZTEA203a


In [11]:
# Join both Data Frame
df_videos = df_initial_meta_videos.set_index("Name").join(df_ffprobe.set_index("Name"))

Inference and description of the columns:

In [12]:
videos_columns_descriptions = """\
VideoPath:      POSIX path from the root of the SZTE folder
                [example: './video/SZTEA101a.mov']
VideoLength:    Frame count in the clip, for example, './video/SZTEA101a.mov' has
                a frame rate of approx. 25 and a duration of 37'12":
                (minutes * 60 + seconds) * frame rate = (37 * 60 + 12) * 25 = 55'800
                (difference is due to the fact that the last second is not complete)
                [example: 55789]
Name:           Name of the video file without extension
                [example: 'SZTEA101a']
Datarate:       _Not certain to get what it expresses_
                [example: 54900]
AspectRatio:    Ratio between width and height expressed in a string seperated by a
                colon like this 'W:H'
                [example: '4:3']
format.X.Y.Z:   Fields extracted using ffprobe CLI
stream.X.Y.Z:   Fields extracted using ffprobe CLI concerning the single video stream
                of the video\
"""

with open(Path("../SZTE/videos_description.txt"), "w") as f:
    f.write(videos_columns_descriptions)

from IPython.display import display, Markdown

display(Markdown(f"```{os.linesep}{videos_columns_descriptions}{os.linesep}```"))

```
VideoPath:      POSIX path from the root of the SZTE folder
                [example: './video/SZTEA101a.mov']
VideoLength:    Frame count in the clip, for example, './video/SZTEA101a.mov' has
                a frame rate of approx. 25 and a duration of 37'12":
                (minutes * 60 + seconds) * frame rate = (37 * 60 + 12) * 25 = 55'800
                (difference is due to the fact that the last second is not complete)
                [example: 55789]
Name:           Name of the video file without extension
                [example: 'SZTEA101a']
Datarate:       _Not certain to get what it expresses_
                [example: 54900]
AspectRatio:    Ratio between width and height expressed in a string seperated by a
                colon like this 'W:H'
                [example: '4:3']
format.X.Y.Z:   Fields extracted using ffprobe CLI
stream.X.Y.Z:   Fields extracted using ffprobe CLI concerning the single video stream
                of the video
```

In [13]:
# save as a more portable file (csv) in the root folder of the dataset
df_videos.to_csv('../SZTE/videos.csv')

#### Parse root index.xml

In [14]:
def deep_get(dictionary: Union[Dict, OrderedDict], keys: str) -> Any:
    """Inspiration https://stackoverflow.com/a/46890853/3771148"""
    return reduce(lambda d, key: d[key], keys.split("."), dictionary)


In [15]:
index_xml = read_xml(Path('../SZTE/index.xml'))

# For easier navigation/inspection
# with open("../SZTE/index.json", "w") as json_fd:
#     dump(index_xml, json_fd, indent=4, sort_keys=True)

# Extract only part of the produced structure (IlidsLibraryIndex.Library)
tmp_clip_json_path = Path("../SZTE/index-library.tmp.json")
ilids_library_xml = deep_get(index_xml, "IlidsLibraryIndex.Library")

with open(tmp_clip_json_path, "w") as json_fd:
    dump(ilids_library_xml, json_fd, indent=4, sort_keys=True)


# Generate pydantic models from clip json using 'datamodel-code-generator' package
datamodel_code_generator.generate(tmp_clip_json_path, input_file_type=datamodel_code_generator.InputFileType.Json, output=Path(".") / "szte_ilids_models.py", target_python_version=datamodel_code_generator.PythonVersion.PY_310, class_name="IlidsLibrary")

tmp_clip_json_path.unlink()

In [16]:
# Let's fix some type conversion from str to int or time
from szte_ilids_models import IlidsLibrary as IlidsLibraryBase, ClipItem as ClipItemBase, Alarms as AlarmsBase, AlarmItem as AlarmItemBase, Weather as WeatherBase

class AlarmItem(AlarmItemBase):
    AlarmDuration: time
    Distance: int
    StartTime: time

class Alarms(AlarmsBase):
    Alarm: List[AlarmItem]

class Weather(WeatherBase):
    Clouds: Optional[str] = None
    Fog: bool
    Rain: bool
    Snow: bool
    TimeOfDay: str

class ClipItem(ClipItemBase):
    AlarmEvents: int
    Alarms: Optional[Alarms] = None
    Duration: time
    Stage: int
    Weather: Weather

    @validator("filename", pre=True)
    def parse_filename(cls, value: str) -> str:
        """In the initial dataset, the videos are stored in .mov files and .qtl are present as a
        duplicate that "symlink" to the .mov file.
        In the index.xml file, it references those .qtl file.
        Therefore, replace the .qtl extension to the .mov"""
        assert value.endswith(".qtl")
        return value.replace(".qtl", ".mov")


class IlidsLibrary(IlidsLibraryBase):
    clip: List[ClipItem]


In [17]:
ilids_library = IlidsLibrary.parse_obj(ilids_library_xml)
print(dict(scenario=ilids_library.scenario, version=ilids_library.libversion, dataset=ilids_library.dataset))
with open(Path("../SZTE/meta.txt"), "w") as f:
    f.write(os.linesep.join([
        "i-LIDS",
        f"Scenario: {ilids_library.scenario}",
        f"Dataset: {ilids_library.dataset}",
        f"Version: {ilids_library.libversion}"
    ]))


clips_with_alarm_events = [(clip.AlarmEvents, clip.filename, clip.Duration) for clip in ilids_library.clip if clip.AlarmEvents > 0]
sorted(clips_with_alarm_events)

{'scenario': 'Sterile Zone', 'version': '1.0', 'dataset': 'Test'}


[(10, 'SZTEA101a.mov', datetime.time(0, 37, 11)),
 (10, 'SZTEA105a.mov', datetime.time(0, 35, 28)),
 (10, 'SZTEA201a.mov', datetime.time(0, 37, 11)),
 (13, 'SZTEA102a.mov', datetime.time(0, 36, 39)),
 (13, 'SZTEA202a.mov', datetime.time(0, 36, 37)),
 (15, 'SZTEA101b.mov', datetime.time(0, 49, 46)),
 (15, 'SZTEA201b.mov', datetime.time(0, 49, 46)),
 (17, 'SZTEA102b.mov', datetime.time(0, 45, 56)),
 (17, 'SZTEA103a.mov', datetime.time(0, 47, 14)),
 (17, 'SZTEA202b.mov', datetime.time(0, 45, 38)),
 (17, 'SZTEA203a.mov', datetime.time(0, 47, 10)),
 (31, 'SZTEA104a.mov', datetime.time(1, 32, 18)),
 (31, 'SZTEA204a.mov', datetime.time(1, 32, 12))]

##### Extract 'alarms' and 'distractions' in different files

**Alarms**: Produce a csv, with filename --> Alarms (a filename can appear multiple times, but each entry is an alarm associated to a file)

In [18]:
# Without alarms nor distractions
clips_information = [clip.dict(exclude={"Alarms", "Distractions"}) for clip in ilids_library.clip]
clips_information[:2]

[{'AlarmEvents': 10,
  'Duration': datetime.time(0, 37, 11),
  'Stage': 1,
  'Weather': {'Clouds': 'None',
   'Fog': False,
   'Rain': False,
   'Snow': False,
   'TimeOfDay': 'Dawn'},
  'filename': 'SZTEA101a.mov'},
 {'AlarmEvents': 15,
  'Duration': datetime.time(0, 49, 46),
  'Stage': 1,
  'Weather': {'Clouds': 'Overcast',
   'Fog': False,
   'Rain': False,
   'Snow': False,
   'TimeOfDay': 'Dusk'},
  'filename': 'SZTEA101b.mov'}]

In [19]:
clips_information_df = pd.json_normalize(clips_information).set_index("filename")
clips_information_df.to_csv('../SZTE/clips_information.csv')

In [20]:
# **Absence defines no alarm for that file/sequence**
alarms_per_filename = [dict(filename=clip.filename, **alarm.dict()) for clip in ilids_library.clip if clip.AlarmEvents > 0 for alarm in clip.Alarms.Alarm]
alarms_per_filename[:2]

[{'filename': 'SZTEA101a.mov',
  'AlarmDescription': 'Fence Attack',
  'AlarmDuration': datetime.time(0, 1),
  'Distance': 30,
  'StartTime': datetime.time(0, 5, 37),
  'SubjectApproachType': 'Crouch Walk',
  'SubjectDescription': 'One Person',
  'SubjectOrientation': 'Perpendicular'},
 {'filename': 'SZTEA101a.mov',
  'AlarmDescription': 'Fence Attack',
  'AlarmDuration': datetime.time(0, 1, 8),
  'Distance': 15,
  'StartTime': datetime.time(0, 8, 58),
  'SubjectApproachType': 'Crawl',
  'SubjectDescription': 'One Person',
  'SubjectOrientation': 'Perpendicular'}]

In [21]:
alarms_per_filename_df = pd.json_normalize(alarms_per_filename).set_index("filename")
alarms_per_filename_df.to_csv('../SZTE/alarms.csv')

**Distractions**: Produce a csv, with filename 1 -- 1 Distractions (distractions are comma seperated)

In [22]:
distractions_per_filename = [dict(filename=clip.filename, distractions=(",".join(clip.Distractions.Distraction)) if isinstance(clip.Distractions.Distraction, list) else clip.Distractions.Distraction) for clip in ilids_library.clip if clip.Distractions]
distractions_per_filename[:2]


[{'filename': 'SZTEA101a.mov',
  'distractions': 'Camera switch from monochrome to colour'},
 {'filename': 'SZTEA101b.mov',
  'distractions': 'Camera switch from colour to monochrome,Bats'}]

In [23]:
distractions_per_filename_df = pd.json_normalize(distractions_per_filename).set_index("filename")
distractions_per_filename_df.to_csv('../SZTE/distractions.csv')


---
## SZTR

In opposition with the **SZTE** folder, it has a different structure for its annotations.
Its subfolder has the structure as following:

```
SZTE/
├── calibration/        # Don't care, old .tif files
├── index-files/        # Html etc. files for their web interface, to browser and
│                       #   filter the clips
├── video/              # Holds the videos in pairs with their xml files holding
│   │                   #   video metadata
│   ├── SZTRA101a01.mov
│   ├── ... 235 videos later
│   └── SZTRN203a.mov
├── i-LIDS Flyer.pdf        # General purpose 1 page flyer describing the different
│                           #   i-LIDS datasets
├── index.xml               # Holds the perturbation annotations for the sequences
│                           #   in the video/folder
├── Sterile Zone.pdf        # Short pdf describing the dataset and the structure of
│                           #   the index.xml file
├── SZTR.mdb                # Database holding 4 tables: CLIPDATA, CLIPS, DATASTRUCTURE, LIBRARIES
└── User Guide.pdf          # Guide for the licensing, distribution, web app and more
```


### Metadata extraction


#### Extract SZTR.mdb database data in CSV
Using the [mdbtools CLIs](https://github.com/mdbtools/mdbtools/), extract the tables of the database
into CSV files.


In [24]:
!mdb-tables --single-column ../SZTR/SZTR.mdb | xargs -I{} sh -c 'mdb-export ../SZTR/SZTR.mdb {} > ../SZTR/$(echo {} | tr "[:upper:]" "[:lower:]").csv'


In [25]:
df_clipdata = pd.read_csv(Path('../SZTR/clipdata.csv'))
df_clips = pd.read_csv(Path('../SZTR/clips.csv'))
df_datastructure = pd.read_csv(Path('../SZTR/datastructure.csv'))
df_libraries = pd.read_csv(Path('../SZTR/libraries.csv'))


In [26]:
df_clipdata.head()


Unnamed: 0,ClipdataID,ClipID,ElemID,value,ParentCID
0,2,1,25,,0
1,4,1,24,,0
2,12,1,29,,4
3,20,2,25,,0
4,22,2,24,,0


In [27]:
df_clips.head()


Unnamed: 0,ClipID,VitalfileID,LibID,stage,filename
0,1,SZTRA101a01,5,1,SZTRA101a01.qtl
1,2,SZTRA201a01,5,2,SZTRA201a01.qtl
2,3,SZTRA101a02,5,1,SZTRA101a02.qtl
3,4,SZTRA201a02,5,2,SZTRA201a02.qtl
4,5,SZTRA101a03,5,1,SZTRA101a03.qtl


In [28]:
df_datastructure.head()


Unnamed: 0,ElemID,Name,Sort,Description,Parent
0,40,Clouds,2,None|Some|Overcast,25
1,55,Distraction,1,Description of False Alarm,56
2,56,Distractions,4,Number of distractions in this clip,0
3,57,AlarmDescription,2,All Fence Attacks,29
4,29,Alarm,1,*Parent to properties of a single alarm event,24


In [29]:
df_libraries.head()


Unnamed: 0,LibID,scenario,dataset,version
0,5,Sterile Zone,Training,1.0


#### Prse root index.xml

In [30]:
index_xml = read_xml(Path('../SZTR/index.xml'))

# For easier navigation/inspection
# with open("../SZTR/index.json", "w") as json_fd:
#     dump(index_xml, json_fd, indent=4, sort_keys=True)

# Extract only part of the produced structure (IlidsLibraryIndex.Library)
tmp_clip_json_path = Path("../SZTR/index-library.tmp.json")
ilids_library_xml = deep_get(index_xml, "IlidsLibraryIndex.Library")

with open(tmp_clip_json_path, "w") as json_fd:
    dump(ilids_library_xml, json_fd, indent=4, sort_keys=True)


# Generate pydantic models from clip json using 'datamodel-code-generator' package
datamodel_code_generator.generate(tmp_clip_json_path, input_file_type=datamodel_code_generator.InputFileType.Json, output=Path(".") / "sztr_ilids_models.py", target_python_version=datamodel_code_generator.PythonVersion.PY_310, class_name="IlidsLibrary")

tmp_clip_json_path.unlink()

In [31]:
# Let's fix some type conversion from str to int or time
from sztr_ilids_models import IlidsLibrary as IlidsLibraryBase, ClipItem as ClipItemBase, Alarms as AlarmsBase, Alarm as AlarmBase, Weather as WeatherBase
#
class Alarm(AlarmBase):
    AlarmDuration: time
    Distance: int
    StartTime: time

class Alarms(AlarmsBase):
    Alarm: Alarm

class Weather(WeatherBase):
    Fog: bool
    Rain: Optional[bool] = None
    Snow: bool
    TimeOfDay: str
    Clouds: Optional[str] = None

class ClipItem(ClipItemBase):
    AlarmEvents: int
    Alarms: Optional[Alarms] = None
    Duration: time
    Stage: int
    Weather: Weather

    @validator("filename", pre=True)
    def parse_filename(cls, value: str) -> str:
        """In the initial dataset, the videos are stored in .mov files and .qtl are present as a
        duplicate that "symlink" to the .mov file.
        In the index.xml file, it references those .qtl file.
        Therefore, replace the .qtl extension to the .mov"""
        assert value.endswith(".qtl")
        return value.replace(".qtl", ".mov")


class IlidsLibrary(IlidsLibraryBase):
    clip: List[ClipItem]

In [32]:

# Extract only part of the produced structure (IlidsLibraryIndex.Library)
ilids_library_xml = deep_get(index_xml, "IlidsLibraryIndex.Library")
ilids_library = IlidsLibrary.parse_obj(ilids_library_xml)

print(dict(scenario=ilids_library.scenario, version=ilids_library.libversion, dataset=ilids_library.dataset))
with open(Path("../SZTR/meta.txt"), "w") as f:
    f.write(os.linesep.join([
        "i-LIDS",
        f"Scenario: {ilids_library.scenario}",
        f"Dataset: {ilids_library.dataset}",
        f"Version: {ilids_library.libversion}"
    ]))

clips_with_alarm_events = [(clip.AlarmEvents, clip.filename, clip.Duration) for clip in ilids_library.clip if clip.AlarmEvents > 0]

{'scenario': 'Sterile Zone', 'version': '1.0', 'dataset': 'Training'}


##### Extract 'alarms' and 'distractions' in different files


** Alarms **: Produce a csv, with filename --> Alarms (a filename can appear multiple times, but each entry is an alarm associated to a file)

In [33]:
# Without alarms nor distractions
clips_information = [clip.dict(exclude={"AlarmEvents", "Alarms", "Distractions"}) for clip in ilids_library.clip]
clips_information_df = pd.json_normalize(clips_information).set_index("filename")
clips_information_df.to_csv('../SZTR/clips_information.csv')

# **Absence defines no alarm for that file/sequence**
alarms_per_filename = [dict(filename=clip.filename, **clip.Alarms.Alarm.dict()) for clip in ilids_library.clip if clip.AlarmEvents > 0]

alarms_per_filename_df = pd.json_normalize(alarms_per_filename).set_index("filename")
alarms_per_filename_df.to_csv('../SZTR/alarms.csv')

** Distractions **: Produce a csv, with filename 1 -- 1 Distractions (distractions are comma seperated)

In [35]:
distractions_per_filename = [dict(filename=clip.filename, distractions=(",".join(clip.Distractions.Distraction)) if isinstance(clip.Distractions.Distraction, list) else clip.Distractions.Distraction) for clip in ilids_library.clip if clip.Distractions]

distractions_per_filename_df = pd.json_normalize(distractions_per_filename).set_index("filename")
distractions_per_filename_df.to_csv('../SZTR/distractions.csv')
