In [1]:
from os import listdir, getcwd, makedirs
from os.path import join, isfile, exists
from typing import Sequence

import cv2
import pandas as pd
from scipy.io import loadmat

from tools import smd_paths
from tools.base_paths import get_root_dir
from tools.smd_paths import get_smd_main_dir

The purpose of this notebook is to take a closer look at the details of the frame generation for the Singapore Maritime Dataset.

### Why?
- MVI_0790 originally generated 600 frames, while previous work claims it to have 1010 frames
    - So in generating the xml files, we get way fewer annotations matched to frames.
    - This must be investigated.

- We observe that MVI_0790 in objects_onboard.txt, as generated by the author of (https://github.com/tilemmpon/Singapore-Maritime-Dataset-Frames-Ground-Truth-Generation-and-Statistics),
 only utilizes the non-empty annotated frames from the GT file, not all 1010.

- In our case, it seems we must either match each of the 600 frames to a non-empty annotation in the GT file
 or simply generate frames at a higher frame rate.




We thus find it of interest to generate all the frames from MVI_0790 and double-check the total amount.
- Thereafter, we compare with the total amount of lines in the objectGT mat file.


In [2]:
# Define directories
# Repository root returned by the project helper.
repo_dir = get_root_dir()
# NOTE(review): smd_dir and onboard_annotations are not referenced again in the
# visible cells — confirm they are still needed.
smd_dir = get_smd_main_dir()
onboard_annotations = smd_paths.vis_onboard_all_annotations

Load test video MVI_0790_VIS_OB

In [3]:
# Load test video
video_name_filter = 'MVI_0790_VIS_OB'
videos_path_onboard = join(repo_dir, "SMD_Data", "VIS_Onboard", "Videos")
object_annotations_onboard_path = join(repo_dir, "SMD_Data", "VIS_Onboard", "ObjectGT")

# Video file and ObjectGT annotation file of the clip under investigation.
test_video = join(videos_path_onboard, video_name_filter + '.avi')
test_gt_file = join(object_annotations_onboard_path, video_name_filter + '_ObjectGT.mat')

# Define test folder. The folder name is derived from video_name_filter rather
# than repeating the literal, so it always matches the clip selected above.
test_folder = join(getcwd(), video_name_filter)
makedirs(test_folder, exist_ok=True)

Count the length of captured frames of test video MVI_0790_VIS_OB

In [5]:
def count_video_frames_manually(video_path: str, decode: bool = False) -> int:
    """Return the number of frames of the video at ``video_path``.

    By default the count is the ``cv2.CAP_PROP_FRAME_COUNT`` property reported
    by OpenCV, truncated to an int. That value is whatever the capture backend
    reports and is not guaranteed to equal the number of frames that can
    actually be decoded; pass ``decode=True`` to count them one by one.

    Args:
        video_path: Path of the video file to open.
        decode: If True, ignore the reported property and count frames by
            grabbing them until OpenCV reports that no frame is left.

    Returns:
        The number of frames.

    Raises:
        IOError: If OpenCV cannot open ``video_path``. Without this check an
            unreadable video would silently be reported as having 0 frames.
    """
    vidcap = cv2.VideoCapture(video_path)
    try:
        if not vidcap.isOpened():
            raise IOError('Could not open video: {}'.format(video_path))
        if not decode:
            return int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
        # grab() advances to the next frame without retrieving the image.
        length = 0
        while vidcap.grab():
            length += 1
        return length
    finally:
        # Always free the capture handle, also when an error is raised above.
        vidcap.release()

# Frame count of the test video as returned by count_video_frames_manually.
test_frames = count_video_frames_manually(test_video)
# NOTE(review): the message says "test folder", but the value printed is the
# frame count of test_video.
print('Framecap length of test folder is: {}'.format(test_frames))



Framecap length of test folder is: 600


Let us compare this count with the total number of lines in the mat ground-truth file and with its "non-empty annotation" lines.
That is, annotation lines which indicate the presence of an object.

- Each line should correspond to one frame, where the line might include single or multiple-object annotations.

In [6]:
def load_mat_file(file_path: str):
    """Count the annotation rows stored in an ObjectGT ``.mat`` file.

    Reads the first row of the ``structXML`` variable and inspects the
    ``Object`` field of every entry in it.

    Args:
        file_path: Path of the ``.mat`` file to load.

    Returns:
        A tuple ``(total_rows, rows_with_objects)``: the number of entries in
        ``structXML`` and how many of them have a non-empty ``Object`` array.
    """
    annotation_rows = loadmat(file_path)['structXML'][0]
    total_rows = len(annotation_rows)
    # An entry counts as annotated when its 'Object' array holds any element.
    rows_with_objects = sum(1 for entry in annotation_rows['Object'] if entry.size > 0)
    return total_rows, rows_with_objects

# Total GT rows and GT rows with a non-empty 'Object' entry for the test clip.
ground_truths, ground_truths_with_objs = load_mat_file(test_gt_file)

# Print the video frame count next to both GT counts for comparison.
print('Frame count for {} is: {}'.format(video_name_filter, test_frames))
print('All GT lines in mat file: {}'.format(ground_truths))
print('GT lines with an object annotated: {}'.format(ground_truths_with_objs))

Frame count for MVI_0790_VIS_OB is: 600
All GT lines in mat file: 1010
GT lines with an object annotated: 597


As we can see, the MVI_0790_VIS_OB has 600 frames from video capture generation.

- There are 1010 annotated frames in the ground truth.
- There are 597 annotated frames where an object actually occurs.

- How can we match the generated 600 frames with the 1010 annotation lines? Something is obviously fishy.

### Let us summarize the same statistics for all the videos and assess gt count discrepancies.
This might just be the case for the MVI_0790_VIS_OB video.


Define onboard GT paths ( hardcoded )

In [10]:
# The onboard clips that come with ground truth are known, so list them explicitly.
onboard_file_names = [
    'MVI_0790_VIS_OB',
    'MVI_0797_VIS_OB',
    'MVI_0799_VIS_OB',
    'MVI_0801_VIS_OB',
]

# Map every clip name to its video file and to its ObjectGT annotation file.
onboard_video_paths = dict(
    (name, join(videos_path_onboard, name + '.avi')) for name in onboard_file_names
)
onboard_gt_paths = dict(
    (name, join(object_annotations_onboard_path, name + '_ObjectGT.mat'))
    for name in onboard_file_names
)

# Sanity check: every derived path has to point at an existing file.
assert all(map(isfile, onboard_video_paths.values())), \
    'The onboard video paths do not exist'
assert all(map(isfile, onboard_gt_paths.values())), \
    'The onboard video GT paths do not exist'

print('Length of onboard videos: {}'.format(len(onboard_video_paths)))
print('Length of onboard GT files: {}'.format(len(onboard_gt_paths)))


Length of onboard videos: 4
Length of onboard GT files: 4


Create helper function and summarize statistics for onboard
Summarize the following
- Total generated frames: 'Frame_count'
- Total GT lines (rows) in GT mat files: 'GT_all_mat_lines'
- Number of GT lines which are not emptily annotated: 'GT_mat_lines_not_empty'
- Difference between total generated frames and total GT lines: 'Diff_Frames_all_lines'


In [11]:

def generate_frame_and_gt_statistics(file_names, video_path_dict, gt_path_dict):
    """Build a table comparing video frame counts with ground-truth row counts.

    For every name in ``file_names`` the frame count of its video is obtained
    with ``count_video_frames_manually`` and the ground-truth row counts with
    ``load_mat_file``.

    Args:
        file_names: Iterable of names used as keys into both dictionaries.
        video_path_dict: Maps each name to the path of its video file.
        gt_path_dict: Maps each name to the path of its ground-truth mat file.

    Returns:
        A ``pd.DataFrame`` with one row per name and the columns 'Filename',
        'Frame_count', 'GT_all_mat_lines', 'GT_mat_lines_not_empty' and
        'Diff_Frames_all_lines' (frame count minus all GT lines). For empty
        ``file_names`` an empty frame with these columns is returned.

    Raises:
        KeyError: If a name is missing from one of the dictionaries.
    """
    columns = ['Filename', 'Frame_count', 'GT_all_mat_lines',
               'GT_mat_lines_not_empty', 'Diff_Frames_all_lines']
    # Collect plain records and build the frame once; this keeps the count
    # columns numeric instead of object dtype and also works with no input.
    records = []
    for file_name in file_names:
        video_cap_length = count_video_frames_manually(video_path_dict[file_name])
        gt_all_lines, gt_object_count = load_mat_file(gt_path_dict[file_name])
        records.append({'Filename': file_name,
                        'Frame_count': video_cap_length,
                        'GT_all_mat_lines': gt_all_lines,
                        'GT_mat_lines_not_empty': gt_object_count,
                        'Diff_Frames_all_lines': video_cap_length - gt_all_lines})

    return pd.DataFrame(records, columns=columns)


# Bare last expression so the notebook renders the onboard statistics table.
generate_frame_and_gt_statistics(onboard_file_names, onboard_video_paths, onboard_gt_paths)

Unnamed: 0,Filename,Frame_count,GT_all_mat_lines,GT_mat_lines_not_empty,Diff_Frames_all_lines
0,MVI_0790_VIS_OB,600,1010,597,-410
1,MVI_0797_VIS_OB,600,600,600,0
2,MVI_0799_VIS_OB,600,601,481,-1
3,MVI_0801_VIS_OB,600,600,600,0


The main problem is when the frame_count < GT_all_mat_lines. That means we have generated
fewer frames than existing annotations.  As we see, this is mainly the case for MVI_0790_VIS_OB.

Let us do the same check for onshore videos.

In [12]:
# Redefine onshore paths
videos_path_onshore = join(repo_dir, "SMD_Data", "VIS_Onshore", "Videos")
object_annotations_onshore_path = join(repo_dir, "SMD_Data", "VIS_Onshore", "ObjectGT")

# Only keep real ground-truth files (listdir also returns any stray file in the
# folder) and sort them, because listdir returns entries in arbitrary order.
gt_file_suffix = '_ObjectGT.mat'
onshore_gt_file_names = sorted(file_name for file_name in listdir(object_annotations_onshore_path)
                               if file_name.endswith(gt_file_suffix))
# Get the file names which are used for filtering the videos for generating frames
onshore_file_names = [file_name[:-len(gt_file_suffix)] for file_name in onshore_gt_file_names]

onshore_gt_paths = {file_name: join(object_annotations_onshore_path, file_name + gt_file_suffix)
                    for file_name in onshore_file_names}

onshore_video_paths = {file_name: join(videos_path_onshore, file_name + '.avi')
                       for file_name in onshore_file_names}

# Quality checks: every derived path has to point at an existing file.
assert all(isfile(onshore_video_paths.get(file_name)) for file_name in onshore_file_names), \
    'The onshore video paths do not exist'
assert all(isfile(onshore_gt_paths.get(file_name)) for file_name in onshore_file_names), \
    'The onshore video GT paths do not exist'

print('Length of onshore videos: {}'.format(len(onshore_video_paths)))
print('Length of onshore GT files: {}'.format(len(onshore_gt_paths)))

Length of onshore videos: 36
Length of onshore GT files: 36


Onshore statistics

In [13]:
# Bare last expression so the notebook renders the onshore statistics table.
generate_frame_and_gt_statistics(onshore_file_names, onshore_video_paths, onshore_gt_paths)

Unnamed: 0,Filename,Frame_count,GT_all_mat_lines,GT_mat_lines_not_empty,Diff_Frames_all_lines
0,MVI_1448_VIS_Haze,604,604,604,0
1,MVI_1451_VIS_Haze,439,439,439,0
2,MVI_1452_VIS_Haze,340,340,340,0
3,MVI_1469_VIS,600,600,600,0
4,MVI_1474_VIS,445,445,445,0
5,MVI_1478_VIS,477,477,477,0
6,MVI_1479_VIS,206,206,206,0
7,MVI_1481_VIS,409,409,409,0
8,MVI_1482_VIS,454,454,454,0
9,MVI_1484_VIS,687,687,687,0


### Observations

- There are only three videos whose generated frame count differs from the total number of GT mat annotation lines.
- Onboard: MVI_0790_VIS_OB (-410), MVI_0799_VIS_OB (-1)
- Onshore: MVI_1584_VIS (-11)

So, this issue is mainly present for MVI_0790_VIS_OB as the other ones are mostly similar to their frame generation.
How can it be the case?

### Error hypothesis 1
- The 600 frames generated from MVI_0790_VIS_OB were possibly generated with the wrong frame rate.
- Can we set the fps of cv2.videocapture to generate the same amount of frames as the total
annotated frames from the mat file?

However, this is counterintuitive, as all the other onboard videos are 20 seconds long
and for 600 generated frames they seem to match well with the annotations.

Why would one video need such a much higher fps when the other ones are recorded with the same camera ... ?



In [14]:
# Read the frame rate that OpenCV reports for the test video.
vidcap = cv2.VideoCapture(test_video)
fps = vidcap.get(cv2.CAP_PROP_FPS)
vidcap.release()
# Bare expression so the notebook displays the value.
fps

30.0

We can see that an fps of 30, combined with a video length of 20 s for MVI_0790_VIS_OB, yields our 600 generated frames.



### Error hypothesis 2
We need to ignore the empty annotated lines from the GT of MVI_0790_VIS_OB.

- However, such a solution would require us to match 600 frames with the 597 ground truths where an object actually occurs.
- It would be presumptuous to assume a correct match if we strip away the empty GT lines.



