In [1]:
from pathlib import Path

import pandas as pd

from massimal.batch_process import underwater_image_correction
from massimal.video_transect import (
    extract_images_from_video,
    filter_gdf_on_distance,
    get_video_data,
    otter_csv_to_geodataframe,
    prepare_gdf_with_video_data,
)

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Parameters
# Root folder of the dataset (absolute path on the processing machine).
dataset_root_dir = Path("/media/mha114/Massimal2/Massimal/Larvik_Terneholmen/Otter/20230829")
# Prefix used when building the names of the output datasets.
dataset_base_name = "massimal_larvik_terneholmen_20230829_usv"
# Distance passed to filter_gdf_on_distance when subsampling positions.
sample_distance = 2  # meters

# GoPro video folder and its "T*" subfolders (presumably one per transect), in sorted order.
video_root_dir = dataset_root_dir.joinpath("3_GoPro", "Video")
video_dirs = sorted(video_root_dir.glob("T*"))

In [5]:
# Extract geotagged images from the videos in each video folder and save the
# resulting GeoDataFrame as a GeoPackage next to the extracted images.
# NOTE(review): the [1:] slice skips the first folder in video_dirs - presumably
# it was processed in an earlier run. Iterate over `video_dirs` to process all.
for video_dir in video_dirs[1:]:
    print(f"Processing data in {video_dir}")

    try:
        # Find CSV file. A default is passed to next() so that a folder without
        # a CSV produces a descriptive error rather than a StopIteration, whose
        # message is empty and would make the failure report below useless.
        csv_file = next(video_dir.glob("*.csv"), None)
        if csv_file is None:
            raise FileNotFoundError(f"no CSV file found in {video_dir}")

        # Define input and output paths
        geotagged_dir = dataset_root_dir / "8_GeotaggedImages" / video_dir.name
        image_dir = geotagged_dir / "images"
        # parents=True also creates geotagged_dir, so one call is sufficient.
        image_dir.mkdir(parents=True, exist_ok=True)

        dataset_name = f"{dataset_base_name}_{video_dir.name}_geotagged_images"
        gpkg_file = geotagged_dir / f"{dataset_name}.gpkg"

        # Materialize and sort the matches so the listing is deterministic
        # (glob yields entries in filesystem order) and can be iterated again.
        video_files = sorted(video_dir.glob("*.[Mm][Pp]4"))
        print(f"Found video files {[vfile.name for vfile in video_files]}")
        # Video metadata is read from the folder itself, with UTC timestamps
        # and a zero time offset, as given by the arguments below.
        video_data = get_video_data(
            str(video_dir), tz="UTC", video_time_offset=pd.Timedelta(hours=0)
        )

        # Load the CSV as a GeoDataFrame, subsample it using sample_distance,
        # and combine it with the video data.
        gdf = otter_csv_to_geodataframe(csv_file)
        gdf = filter_gdf_on_distance(gdf, sample_distance)
        gdf = prepare_gdf_with_video_data(gdf, video_data)

        # Write images to image_dir and store the returned GeoDataFrame.
        gdf = extract_images_from_video(gdf, str(image_dir))
        gdf.to_file(gpkg_file, driver="GPKG")

    except Exception as e:
        # Deliberate best-effort: report the failure and continue with the next
        # folder. The exception type is included because some exceptions carry
        # an empty or terse message on their own.
        print(f"Failed to process data in {video_dir}: {type(e).__name__}: {e}")


Processing data in /media/mha114/Massimal2/Massimal/Larvik_Terneholmen/Otter/20230829/3_GoPro/Video/T2
Found video files ['GX018302.MP4', 'GX028302.MP4', 'GX038302.MP4', 'GX048302.MP4', 'GX058302.MP4', 'GX068302.MP4', 'GX078302.MP4']


100%|██████████| 2291/2291 [32:15<00:00,  1.18it/s]


Processing data in /media/mha114/Massimal2/Massimal/Larvik_Terneholmen/Otter/20230829/3_GoPro/Video/T3
Found video files ['GX018303.MP4', 'GX028303.MP4', 'GX038303.MP4', 'GX048303.MP4', 'GX058303.MP4', 'GX068303.MP4', 'GX078303.MP4']


100%|██████████| 1305/1305 [17:06<00:00,  1.27it/s]


In [4]:
# Underwater image correction - skipped for this dataset (uncomment the lines below to run it)
# input_image_dirs = [
#     p / "images" for p in sorted((dataset_root_dir / "8_GeotaggedImages").glob("T*"))
# ]
# output_image_dirs = [p.parent / "images_corrected" for p in input_image_dirs]

# underwater_image_correction(input_image_dirs, output_image_dirs)
