In [75]:
import video_transect
from pathlib import Path
import exiftool
import datetime
import pandas as pd
import shutil
import tqdm

In [31]:
# Paths
# All inputs/outputs live under one dataset folder; change dataset_dir to
# point the notebook at another copy of the data.
dataset_dir = Path('/media/mha114/Massimal2/Massimal/Smola_Skalmen/OtterTransects/20230620')
im_dir = dataset_dir / 'Images'  # Searched recursively for *.JPG files
csv_file = dataset_dir / 'GnssData' / '20230620_144519_GnssTimePosHeadCourSpeed.csv'  # Otter GNSS log
test_gpkg_output = dataset_dir / 'ImagesGeopackage' / '20230620_Smola_Skalmen_OtterImages.gpkg'

In [32]:
# Parameters
# Distance handed to video_transect.filter_gdf_on_distance() further down.
sample_distance = 1.0 # Units defined by CRS, default is UTM with unit meters

In [33]:
# Get list of **all** image files (recursive search, sorted by path)
im_files = sorted(im_dir.rglob('*.JPG'))

# Show the first and last few paths as a sanity check, then the total count
n_preview = 5
for im_file in im_files[:n_preview]:
    print(im_file)
print('...')
for im_file in im_files[-n_preview:]:
    print(im_file)

print(f'{len(im_files)} image files found')

/media/mha114/Massimal2/Massimal/Smola_Skalmen/OtterTransects/20230620/Images/GoPro1/100GOPRO/G0011753.JPG
/media/mha114/Massimal2/Massimal/Smola_Skalmen/OtterTransects/20230620/Images/GoPro1/100GOPRO/G0011754.JPG
/media/mha114/Massimal2/Massimal/Smola_Skalmen/OtterTransects/20230620/Images/GoPro1/100GOPRO/G0011755.JPG
/media/mha114/Massimal2/Massimal/Smola_Skalmen/OtterTransects/20230620/Images/GoPro1/100GOPRO/G0011756.JPG
/media/mha114/Massimal2/Massimal/Smola_Skalmen/OtterTransects/20230620/Images/GoPro1/100GOPRO/G0011757.JPG
...
/media/mha114/Massimal2/Massimal/Smola_Skalmen/OtterTransects/20230620/Images/GoPro2/125GOPRO/G0054533.JPG
/media/mha114/Massimal2/Massimal/Smola_Skalmen/OtterTransects/20230620/Images/GoPro2/125GOPRO/G0054534.JPG
/media/mha114/Massimal2/Massimal/Smola_Skalmen/OtterTransects/20230620/Images/GoPro2/125GOPRO/G0054535.JPG
/media/mha114/Massimal2/Massimal/Smola_Skalmen/OtterTransects/20230620/Images/GoPro2/125GOPRO/G0054536.JPG
/media/mha114/Massimal2/Massimal/

In [34]:
def get_image_timestamps(image_paths):
    """ Read the sub-second creation timestamp of each image via exiftool

    # Input parameters:
    image_paths:
        List of image paths, passed as-is to the exiftool method
        ExifToolHelper.get_metadata()

    # Returns:
    image_paths_and_timestamps:
        List of tuples (image_path, image_timestamp_string), in input order.
        Images whose metadata lacks the 'Composite:SubSecCreateDate' tag
        are left out, so the list can be shorter than image_paths.
    """
    timestamp_tag = 'Composite:SubSecCreateDate'

    # One exiftool session for the whole batch of files
    with exiftool.ExifToolHelper() as helper:
        all_metadata = helper.get_metadata(image_paths)

    # Metadata entries are paired with the input paths by position
    paths_with_times = []
    for path, tags in zip(image_paths, all_metadata):
        if timestamp_tag not in tags:
            continue  # No sub-second timestamp available for this image
        paths_with_times.append((path, tags[timestamp_tag]))
    return paths_with_times

In [35]:
# Small subset of images used while testing.
# Alternative subset: im_files[::100] (every 100'th image)
test_start = 10000
test_count = 100
im_files_test = im_files[test_start:test_start + test_count]  # 100 images from middle, for testing
print(len(im_files_test))

100


In [36]:
# Get EXIF data for images. If all images, this takes approx. 8 minutes
# Result is a list of (image_path, timestamp_string) tuples; images without a
# sub-second timestamp tag are dropped by get_image_timestamps().
image_paths_and_timestamps = get_image_timestamps(im_files_test)
# NOTE(review): prints one line per image - fine for the 100-image test subset,
# but very long if run on the full image list.
for im_ts in image_paths_and_timestamps:
    print(im_ts)
print(f'Got {len(image_paths_and_timestamps)} valid timestamps.')

(PosixPath('/media/mha114/Massimal2/Massimal/Smola_Skalmen/OtterTransects/20230620/Images/GoPro1/110GOPRO/G0021754.JPG'), '2023:06:20 17:46:36.4450')
(PosixPath('/media/mha114/Massimal2/Massimal/Smola_Skalmen/OtterTransects/20230620/Images/GoPro1/110GOPRO/G0021755.JPG'), '2023:06:20 17:46:36.9460')
(PosixPath('/media/mha114/Massimal2/Massimal/Smola_Skalmen/OtterTransects/20230620/Images/GoPro1/110GOPRO/G0021756.JPG'), '2023:06:20 17:46:37.4460')
(PosixPath('/media/mha114/Massimal2/Massimal/Smola_Skalmen/OtterTransects/20230620/Images/GoPro1/110GOPRO/G0021757.JPG'), '2023:06:20 17:46:37.9470')
(PosixPath('/media/mha114/Massimal2/Massimal/Smola_Skalmen/OtterTransects/20230620/Images/GoPro1/110GOPRO/G0021758.JPG'), '2023:06:20 17:46:38.4470')
(PosixPath('/media/mha114/Massimal2/Massimal/Smola_Skalmen/OtterTransects/20230620/Images/GoPro1/110GOPRO/G0021759.JPG'), '2023:06:20 17:46:38.9480')
(PosixPath('/media/mha114/Massimal2/Massimal/Smola_Skalmen/OtterTransects/20230620/Images/GoPro1/110

In [37]:
# Get Otter positions from the GNSS CSV file and show the resulting geodataframe.
# The "Time" column is used further down to match positions to image timestamps.
otter_gdf = video_transect.otter_csv_to_geodataframe(csv_file)
otter_gdf


Unnamed: 0,Time,Lat,Lng,geometry
0,2023-06-20 14:45:19.418000+00:00,63.464339,7.771215,POINT (7.77122 63.46434)
1,2023-06-20 14:45:19.513000+00:00,63.464339,7.771212,POINT (7.77121 63.46434)
2,2023-06-20 14:45:19.609000+00:00,63.464339,7.771209,POINT (7.77121 63.46434)
3,2023-06-20 14:45:19.705000+00:00,63.464339,7.771206,POINT (7.77121 63.46434)
4,2023-06-20 14:45:19.817000+00:00,63.464339,7.771203,POINT (7.77120 63.46434)
...,...,...,...,...
145175,2023-06-20 18:47:17.026000+00:00,63.463280,7.775832,POINT (7.77583 63.46328)
145176,2023-06-20 18:47:17.122000+00:00,63.463280,7.775832,POINT (7.77583 63.46328)
145177,2023-06-20 18:47:17.234000+00:00,63.463280,7.775832,POINT (7.77583 63.46328)
145178,2023-06-20 18:47:17.330000+00:00,63.463280,7.775832,POINT (7.77583 63.46328)


# Matching timestamps
**Notes below are included because they led to the discovery of duplicated data**

GoPro images start at approx. 16:23 and end at approx. 21:17 (approx 5 hours)
Otter timestamps start at 14:45 and end at 18:47 (approx. 4 hours!)

Otter times are UTC, meaning that the times (in summer) are 2 hours behind local time (CEST, UTC+2).

First GoPro image from camera 1 taken 16:23:11
First GoPro image from camera 2 taken 18:56:00, on June 19! From day before...? Or incorrect date setting?

Last GoPro image from camera 1 taken 18:41:00 on June 20
Last GoPro image from camera 2 taken 21:18:28 on June 20

Image G0020571.JPG in folder 20230620/Images/GoPro2/110GOPRO is the last image taken on June 19.

File naming does not follow folder naming, but is formatted as G00Xdddd, where X indicates the "sequence number". 

AHA! The image sequences 1 and 2 from camera 2 are _duplicated_. They are included in the datasets both from Måholmen (June 19) and Skalmen (June 20). Image sequence 3 is just a test sequence taken indoors.

Fix: Remove duplicated images from camera 2 from Skalmen dataset.

# Matching timestamps 2
Otter timestamps start at 14:45 and end at 18:47 (approx. 4 hours!)

Otter times are UTC, meaning that the times (in summer) are 2 hours behind local time (CEST, UTC+2).

First GoPro image from camera 1 taken 16:23:11
Last GoPro image from camera 1 taken 18:41:00 (approx 2 hours and 20 minutes)
All images appear to be taken in one continuous take.

First GoPro image from camera 2 taken 19:22:10
Last GoPro image from camera 2 taken 21:18:28 
All these images also appear to be taken in one continuous take

The interval 18:41 - 19:22 makes sense - it probably took 40 minutes to change cameras. This again indicates that from beginning to end, imaging took approx. 5 hours (including camera change). So were the cameras started before the Otter GPS logging started, or stopped long after the Otter stopped?

For camera 1, the first indication of movement is around image G0013150.JPG, taken at 16:34:50. 

If the camera and Otter are offset by exactly 2 hours, then the first image of the Otter position log is around G0014406.JPG. At this point, the images show the Otter being above kelp (hard to interpret movement), but soon after, the Otter appears to head out to deeper waters. Not really any obvious break here...

Plotting Otter path in QGIS: First point seems to be while moving westwards towards start of grid. Not an obvious starting position, maybe Håvard just started logging there.

If the Otter log stops at 18:47 UTC (20:47 local time), it means that approx. the last half hour of images from GoPro 2 don't have positions. 

In [38]:
def get_otter_image_positions(otter_gdf,image_paths_and_timestamps,
                              dt_format = r'%Y:%m:%d %H:%M:%S.%f',
                              time_tolerance = datetime.timedelta(seconds=1),
                              image_time_offset_from_utc = datetime.timedelta(hours=2)):
    """ Search Otter geodataframe for positions closest to image (in time)
    
    # Input arguments:
    otter_gdf:      
        GDF created from Otter CSV (see otter_csv_to_geodataframe())
        Only column "Time" (containing UTC datetime objects) is used for
        the matching. The nearest-time lookup is done with
        pandas.Index.get_indexer(method='nearest'), which needs the times
        to be unique and sorted. otter_gdf itself is not modified.
    image_paths_and_timestamps:
        List of tuples containing (image_path, image_timestamp_string)
    dt_format:
        strptime-style format of the image timestamp strings. The default
        matches exiftool strings such as '2023:06:20 17:46:36.4450'.
    time_tolerance:
        datetime.timedelta, default 1 second, indicating maximum time offset
        when searching for matching times between images and positions
    image_time_offset_from_utc:
        datetime.timedelta, default 2 hours, indicating time offset between
        utc and image timestamps (typically local time). The offset is
        subtracted from the image timestamps to convert them to UTC.
    
    # Returns
    image_gdf:
        Geodataframe with positions (taken from Otter) and corresponding
        image file paths, one row per image that has a position within
        time_tolerance. Image paths are placed in a new column "ImageFile",
        inserted just before the last column of otter_gdf.
    
    """
    
    # Extract timestamps, convert to datetime objects and shift to UTC
    image_datetimes = [pd.to_datetime(image_datetime_str,format=dt_format,utc=True) 
                       - image_time_offset_from_utc
                       for (_,image_datetime_str) in image_paths_and_timestamps]

    # Find indices for Otter timestamps that are closest to image timestamps
    closest_time_index = pd.Index(otter_gdf['Time']).get_indexer(image_datetimes,method='nearest',
                                                                 tolerance=time_tolerance)
    
    # Remove images that don't have any matches within the time tolerance
    has_match = closest_time_index >= 0  # Images without match get index -1
    image_paths = [image_path for ((image_path,_),matched) in 
                   zip(image_paths_and_timestamps,has_match)
                   if matched]

    # Explicit copy of the Otter rows corresponding to images, so that adding
    # a column neither touches otter_gdf nor raises SettingWithCopyWarning
    image_gdf = otter_gdf.iloc[closest_time_index[has_match]].copy()
    
    # Insert additional column for image path (before the last column)
    image_gdf.insert(image_gdf.shape[1]-1,'ImageFile',image_paths)
    return image_gdf

In [39]:
# Match each image timestamp to the nearest Otter position (default 1 s
# tolerance and 2 h offset between image timestamps and UTC), and show the result
image_gdf = get_otter_image_positions(otter_gdf,image_paths_and_timestamps)
image_gdf

Unnamed: 0,Time,Lat,Lng,ImageFile,geometry
36769,2023-06-20 15:46:36.431000+00:00,63.466281,7.765075,/media/mha114/Massimal2/Massimal/Smola_Skalmen...,POINT (7.76508 63.46628)
36774,2023-06-20 15:46:36.927000+00:00,63.466280,7.765088,/media/mha114/Massimal2/Massimal/Smola_Skalmen...,POINT (7.76509 63.46628)
36779,2023-06-20 15:46:37.423000+00:00,63.466279,7.765101,/media/mha114/Massimal2/Massimal/Smola_Skalmen...,POINT (7.76510 63.46628)
36784,2023-06-20 15:46:37.919000+00:00,63.466278,7.765113,/media/mha114/Massimal2/Massimal/Smola_Skalmen...,POINT (7.76511 63.46628)
36789,2023-06-20 15:46:38.431000+00:00,63.466277,7.765126,/media/mha114/Massimal2/Massimal/Smola_Skalmen...,POINT (7.76513 63.46628)
...,...,...,...,...,...
37245,2023-06-20 15:47:24.028000+00:00,63.466275,7.766308,/media/mha114/Massimal2/Massimal/Smola_Skalmen...,POINT (7.76631 63.46627)
37250,2023-06-20 15:47:24.524000+00:00,63.466275,7.766320,/media/mha114/Massimal2/Massimal/Smola_Skalmen...,POINT (7.76632 63.46627)
37255,2023-06-20 15:47:25.020000+00:00,63.466275,7.766333,/media/mha114/Massimal2/Massimal/Smola_Skalmen...,POINT (7.76633 63.46627)
37260,2023-06-20 15:47:25.532000+00:00,63.466274,7.766346,/media/mha114/Massimal2/Massimal/Smola_Skalmen...,POINT (7.76635 63.46627)


In [40]:
# Filter image positions on distance, using sample_distance from the parameters cell.
# Judging by the output below, only a subset of the rows is kept.
image_gdf_filt = video_transect.filter_gdf_on_distance(image_gdf,sample_distance)
image_gdf_filt

Unnamed: 0,Time,Lat,Lng,ImageFile,geometry
36769,2023-06-20 15:46:36.431000+00:00,63.466281,7.765075,/media/mha114/Massimal2/Massimal/Smola_Skalmen...,POINT (7.76508 63.46628)
36779,2023-06-20 15:46:37.423000+00:00,63.466279,7.765101,/media/mha114/Massimal2/Massimal/Smola_Skalmen...,POINT (7.76510 63.46628)
36789,2023-06-20 15:46:38.431000+00:00,63.466277,7.765126,/media/mha114/Massimal2/Massimal/Smola_Skalmen...,POINT (7.76513 63.46628)
36799,2023-06-20 15:46:39.423000+00:00,63.466276,7.765154,/media/mha114/Massimal2/Massimal/Smola_Skalmen...,POINT (7.76515 63.46628)
36809,2023-06-20 15:46:40.431000+00:00,63.466275,7.76518,/media/mha114/Massimal2/Massimal/Smola_Skalmen...,POINT (7.76518 63.46627)
36819,2023-06-20 15:46:41.423000+00:00,63.466274,7.765204,/media/mha114/Massimal2/Massimal/Smola_Skalmen...,POINT (7.76520 63.46627)
36829,2023-06-20 15:46:42.431000+00:00,63.466273,7.765231,/media/mha114/Massimal2/Massimal/Smola_Skalmen...,POINT (7.76523 63.46627)
36839,2023-06-20 15:46:43.423000+00:00,63.466272,7.765259,/media/mha114/Massimal2/Massimal/Smola_Skalmen...,POINT (7.76526 63.46627)
36849,2023-06-20 15:46:44.431000+00:00,63.466271,7.765285,/media/mha114/Massimal2/Massimal/Smola_Skalmen...,POINT (7.76528 63.46627)
36859,2023-06-20 15:46:45.423000+00:00,63.466271,7.76531,/media/mha114/Massimal2/Massimal/Smola_Skalmen...,POINT (7.76531 63.46627)


In [42]:
# Independent copy of the filtered geodataframe, used by the copy/rename cells below
gdf = image_gdf_filt.copy()

In [77]:
# Build new file names from a common base, a running image number,
# the position timestamp and the original file name
im_paths = gdf.ImageFile
im_times = gdf.Time
image_filename_base = 'Smola_Skalmen_'
new_image_filenames = [
    f'{image_filename_base}{i:08d}_{im_time.strftime("%Y%m%dT%H%M%SZ_")}{im_path.name}'
    for i,(im_path,im_time) in enumerate(zip(im_paths,im_times))
]

print(new_image_filenames)

['Smola_Skalmen_00000000_20230620T154636Z_G0021754.JPG', 'Smola_Skalmen_00000001_20230620T154637Z_G0021756.JPG', 'Smola_Skalmen_00000002_20230620T154638Z_G0021758.JPG', 'Smola_Skalmen_00000003_20230620T154639Z_G0021760.JPG', 'Smola_Skalmen_00000004_20230620T154640Z_G0021762.JPG', 'Smola_Skalmen_00000005_20230620T154641Z_G0021764.JPG', 'Smola_Skalmen_00000006_20230620T154642Z_G0021766.JPG', 'Smola_Skalmen_00000007_20230620T154643Z_G0021768.JPG', 'Smola_Skalmen_00000008_20230620T154644Z_G0021770.JPG', 'Smola_Skalmen_00000009_20230620T154645Z_G0021772.JPG', 'Smola_Skalmen_00000010_20230620T154646Z_G0021774.JPG', 'Smola_Skalmen_00000011_20230620T154647Z_G0021776.JPG', 'Smola_Skalmen_00000012_20230620T154648Z_G0021778.JPG', 'Smola_Skalmen_00000013_20230620T154649Z_G0021780.JPG', 'Smola_Skalmen_00000014_20230620T154650Z_G0021782.JPG', 'Smola_Skalmen_00000015_20230620T154651Z_G0021784.JPG', 'Smola_Skalmen_00000016_20230620T154652Z_G0021786.JPG', 'Smola_Skalmen_00000017_20230620T154653Z_G00217

In [79]:
# Copy every image listed in gdf to the output folder under its new file name.
# NOTE(review): the output folder is created inside im_dir, which is searched
# recursively for '*.JPG' when building im_files - re-running that cell after
# this one will presumably also pick up the copies. Consider an output folder
# outside im_dir.
output_dir = im_dir / 'TestOutput'
output_dir.mkdir(exist_ok=True)
print('Copying and renaming files...')
# zip() has no length, so the tqdm output is a plain iteration counter
for source_file,new_filename in tqdm.tqdm(zip(gdf.ImageFile,new_image_filenames)):
    destination_file = output_dir / new_filename
    #print(f'Source: {source_file}, destination: {destination_file}')
    shutil.copy(source_file,destination_file)

Copying and renaming files...


50it [00:00, 277.51it/s]


In [67]:
# Copy of gdf where the ImageFile column holds the new file names instead of
# the original paths.
# NOTE(review): the stored output of this cell shows timestamps formatted as
# 2023-06-20T15:46:36Z, while the naming code produces 20230620T154636Z - the
# output looks stale (this cell has a lower execution count); re-run to refresh.
updated_gdf = gdf.copy()
updated_gdf.iloc[:,updated_gdf.columns.get_loc('ImageFile')] = new_image_filenames
updated_gdf

Unnamed: 0,Time,Lat,Lng,ImageFile,geometry
36769,2023-06-20 15:46:36.431000+00:00,63.466281,7.765075,Smola_Skalmen_00000000_2023-06-20T15:46:36Z_G0...,POINT (7.76508 63.46628)
36779,2023-06-20 15:46:37.423000+00:00,63.466279,7.765101,Smola_Skalmen_00000001_2023-06-20T15:46:37Z_G0...,POINT (7.76510 63.46628)
36789,2023-06-20 15:46:38.431000+00:00,63.466277,7.765126,Smola_Skalmen_00000002_2023-06-20T15:46:38Z_G0...,POINT (7.76513 63.46628)
36799,2023-06-20 15:46:39.423000+00:00,63.466276,7.765154,Smola_Skalmen_00000003_2023-06-20T15:46:39Z_G0...,POINT (7.76515 63.46628)
36809,2023-06-20 15:46:40.431000+00:00,63.466275,7.76518,Smola_Skalmen_00000004_2023-06-20T15:46:40Z_G0...,POINT (7.76518 63.46627)
36819,2023-06-20 15:46:41.423000+00:00,63.466274,7.765204,Smola_Skalmen_00000005_2023-06-20T15:46:41Z_G0...,POINT (7.76520 63.46627)
36829,2023-06-20 15:46:42.431000+00:00,63.466273,7.765231,Smola_Skalmen_00000006_2023-06-20T15:46:42Z_G0...,POINT (7.76523 63.46627)
36839,2023-06-20 15:46:43.423000+00:00,63.466272,7.765259,Smola_Skalmen_00000007_2023-06-20T15:46:43Z_G0...,POINT (7.76526 63.46627)
36849,2023-06-20 15:46:44.431000+00:00,63.466271,7.765285,Smola_Skalmen_00000008_2023-06-20T15:46:44Z_G0...,POINT (7.76528 63.46627)
36859,2023-06-20 15:46:45.423000+00:00,63.466271,7.76531,Smola_Skalmen_00000009_2023-06-20T15:46:45Z_G0...,POINT (7.76531 63.46627)


In [41]:
def copy_rename_georeferenced_images(gdf,output_dir,image_filename_base,include_full_path_in_gdf=False):
    """ Copy and rename image files in geodataframe
    
    # Arguments:
    gdf:
        GeoDataFrame with columns "Time" (datetime) and "ImageFile"
        (pathlib.Path or path string). ImageFile paths are assumed to be
        absolute. gdf itself is not modified.
    output_dir:
        Output directory (pathlib.Path or string) which files will be
        copied to. Created, including parent directories, if missing.
    image_filename_base:
        String with filename base for new filenames.
        Typically location (e.g. 'Smola_Skalmen_') or some other information
        which is common for all images. No separator is added after the
        base, so include a trailing underscore if one is wanted.
    include_full_path_in_gdf:
        Boolean. If True, geodataframe is updated with full path to new files
        (as strings). If False (default), only filename is inserted in
        ImageFile column in updated geodataframe.

    Images are given new names on the following format:
    <image_filename_base><ImageNumber>_<ImageDateTime>_<OriginalFileName>
    where ImageNumber is the zero-padded (8 digits) row number in gdf and
    ImageDateTime is the "Time" value formatted as YYYYmmddTHHMMSSZ.
    The "Z" suffix is written literally, i.e. "Time" is assumed to be UTC.

    # Returns:
    updated_gdf:
        Copy of gdf, with ImageFile updated with the new file names
        (or full paths to the new files, see include_full_path_in_gdf).
    """
    # Use the directory given by the caller (previously overwritten by a
    # notebook-global test path, which made the argument have no effect)
    output_dir = Path(output_dir)

    # Create new file names based on old filenames and timestamps
    new_image_filenames = [
        f'{image_filename_base}{i:08d}_{im_time.strftime("%Y%m%dT%H%M%SZ_")}{Path(im_path).name}'
        for i,(im_path,im_time) in enumerate(zip(gdf['ImageFile'],gdf['Time']))
    ]

    # Create output directory (if not already present), copy and rename files
    output_dir.mkdir(exist_ok=True,parents=True)
    print('Copying and renaming files...')
    # zip() has no length, so pass the total explicitly to get a real progress bar
    for source_file,new_filename in tqdm.tqdm(zip(gdf['ImageFile'],new_image_filenames),
                                               total=len(new_image_filenames)):
        shutil.copy(source_file,output_dir / new_filename)

    # Update path to image in a copy of the geodataframe
    updated_gdf = gdf.copy()
    if include_full_path_in_gdf:
        updated_gdf['ImageFile'] = [str(output_dir / new_image_filename) for 
                                    new_image_filename in new_image_filenames]
    else:
        updated_gdf['ImageFile'] = new_image_filenames
    return updated_gdf