In [21]:
import matplotlib.pyplot as plt
import video_transect, misc
import geopandas
import ffmpeg
import pandas as pd
import datetime
import numpy as np

In [36]:
# Paths
# Folder that is searched for the video files (*.mp4 / *.MP4) of the transect
video_dir = '/media/mha114/Massimal/Vega_Sola/Transects/Otter/Sola_01/20220820_134500_Sola01b/GoPro'
# Alternative video folders; uncomment exactly one line to process a different dataset
# video_dir = '/media/mha114/Massimal/Vega_Sola/Transects/Otter/Sola_01/TestImageExtraction'
# video_dir = '/media/mha114/Massimal/Vega_Sola/Transects/Otter/Sola_02/20220820_160305_Sola02/GoPro'
# video_dir = '/media/mha114/Massimal/Bodo_Juvika/Video-WalkGroundTransect'

# CSV file with the GNSS track (time and position) logged by the Otter for the same transect folder
csv_file = '/media/mha114/Massimal/Vega_Sola/Transects/Otter/Sola_01/20220820_134500_Sola01b/CSV and mat files/20220820_134500_GnssTimePosHeadCourSpeed.csv'

In [37]:
# Collect the video files in video_dir; the bracket pattern matches the
# .mp4 extension regardless of letter case (e.g. .mp4 and .MP4)
video_files = misc.file_pattern_search(video_dir, '*.[Mm][Pp]4')

In [38]:
video_files

['/media/mha114/Massimal/Vega_Sola/Transects/Otter/Sola_01/20220820_134500_Sola01b/GoPro/GX010060.MP4',
 '/media/mha114/Massimal/Vega_Sola/Transects/Otter/Sola_01/20220820_134500_Sola01b/GoPro/GX020060.MP4',
 '/media/mha114/Massimal/Vega_Sola/Transects/Otter/Sola_01/20220820_134500_Sola01b/GoPro/GX030060.MP4',
 '/media/mha114/Massimal/Vega_Sola/Transects/Otter/Sola_01/20220820_134500_Sola01b/GoPro/GX040060.MP4',
 '/media/mha114/Massimal/Vega_Sola/Transects/Otter/Sola_01/20220820_134500_Sola01b/GoPro/GX050060.MP4']

In [34]:
# Read the metadata (container and stream information) of the first video file
probe_data = ffmpeg.probe(video_files[0])

In [35]:
probe_data['streams'][0]['tags']

{'creation_time': '2022-06-30T10:23:09.000000Z',
 'language': 'eng',
 'handler_name': '\x0bGoPro H.265',
 'encoder': 'GoPro H.265 encoder',
 'timecode': '10:22:32:09'}

In [8]:
# Print the tags of the first stream of every video file, to compare
# creation time and timecode across the files
for video_file in video_files:
    probe_data = ffmpeg.probe(video_file)
    print(probe_data['streams'][0]['tags'])

{'creation_time': '2022-08-20T15:33:49.000000Z', 'language': 'eng', 'handler_name': '\x0bGoPro H.265', 'encoder': 'GoPro H.265 encoder', 'timecode': '15:56:21:03'}


Note! The creation_time tag is the _same_ for all files which are part of a "series" of files.
Each full-length file is 11:44 long.
The second full "straight track" starts 30 seconds into the third video, which corresponds to 15:33:49 + 11:44 + 11:44 + 0:30 = 15:57:47.
Based on the CSV track viewed in QGIS, the time for the start of the straight track is 13:57:43, i.e. 2 hours and a few seconds off.
We can therefore assume that the GoPro timestamp is two hours ahead of the (UTC) Otter timestamp, which fits with Norway being 2 hours ahead of UTC in summer (due to daylight saving time).

In [9]:
def otter_csv_to_geodataframe(csv_file):
    """ Read position and timestamp from Otter CSV file

    # Usage:
    otter_csv_to_geodataframe(csv_file)

    # Required arguments:
    csv_file:   CSV file with track data, generated by Otter
                and post-processed in Matlab.
                Columns "LatDecDeg", "LongDecDeg" and "GnssUTC" are used

    # Returns
    gdf:        GeoDataFrame with information extracted from CSV file
                Columns are renamed "Lat", "Lng" and "Time", and are
                ordered "Time", "Lat", "Lng", followed by "geometry"
                (points in EPSG:4326).

    """

    # Mapping from column names in the CSV file to column names in the output
    column_names = {'GnssUTC': 'Time', 'LatDecDeg': 'Lat', 'LongDecDeg': 'Lng'}

    # Read data (only the columns that are needed)
    data = pd.read_csv(csv_file, usecols=list(column_names))

    # Rename columns by name, not by position: read_csv returns the columns in
    # the order they have in the file (the order of usecols is ignored), so a
    # positional rename would silently mislabel columns for other file layouts.
    # reindex() then fixes the output column order.
    data = data.rename(columns=column_names).reindex(columns=['Time', 'Lat', 'Lng'])

    # Convert time to datetime format
    data['Time'] = pd.to_datetime(data['Time'])

    # Create GeoDataFrame with point geometry (x = longitude, y = latitude)
    gdf = geopandas.GeoDataFrame(
        data,
        crs = 'EPSG:4326',
        geometry=geopandas.points_from_xy(data['Lng'], data['Lat']))

    # Return
    return gdf

In [21]:
def filter_gdf_on_distance(gdf,epsg=32633,sample_distance=0.7, inplace=False, outlier_distance = 1000):
    """ Filter a geodataframe by only including new samples if position has changed

    # Usage:
    gdf_filtered = filter_gdf_on_distance(gdf,...)

    # Input arguments:
    gdf:        GeoPandas GeoDataFrame object

    # Keyword arguments:
    epsg:                EPSG code (integer) for CRS to measure distance in
                         Default: 32633 (UTM 33N)
                         If None, distances are measured in the CRS of gdf.
    sample_distance      Minimum change in position (relative to the last
                         included sample) in order for next sample to be included
    inplace              Kept for backward compatibility. The input object is
                         never modified; if False (default) the returned
                         object is an explicit copy.
    outlier_distance     Samples with changes in distance above this limit are not included

    # Returns:
    gdf_filtered:        The retained rows of gdf, with their original index labels.
                         The first row is always retained.
                         An empty input gives an empty output.

    """

    # Convert CRS (often necessary to get valid distance units, e.g. meters)
    if epsg is not None:
        geom = gdf.geometry.to_crs(epsg=epsg)
    else:
        geom = gdf.geometry

    # Nothing to filter
    if len(geom) == 0:
        return gdf if inplace else gdf.copy()

    # Iterate over all positions and only include a new point if position
    # has changed more than sample_distance.
    # "mask" holds row positions (not index labels), so the first position is
    # read with .iloc; this also works when the index does not start at 0.
    mask = [0]
    last_pos = geom.iloc[0]
    for index, position in enumerate(geom):
        dist = position.distance(last_pos)
        if sample_distance < dist < outlier_distance:
            mask.append(index)
            last_pos = position

    gdf_filtered = gdf.iloc[mask]
    return gdf_filtered if inplace else gdf_filtered.copy()
        

In [18]:
# Read the Otter track (time and position) into a GeoDataFrame
gdf = otter_csv_to_geodataframe(csv_file)

In [19]:
# Thin out the track: keep a sample only when the position has changed by
# more than the default sample distance since the last retained sample
gdf_simp = filter_gdf_on_distance(gdf)

In [20]:
gdf_simp

Unnamed: 0,Time,Lat,Lng,geometry
0,2022-08-20 13:45:00.957,65.678659,11.719861,POINT (11.71986 65.67866)
115,2022-08-20 13:45:12.394,65.678664,11.719852,POINT (11.71985 65.67866)
302,2022-08-20 13:45:31.097,65.678671,11.719849,POINT (11.71985 65.67867)
350,2022-08-20 13:45:35.897,65.678676,11.719859,POINT (11.71986 65.67868)
480,2022-08-20 13:45:48.888,65.678669,11.719859,POINT (11.71986 65.67867)
...,...,...,...,...
39020,2022-08-20 14:50:02.894,65.678606,11.720123,POINT (11.72012 65.67861)
39035,2022-08-20 14:50:04.398,65.678613,11.720123,POINT (11.72012 65.67861)
39054,2022-08-20 14:50:06.286,65.678619,11.720122,POINT (11.72012 65.67862)
39075,2022-08-20 14:50:08.398,65.678626,11.720121,POINT (11.72012 65.67863)


In [124]:
# Read file (only position and timestamp)
column_names = {'GnssUTC': 'Time', 'LatDecDeg': 'Lat', 'LongDecDeg': 'Lng'}
data = pd.read_csv(csv_file, usecols=list(column_names))
# Rename columns by name (read_csv returns columns in file order, not in the
# order of usecols, so a positional rename could mislabel them), then fix the order
data = data.rename(columns=column_names).reindex(columns=['Time', 'Lat', 'Lng'])
# Convert time to datetime format
data['Time'] = pd.to_datetime(data['Time'])

# Add time offset (shift the UTC track time to the clock of the camera, see note above)
data['Time'] += datetime.timedelta(hours=2)

# Add column with time in seconds relative to the first sample
data['TimeDiffSec'] = (data['Time'] - data['Time'].iloc[0]).dt.total_seconds()

data.head()

Unnamed: 0,Time,Lat,Lng,TimeDiffSec
0,2022-08-20 15:45:00.957,65.678659,11.719861,0.0
1,2022-08-20 15:45:00.987,65.678659,11.719861,0.03
2,2022-08-20 15:45:01.099,65.678659,11.719861,0.142
3,2022-08-20 15:45:01.195,65.678659,11.719861,0.238
4,2022-08-20 15:45:01.291,65.678659,11.71986,0.334


In [125]:
# Build a GeoDataFrame from the table, with point geometry created from
# longitude (x) and latitude (y) and the CRS set to EPSG:4326
gdf = geopandas.GeoDataFrame(
    data,
    crs = 'EPSG:4326',
    geometry=geopandas.points_from_xy(data.Lng, data.Lat))

In [126]:
# View size of geodataframe
gdf.shape

(39115, 5)

In [127]:
# Convert to different CRS (UTM 33N)
gdf_utm = gdf.to_crs(epsg=32633)
gdf_utm

Unnamed: 0,Time,Lat,Lng,TimeDiffSec,geometry
0,2022-08-20 15:45:00.957,65.678659,11.719861,0.000,POINT (349309.282 7288025.316)
1,2022-08-20 15:45:00.987,65.678659,11.719861,0.030,POINT (349309.286 7288025.312)
2,2022-08-20 15:45:01.099,65.678659,11.719861,0.142,POINT (349309.281 7288025.309)
3,2022-08-20 15:45:01.195,65.678659,11.719861,0.238,POINT (349309.286 7288025.306)
4,2022-08-20 15:45:01.291,65.678659,11.719860,0.334,POINT (349309.280 7288025.302)
...,...,...,...,...,...
39110,2022-08-20 16:50:11.885,65.678636,11.720125,3910.928,POINT (349321.267 7288022.082)
39111,2022-08-20 16:50:11.997,65.678636,11.720125,3911.040,POINT (349321.277 7288022.113)
39112,2022-08-20 16:50:12.093,65.678636,11.720125,3911.136,POINT (349321.285 7288022.141)
39113,2022-08-20 16:50:12.189,65.678636,11.720125,3911.232,POINT (349321.287 7288022.165)


In [128]:
# Minimum distance (in units of the UTM CRS) between retained samples
samp_dist = 1.0
# Row positions of retained samples; the first sample is always retained.
# Positions (not index labels) are collected because the mask is used with .iloc
mask = [0]
last_pos = gdf_utm.geometry.iloc[0]
for index, position in enumerate(gdf_utm.geometry):
    if position.distance(last_pos) > samp_dist:
        mask.append(index)
        last_pos = position

In [129]:
len(mask)

2096

In [130]:
gdf_simp = gdf.iloc[mask]

In [41]:
gdf_simp

NameError: name 'gdf_simp' is not defined

In [19]:
def get_video_data(video_dir):
    """ Get info about videos in folder, organized as dataframe

    # Usage:
    video_data = get_video_data(video_dir)

    # Required:
    video_dir:      Path to folder with video file(s).
                    Files are assumed to be from a single continuous "take",
                    split into files with names ordered alphabetically
                    according to recording order.

    # Returns:
    video_data:     Pandas dataframe with columns
                    'FileName', 'CreationTime', 'DurationSec',
                    'StartTimeSec', 'StopTimeSec'
                    Creation time and duration are read from the first stream
                    of each file. Start and stop times are in seconds,
                    relative to start of first file.
                    The dataframe has zero rows if no video files are found.

    """

    # Get list of video files
    # Use brackets to find both .mp4 and .MP4 files
    file_names = list(misc.file_pattern_search(video_dir, '*.[Mm][Pp]4'))

    # Get creation time and duration for each video file.
    # NOTE(review): the first stream is assumed to be the video stream - confirm
    creation_times = []
    durations = []
    for file in file_names:
        stream = ffmpeg.probe(file)['streams'][0]
        creation_times.append(pd.to_datetime(stream['tags']['creation_time']))
        durations.append(float(stream['duration']))

    # Build the dataframe in one go (instead of writing cell by cell into
    # pre-typed columns), so that each column gets the dtype of its data
    video_data = pd.DataFrame({
        'FileName': file_names,
        'CreationTime': creation_times,
        'DurationSec': pd.Series(durations, dtype=float)})

    # Each file starts where the previous one stops; the first starts at 0
    video_data['StartTimeSec'] = video_data['DurationSec'].cumsum().shift(1, fill_value=0.0)
    video_data['StopTimeSec'] = video_data['StartTimeSec'] + video_data['DurationSec']

    return video_data

In [22]:
# Collect file name, creation time, duration and relative start/stop time for every video file
video_data = get_video_data(video_dir)

In [23]:
video_data

Unnamed: 0,FileName,CreationTime,DurationSec,StartTimeSec,StopTimeSec
0,/media/mha114/Massimal/Vega_Sola/Transects/Ott...,2022-08-20 15:33:49+00:00,704.704,0.0,704.704
1,/media/mha114/Massimal/Vega_Sola/Transects/Ott...,2022-08-20 15:33:49+00:00,704.704,704.704,1409.408
2,/media/mha114/Massimal/Vega_Sola/Transects/Ott...,2022-08-20 15:33:49+00:00,704.704,1409.408,2114.112
3,/media/mha114/Massimal/Vega_Sola/Transects/Ott...,2022-08-20 15:33:49+00:00,704.704,2114.112,2818.816
4,/media/mha114/Massimal/Vega_Sola/Transects/Ott...,2022-08-20 15:33:49+00:00,329.195533,2818.816,3148.011533


In [40]:
video_data.iloc[0].CreationTime

Timestamp('2022-08-20 15:33:49+0000', tz='UTC')

In [None]:
def prepare_gdf_with_video_data(gdf,video_data, video_time_offset=datetime.timedelta()):
    """ Insert video information into geodataframe, prepare for extracting images

    # Usage:
    gdf = prepare_gdf_with_video_data(gdf,video_data)

    # Arguments:
    gdf:        geodataframe with positions and timestamps (column "Time")
                (see otter_csv_to_geodataframe())
    video_data: dataframe with video data, with columns "FileName",
                "CreationTime", "StartTimeSec" and "StopTimeSec"
                (see get_video_data())

    # Keyword arguments:
    video_time_offset:    datetime.timedelta object with time difference between
                          video datetime ("CreationTime") and GNSS track datetime.
                          Example: GoPro camera at Norwegian local time and
                          daylight saving time is 2 hours "in front of"
                          GNSS using UTC time, resulting in
                          video_time_offset = datetime.timedelta(hours=2)

    # Returns
    gdf:        copy of the geodataframe, limited to the rows with a timestamp
                inside the time span covered by the video(s), with these
                columns inserted before the last column:
                "VideoFile":             file containing the timestamp
                "VideoRelTimeFromStart": seconds since start of first file
                "VideoRelTimeInFile":    seconds since start of "VideoFile"

    # Raises
    ValueError: if video_data has no rows

    # Notes
    Timestamps with time zone information are converted to UTC; timestamps
    without time zone information are used as they are. The "CreationTime"
    of the first video file is taken as start time for all files.
    """

    if video_data.shape[0] == 0:
        raise ValueError('video_data contains no video files')

    # Create copy of original geodataframe
    gdf = gdf.copy()

    # Start of video, expressed on the clock of the track
    video_start = pd.Timestamp(video_data.iloc[0].CreationTime) - video_time_offset

    # Timestamps with and without time zone cannot be compared or subtracted,
    # so strip the time zone from both sides (after conversion to UTC)
    if video_start.tzinfo is not None:
        video_start = video_start.tz_convert('UTC').tz_localize(None)
    track_time = pd.to_datetime(gdf['Time'])
    if track_time.dt.tz is not None:
        track_time = track_time.dt.tz_convert('UTC').dt.tz_localize(None)

    # Time of each position in seconds, relative to start of the first video file
    rel_time = (track_time - video_start).dt.total_seconds()

    # Remove logged positions from before video start or after video end.
    # The end is exclusive, so that every remaining position falls inside
    # the [start, stop) interval of one of the files.
    total_duration = video_data.iloc[-1].StopTimeSec
    ind_within_video_duration = (rel_time >= 0) & (rel_time < total_duration)
    gdf = gdf.loc[ind_within_video_duration].copy()
    rel_time = rel_time.loc[ind_within_video_duration]

    # Insert additional columns (before the last column)
    gdf.insert(gdf.shape[1]-1,'VideoFile','')
    gdf.insert(gdf.shape[1]-1,'VideoRelTimeFromStart',rel_time)
    gdf.insert(gdf.shape[1]-1,'VideoRelTimeInFile',float('nan'))

    # For each position, find corresponding video file and calculate time relative to start of that file
    for video in video_data.itertuples(index=False):
        ind = (rel_time >= video.StartTimeSec) & (rel_time < video.StopTimeSec)
        gdf.loc[ind, 'VideoFile'] = video.FileName
        gdf.loc[ind, 'VideoRelTimeInFile'] = rel_time[ind] - video.StartTimeSec

    return gdf

In [39]:
# Match every track position with a video file and a time within that file.
# NOTE(review): video_time_offset is left at its default (zero). That is only
# right if the timestamps in gdf_simp are already on the clock of the camera;
# for a track in UTC the notes above suggest datetime.timedelta(hours=2) - confirm
gdf_with_video = prepare_gdf_with_video_data(gdf_simp,video_data)

NameError: name 'prepare_gdf_with_video_data' is not defined

In [24]:
tmp = datetime.timedelta()

In [25]:
tmp

datetime.timedelta(0)