In [1]:
# Imports
import ffmpeg
import pandas as pd
import geopandas
import datetime
import shapely
import warnings
import numpy as np
import os
from pathlib import Path

import video_transect, misc

In [2]:
# Filter future warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [3]:
# Paths
csv_file = '/media/mha114/MassimalDataProcessing/20210312_Saltstraumen/Images-UW/20220329/VideoTransect_Structured/Transect1/2022-03-29_125003_MassimalTrack.csv'
video_dir = '/media/mha114/MassimalDataProcessing/20210312_Saltstraumen/Images-UW/20220329/VideoTransect_Structured/Transect1/Video'
image_dir = '/media/mha114/MassimalDataProcessing/20220324_VideoTransectTest/ExtractedImages'
gpk_file = '/media/mha114/MassimalDataProcessing/20220324_VideoTransectTest/test_output.gpkg'

In [4]:
# Read file, get geodataframe
gdf = video_transect.track_csv_to_geodataframe(csv_file)

In [5]:
gdf

Unnamed: 0,Lat,Lng,Time,TimeDiffSec,geometry
0,67.227312,14.625156,2022-03-29 10:50:04.089000+00:00,0.000,POINT (14.62516 67.22731)
1,67.227312,14.625156,2022-03-29 10:50:04.089000+00:00,0.000,POINT (14.62516 67.22731)
2,67.227312,14.625156,2022-03-29 10:50:05.098000+00:00,1.009,POINT (14.62516 67.22731)
3,67.227312,14.625156,2022-03-29 10:50:06.094000+00:00,2.005,POINT (14.62516 67.22731)
4,67.227312,14.625156,2022-03-29 10:50:07.098000+00:00,3.009,POINT (14.62516 67.22731)
...,...,...,...,...,...
3719,67.227312,14.625167,2022-03-29 11:52:02.075000+00:00,3717.986,POINT (14.62517 67.22731)
3720,67.227327,14.625210,2022-03-29 11:52:39.451000+00:00,3755.362,POINT (14.62521 67.22733)
3721,67.227328,14.625213,2022-03-29 11:52:40.338000+00:00,3756.249,POINT (14.62521 67.22733)
3722,67.227320,14.625202,2022-03-29 11:52:41.338000+00:00,3757.249,POINT (14.62520 67.22732)


In [6]:
# Insert additional columns
gdf.insert(gdf.shape[1]-1,'VideoFile','')
gdf.insert(gdf.shape[1]-1,'VideoRelTime',float())
gdf.insert(gdf.shape[1]-1,'ImagePath','')

In [7]:
gdf

Unnamed: 0,Lat,Lng,Time,TimeDiffSec,VideoFile,VideoRelTime,ImagePath,geometry
0,67.227312,14.625156,2022-03-29 10:50:04.089000+00:00,0.000,,0.0,,POINT (14.62516 67.22731)
1,67.227312,14.625156,2022-03-29 10:50:04.089000+00:00,0.000,,0.0,,POINT (14.62516 67.22731)
2,67.227312,14.625156,2022-03-29 10:50:05.098000+00:00,1.009,,0.0,,POINT (14.62516 67.22731)
3,67.227312,14.625156,2022-03-29 10:50:06.094000+00:00,2.005,,0.0,,POINT (14.62516 67.22731)
4,67.227312,14.625156,2022-03-29 10:50:07.098000+00:00,3.009,,0.0,,POINT (14.62516 67.22731)
...,...,...,...,...,...,...,...,...
3719,67.227312,14.625167,2022-03-29 11:52:02.075000+00:00,3717.986,,0.0,,POINT (14.62517 67.22731)
3720,67.227327,14.625210,2022-03-29 11:52:39.451000+00:00,3755.362,,0.0,,POINT (14.62521 67.22733)
3721,67.227328,14.625213,2022-03-29 11:52:40.338000+00:00,3756.249,,0.0,,POINT (14.62521 67.22733)
3722,67.227320,14.625202,2022-03-29 11:52:41.338000+00:00,3757.249,,0.0,,POINT (14.62520 67.22732)


In [8]:
# Create dataframe to hold info on videos
# video_data = pd.DataFrame(columns=['FileName','Duration','FrameRate','StartTimeSec','StopTimeSec'])
video_data = pd.DataFrame({'FileName':'','Duration':float(),'StartTimeSec':float(),'StopTimeSec':float()}, index=[])

In [9]:
video_data.dtypes

FileName         object
Duration        float64
StartTimeSec    float64
StopTimeSec     float64
dtype: object

In [10]:
# Get list of video files, insert into dataframe
video_data['FileName'] = misc.file_pattern_search(video_dir, '*.[Mm][Pp]4')    # Use brackets to find both .mp4 and .MP4 files

In [11]:
video_data

Unnamed: 0,FileName,Duration,StartTimeSec,StopTimeSec
0,/media/mha114/MassimalDataProcessing/20210312_...,,,
1,/media/mha114/MassimalDataProcessing/20210312_...,,,
2,/media/mha114/MassimalDataProcessing/20210312_...,,,
3,/media/mha114/MassimalDataProcessing/20210312_...,,,
4,/media/mha114/MassimalDataProcessing/20210312_...,,,
5,/media/mha114/MassimalDataProcessing/20210312_...,,,


In [12]:
# Get start time and duration for each video file
for ii,file in enumerate(video_data['FileName']):
    probe_data = ffmpeg.probe(file)
    # Note that there are multiple streams. Stream 0 is the video.
    # video_data['StartTime'][ii] = pd.to_datetime(probe_data['streams'][0]['tags']['creation_time'])
#     video_data['Duration'][ii] = pd.to_numeric(probe_data['streams'][0]['duration'])
#     video_data['FrameRate'][ii] = eval(probe_data['streams'][0]['avg_frame_rate'])

    video_data.loc[ii,'Duration'] = pd.to_numeric(probe_data['streams'][0]['duration'])
    video_data.loc[ii,'FrameRate'] = eval(probe_data['streams'][0]['avg_frame_rate'])

    
#     print('********')
#     print(file)
#     for key, value in probe_data['streams'][0].items():
#        print(key + ': ' + str(value))

In [13]:
video_data

Unnamed: 0,FileName,Duration,StartTimeSec,StopTimeSec,FrameRate
0,/media/mha114/MassimalDataProcessing/20210312_...,532.532,,,29.97003
1,/media/mha114/MassimalDataProcessing/20210312_...,532.532,,,29.97003
2,/media/mha114/MassimalDataProcessing/20210312_...,532.532,,,29.97003
3,/media/mha114/MassimalDataProcessing/20210312_...,532.532,,,29.97003
4,/media/mha114/MassimalDataProcessing/20210312_...,532.532,,,29.97003
5,/media/mha114/MassimalDataProcessing/20210312_...,248.781867,,,29.97003


In [14]:
# Calculate start/stop times for each video
# video_data['StartTimeSec'][0] = 0.0
#video_data['StartTimeSec'][1:] = np.cumsum(video_data['Duration'][0:-1])
#video_data['StopTimeSec'] = video_data['StartTimeSec'] + video_data['Duration']

# video_data.iloc[0,video_data.columns.get_loc('StartTimeSec')] = 0.0
# video_data.iloc[1:,video_data.columns.get_loc('StartTimeSec')] = np.cumsum(video_data.iloc[:-1,video_data.columns.get_loc('Duration')])
# video_data['StopTimeSec'] = video_data['StartTimeSec'] + video_data['Duration']


# video_data.iloc[0]['StartTimeSec'] = 0.0
# video_data.iloc[1:]['StartTimeSec'] = np.cumsum(video_data.iloc[:-1]['Duration'])
# video_data['StopTimeSec'] = video_data['StartTimeSec'] + video_data['Duration']

video_data.ix

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  video_data.iloc[0]['StartTimeSec'] = 0.0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  video_data.iloc[1:]['StartTimeSec'] = np.cumsum(video_data.iloc[:-1]['Duration'])


In [71]:
video_data.dtypes

FileName         object
Duration        float64
FrameRate       float64
StartTimeSec    float64
StopTimeSec     float64
dtype: object

In [72]:
video_data

Unnamed: 0,FileName,Duration,FrameRate,StartTimeSec,StopTimeSec
0,/media/mha114/MassimalDataProcessing/20210312_...,532.532,29.97003,0.0,532.532
1,/media/mha114/MassimalDataProcessing/20210312_...,532.532,29.97003,532.532,1065.064
2,/media/mha114/MassimalDataProcessing/20210312_...,532.532,29.97003,1065.064,1597.596
3,/media/mha114/MassimalDataProcessing/20210312_...,532.532,29.97003,1597.596,2130.128
4,/media/mha114/MassimalDataProcessing/20210312_...,532.532,29.97003,2130.128,2662.66
5,/media/mha114/MassimalDataProcessing/20210312_...,248.781867,29.97003,2662.66,2911.441867


In [13]:
gdf

Unnamed: 0,Lat,Lng,Time,TimeDiffSec,VideoFile,VideoRelTime,ImagePath,geometry
0,67.227312,14.625156,2022-03-29 10:50:04.089000+00:00,0.000,,,,POINT (14.62516 67.22731)
1,67.227312,14.625156,2022-03-29 10:50:04.089000+00:00,0.000,,,,POINT (14.62516 67.22731)
2,67.227312,14.625156,2022-03-29 10:50:05.098000+00:00,1.009,,,,POINT (14.62516 67.22731)
3,67.227312,14.625156,2022-03-29 10:50:06.094000+00:00,2.005,,,,POINT (14.62516 67.22731)
4,67.227312,14.625156,2022-03-29 10:50:07.098000+00:00,3.009,,,,POINT (14.62516 67.22731)
...,...,...,...,...,...,...,...,...
3719,67.227312,14.625167,2022-03-29 11:52:02.075000+00:00,3717.986,,,,POINT (14.62517 67.22731)
3720,67.227327,14.625210,2022-03-29 11:52:39.451000+00:00,3755.362,,,,POINT (14.62521 67.22733)
3721,67.227328,14.625213,2022-03-29 11:52:40.338000+00:00,3756.249,,,,POINT (14.62521 67.22733)
3722,67.227320,14.625202,2022-03-29 11:52:41.338000+00:00,3757.249,,,,POINT (14.62520 67.22732)


In [14]:
video_data['StopTimeSec'].iloc[-1]

2911.4418670000005

In [15]:
ind_after_last_video = (gdf['TimeDiffSec'] >= video_data['StopTimeSec'].iloc[-1])

In [16]:
# Remove logged positions without video data
ind_within_video_duration = (gdf['TimeDiffSec'] <= video_data['StopTimeSec'].iloc[-1])
gdf = gdf[ind_within_video_duration]

In [17]:
gdf

Unnamed: 0,Lat,Lng,Time,TimeDiffSec,VideoFile,VideoRelTime,ImagePath,geometry
0,67.227312,14.625156,2022-03-29 10:50:04.089000+00:00,0.000,,,,POINT (14.62516 67.22731)
1,67.227312,14.625156,2022-03-29 10:50:04.089000+00:00,0.000,,,,POINT (14.62516 67.22731)
2,67.227312,14.625156,2022-03-29 10:50:05.098000+00:00,1.009,,,,POINT (14.62516 67.22731)
3,67.227312,14.625156,2022-03-29 10:50:06.094000+00:00,2.005,,,,POINT (14.62516 67.22731)
4,67.227312,14.625156,2022-03-29 10:50:07.098000+00:00,3.009,,,,POINT (14.62516 67.22731)
...,...,...,...,...,...,...,...,...
2908,67.227421,14.626092,2022-03-29 11:38:31.119000+00:00,2907.030,,,,POINT (14.62609 67.22742)
2909,67.227422,14.626088,2022-03-29 11:38:32.124000+00:00,2908.035,,,,POINT (14.62609 67.22742)
2910,67.227423,14.626085,2022-03-29 11:38:33.113000+00:00,2909.024,,,,POINT (14.62608 67.22742)
2911,67.227424,14.626081,2022-03-29 11:38:34.117000+00:00,2910.028,,,,POINT (14.62608 67.22742)


In [18]:
for ii in range(video_data.shape[0]):
    ind = (gdf['TimeDiffSec'] >= video_data['StartTimeSec'][ii]) & (gdf['TimeDiffSec'] < video_data['StopTimeSec'][ii])
    gdf.loc[ind, 'VideoFile'] = video_data['FileName'][ii]  
    gdf.loc[ind, 'VideoRelTime'] = gdf.loc[ind, 'TimeDiffSec'] - video_data['StartTimeSec'][ii]

In [19]:
gdf

Unnamed: 0,Lat,Lng,Time,TimeDiffSec,VideoFile,VideoRelTime,ImagePath,geometry
0,67.227312,14.625156,2022-03-29 10:50:04.089000+00:00,0.000,/media/mha114/MassimalDataProcessing/20210312_...,0.0,,POINT (14.62516 67.22731)
1,67.227312,14.625156,2022-03-29 10:50:04.089000+00:00,0.000,/media/mha114/MassimalDataProcessing/20210312_...,0.0,,POINT (14.62516 67.22731)
2,67.227312,14.625156,2022-03-29 10:50:05.098000+00:00,1.009,/media/mha114/MassimalDataProcessing/20210312_...,1.009,,POINT (14.62516 67.22731)
3,67.227312,14.625156,2022-03-29 10:50:06.094000+00:00,2.005,/media/mha114/MassimalDataProcessing/20210312_...,2.005,,POINT (14.62516 67.22731)
4,67.227312,14.625156,2022-03-29 10:50:07.098000+00:00,3.009,/media/mha114/MassimalDataProcessing/20210312_...,3.009,,POINT (14.62516 67.22731)
...,...,...,...,...,...,...,...,...
2908,67.227421,14.626092,2022-03-29 11:38:31.119000+00:00,2907.030,/media/mha114/MassimalDataProcessing/20210312_...,244.37,,POINT (14.62609 67.22742)
2909,67.227422,14.626088,2022-03-29 11:38:32.124000+00:00,2908.035,/media/mha114/MassimalDataProcessing/20210312_...,245.375,,POINT (14.62609 67.22742)
2910,67.227423,14.626085,2022-03-29 11:38:33.113000+00:00,2909.024,/media/mha114/MassimalDataProcessing/20210312_...,246.364,,POINT (14.62608 67.22742)
2911,67.227424,14.626081,2022-03-29 11:38:34.117000+00:00,2910.028,/media/mha114/MassimalDataProcessing/20210312_...,247.368,,POINT (14.62608 67.22742)


In [20]:
# Create a small test dataset based on every 100'th row
gdf_small = gdf.copy()
gdf_small = gdf_small.iloc[::100]

In [21]:
gdf_small

Unnamed: 0,Lat,Lng,Time,TimeDiffSec,VideoFile,VideoRelTime,ImagePath,geometry
0,67.227312,14.625156,2022-03-29 10:50:04.089000+00:00,0.0,/media/mha114/MassimalDataProcessing/20210312_...,0.0,,POINT (14.62516 67.22731)
100,67.227314,14.625155,2022-03-29 10:51:43.118000+00:00,99.029,/media/mha114/MassimalDataProcessing/20210312_...,99.029,,POINT (14.62516 67.22731)
200,67.227313,14.625155,2022-03-29 10:53:23.109000+00:00,199.02,/media/mha114/MassimalDataProcessing/20210312_...,199.02,,POINT (14.62515 67.22731)
300,67.227314,14.625155,2022-03-29 10:55:03.123000+00:00,299.034,/media/mha114/MassimalDataProcessing/20210312_...,299.034,,POINT (14.62516 67.22731)
400,67.227314,14.625155,2022-03-29 10:56:43.122000+00:00,399.033,/media/mha114/MassimalDataProcessing/20210312_...,399.033,,POINT (14.62516 67.22731)
500,67.227313,14.62505,2022-03-29 10:58:23.114000+00:00,499.025,/media/mha114/MassimalDataProcessing/20210312_...,499.025,,POINT (14.62505 67.22731)
600,67.227335,14.624601,2022-03-29 11:00:03.102000+00:00,599.013,/media/mha114/MassimalDataProcessing/20210312_...,66.481,,POINT (14.62460 67.22733)
700,67.227252,14.624851,2022-03-29 11:01:43.102000+00:00,699.013,/media/mha114/MassimalDataProcessing/20210312_...,166.481,,POINT (14.62485 67.22725)
800,67.227349,14.624688,2022-03-29 11:03:23.115000+00:00,799.026,/media/mha114/MassimalDataProcessing/20210312_...,266.494,,POINT (14.62469 67.22735)
900,67.227225,14.62488,2022-03-29 11:05:03.131000+00:00,899.042,/media/mha114/MassimalDataProcessing/20210312_...,366.51,,POINT (14.62488 67.22722)


In [61]:
def sec_to_timestring(sec):
    sec_td = datetime.timedelta(seconds = sec)
    timestring = (str(sec_td.seconds//60).zfill(2) + 'm' 
                  + str(np.mod(sec_td.seconds,60)).zfill(2) + 's' 
                  + str(sec_td.microseconds//1000).zfill(3) + 'ms')
    return timestring

In [73]:
sec_to_timestring(367.86)

'06m07s860ms'

In [65]:
# Loop over every row, create image and save image file name
for ii in range(len(gdf_small)):
    print('Processing row ' + str(ii) + ' of ' + str(len(gdf_small)))
    
    image_file_name = (Path(gdf_small.iloc[ii]['VideoFile']).stem + '_' +
                       sec_to_timestring(gdf_small.iloc[ii]['VideoRelTime']) + '.jpg')

    abs_path = Path(image_dir,image_file_name)
    rel_path = Path(abs_path.parent.name, image_file_name)
    
    gdf_small.iloc[ii, gdf_small.columns.get_loc('ImagePath')] = str(rel_path)
    
    # Extract image from video and save
    video_transect.image_from_video(gdf_small.iloc[ii]['VideoFile'],
                                   str(abs_path),
                                   gdf_small.iloc[ii]['VideoRelTime'])
    

Processing row 0 of 30
Processing row 1 of 30
Processing row 2 of 30
Processing row 3 of 30
Processing row 4 of 30
Processing row 5 of 30
Processing row 6 of 30
Processing row 7 of 30
Processing row 8 of 30
Processing row 9 of 30
Processing row 10 of 30
Processing row 11 of 30
Processing row 12 of 30
Processing row 13 of 30
Processing row 14 of 30
Processing row 15 of 30
Processing row 16 of 30
Processing row 17 of 30
Processing row 18 of 30
Processing row 19 of 30
Processing row 20 of 30
Processing row 21 of 30
Processing row 22 of 30
Processing row 23 of 30
Processing row 24 of 30
Processing row 25 of 30
Processing row 26 of 30
Processing row 27 of 30
Processing row 28 of 30
Processing row 29 of 30
