**Notebook to extract frames from video**

**Import libraries**

In [None]:
import os
import cv2 
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import glob
from pathlib import Path
from shutil import copyfile
from shutil import rmtree

**Define paths to find video and save files**

In [None]:
# The videos live on an external drive whose mount point differs between
# platforms (linux vs windows), so the top level of the path to the videos
# is kept in its own variable.
PLATFORM_HEAD = '/mnt/N-SeaVids/'

# Root folder under which extracted images are written (the destination,
# not the location of the videos).
DEST_PLATFORM_HEAD = '/media/hdd/cmccaig/images/N-Sea'

# Section of the path, below PLATFORM_HEAD, that leads to the actual videos.
PATH_HEAD = 'DATA201902/Project 1/IC2'

# Parent folder for every 'extractedFiles_*' output folder.  It is derived
# from DEST_PLATFORM_HEAD so the destination root is defined in one place;
# the trailing '/' is required because folder names are appended to it by
# plain string concatenation.
extractedParentFolder = DEST_PLATFORM_HEAD + '/combinedAnnotation/'

**Define which observation codes are of interest**

In [None]:
# Only events whose 'Observation Code' is one of these are processed
relEvs = (
    'FJ',
    'FJD',
    'AN',
    'SUS',
    'SUE',
    'EXS',
    'EXE',
)

**Template to create file/event label**

In [None]:
# Name given to each copied frame.  The four placeholders are filled, in
# order, with: location (path relative to PLATFORM_HEAD), event timestamp,
# observation code, frame timestamp.
filename_format = 'Location-{}-EventID-{}-Type-{}-Timestamp-{}.png'

**Extract frames for each event in a window around the recorded time: from 2 × `numS` seconds before to `numS` seconds after**

In [None]:
# Number of seconds used to size the frame window around each event
numS = 3

**Functions**

In [None]:
######################################
# fn to parse timestamp
######################################
def parseTimestamp(ts):
    """Convert a compact timestamp string into a ``datetime``.

    ``ts`` is laid out as year, month, day, hour, minute, second followed by
    a fractional-second part (``%Y%m%d%H%M%S%f``).
    """
    layout = '%Y%m%d%H%M%S%f'
    parsed = datetime.strptime(ts, layout)
    return parsed

######################################
# fn to create timestamp
######################################
def createTimestamp(dt):
    """Render a ``datetime`` as a compact ``YYYYMMDDHHMMSSmmm`` string.

    Month, day, hour, minute and second are zero-padded to two digits; the
    final three digits are whole milliseconds (microseconds truncated).
    """
    millis = int(dt.microsecond/1000)
    twoDigitFields = (dt.month, dt.day, dt.hour, dt.minute, dt.second)

    pieces = [str(dt.year)]
    pieces.extend('{:02d}'.format(field) for field in twoDigitFields)
    pieces.append('{:03d}'.format(millis))

    return ''.join(pieces)
    
######################################
# fn to mkdir and handle exceptions
######################################
def mkdirExcept(dirName):
    """Create ``dirName`` and report whether it was created or already existed.

    Missing parent folders are created as well, so the call also succeeds on
    a fresh output location where the parent does not exist yet.

    Parameters
    ----------
    dirName : str or path-like
        Folder to create.

    Returns
    -------
    None
    """
    try:
        # makedirs (rather than mkdir) so that missing parents do not raise
        # FileNotFoundError
        os.makedirs(dirName)
        print('Folder ',dirName,' created')
    except FileExistsError:
        print('Folder ',dirName,' already exists')
    
######################################
# Function to extract centre frames 
######################################

def FrameCaptureCentre(path, foldTS, fps=25):
    """Write every frame of the centre video found under ``path`` as a PNG.

    The first file matching ``*Ch2.mpg`` anywhere below ``path`` is opened and
    each decoded frame is saved in
    ``<extractedParentFolder>extractedFiles_<foldTS>/centreFrames/``.  A
    frame's timestamp is the start time taken from the video file name plus
    ``1/fps`` seconds for every preceding frame.

    Parameters
    ----------
    path : pathlib.Path
        Folder, located below ``PLATFORM_HEAD``, that is searched for the video.
    foldTS : str
        Suffix used to name the ``extractedFiles_<foldTS>`` output folder.
    fps : int or float, optional
        Frame rate assumed when advancing the timestamp.  Defaults to 25,
        the value that used to be hard-coded.

    Returns
    -------
    list of int
        Timestamp (``YYYYMMDDHHMMSSmmm`` as an integer) of each frame that
        was read and passed to ``cv2.imwrite``, in frame order.

    Raises
    ------
    FileNotFoundError
        If no ``*Ch2.mpg`` file exists below ``path``.
    """
    centreFrameTimestamps = []

    candidates = glob.glob(str(path) + '/**/*Ch2.mpg', recursive=True)
    if not candidates:
        raise FileNotFoundError('No *Ch2.mpg video found under ' + str(path))
    centreVidPath = candidates[0]

    # The start time is sliced out of the file name by position.
    # NOTE(review): assumes the path ends with a 17-character timestamp
    # followed by exactly 14 more characters - confirm against the file names.
    centreVidTimestamp = centreVidPath[-31:-14]

    # ':' replaces '/' so the relative location can be used inside a file name
    escapedPath = str(path.relative_to(PLATFORM_HEAD)).replace('/', ':')

    folderName = Path(extractedParentFolder+'extractedFiles_'+foldTS)
    frameDir = str(folderName)+'/centreFrames/'

    mkdirExcept(str(folderName))
    mkdirExcept(frameDir)

    #######################################################
    # Centre video
    #######################################################

    vidObj = cv2.VideoCapture(centreVidPath)
    frameTime = parseTimestamp(centreVidTimestamp)
    frameStep = timedelta(seconds = 1/fps)
    count = 0

    try:
        while True:
            # Progress report every 10000 frames
            if count % 10000 == 0:
                print('CENTRE:\tcount = ',count,'\tTS = ',createTimestamp(frameTime))

            success, image = vidObj.read()
            if not success:
                # No frame was decoded (end of video): stop before trying to
                # write an empty image or recording a timestamp for it.
                break

            printTS = int(createTimestamp(frameTime))

            cv2.imwrite(frameDir+escapedPath+'-'+str(printTS)+'.png',image)
            centreFrameTimestamps.append(printTS)

            frameTime = frameTime+frameStep
            count += 1
    finally:
        # Always free the capture handle, even if reading or writing fails
        vidObj.release()

    return(centreFrameTimestamps)

######################################
# Not sure what this function is for! 
# Not sure it's needed but I don't 
# have the data so I can't test it
######################################

def list_dir(self, pattern="*"):
    """Return the entries of ``self`` that match ``pattern`` as ``Path`` objects."""
    matches = glob.glob(str(self/pattern))
    return list(map(Path, matches))

# Expose the helper as a method on every Path instance
setattr(Path, 'ls', list_dir)


**Main body of code**

**Use glob to identify Events.csv files**

In [None]:
# Root folder that is searched for event logs
PATH = Path(PLATFORM_HEAD) / PATH_HEAD
print('PATH: ',PATH)

# Every Events.csv at any depth below PATH
eventsPattern = str(PATH) + '/**/Events.csv'
evFiles = glob.glob(eventsPattern, recursive=True)
print('evFiles: ',evFiles[:5])

**Sort evFiles, drop any file that contains no events of interest, and record the number of relevant events in each remaining file**

In [None]:
# Sort first so that files are always processed in the same order
evFiles = np.sort(evFiles).tolist()
print('evFiles1: ',evFiles)

rem = []            # positions of files without any relevant event
evFileLengths = []  # number of relevant events in each file that is kept

for i, evFile in enumerate(evFiles):
    events = pd.read_csv(evFile)

    # Keep only the rows whose observation code is of interest
    events = events[events['Observation Code'].isin(relEvs)]
    nRelevant = events.shape[0]

    if nRelevant == 0:
        rem.append(i)
    else:
        evFileLengths.append(nRelevant)

# Count reported here is taken before the empty files are removed
print('len(evFiles): ',len(evFiles))

# rem is in ascending order; delete from the back so that the remaining
# positions stay valid
for index in reversed(rem):
    del evFiles[index]

In [None]:
if __name__ == '__main__': 

    # For each events file: extract every frame of the centre video once,
    # copy the frames that fall inside each event's window into a per-event
    # folder, then delete the full set of extracted frames.
    for i in range(len(evFiles)):
        print('evFiles: ',i,' / ',len(evFiles))
        parentPath = Path(evFiles[i]).parent
        # The last 17 characters of the folder path are used as the folder's
        # timestamp label
        foldTS = str(parentPath)[-17:]

        # Relevant events only, re-indexed from 0 so rows can be addressed
        # by position
        events = pd.read_csv(evFiles[i])
        events = events[events['Observation Code'].isin(relEvs)]
        events = events.reset_index(drop=True)
        times = events.VWTimestamp

        # Per-file locations.  They are computed here, not inside the event
        # loop, so that the rmtree below always refers to the current file's
        # folder even when the file yields no events.
        folderName = Path(extractedParentFolder+'extractedFiles_'+foldTS)
        centreFolder = Path(str(folderName)+'/centreFrames/')
        relLocation = parentPath.relative_to(PLATFORM_HEAD)
        escapedPath = str(relLocation).replace('/', ':')

        ### CENTRE FILE
        # Extract the centre images and get the timestamp of each one
        centreTimestamps = FrameCaptureCentre(parentPath,foldTS)

        # for each event in the file
        for r in range(events.shape[0]):
            print('Event: ',r,' / ',events.shape[0])
            curTS = times[r]
            obsCode = events['Observation Code'][r]

            # Timestamps are YYYYMMDDHHMMSSmmm encoded as integers, so the
            # window bounds must be computed with real date arithmetic:
            # adding or subtracting milliseconds on the integer gives wrong
            # bounds whenever the window crosses a minute/hour/day boundary.
            # NOTE(review): the window is 2*numS seconds before and numS
            # seconds after the event; kept as found - confirm the asymmetry
            # is intended.
            eventTime = parseTimestamp(str(int(curTS)))
            timeBefore = int(createTimestamp(eventTime - timedelta(seconds = 2*numS)))
            timeAfter = int(createTimestamp(eventTime + timedelta(seconds = numS)))

            # Frames strictly inside the window
            frameTimestamps = [ts for ts in centreTimestamps if ((ts > timeBefore) and (ts < timeAfter))]

            destFold = folderName / ('extractedFiles_'+str(curTS)+'-ObsCode_'+obsCode)
            mkdirExcept(destFold)

            for frameTS in frameTimestamps:
                srcFile = centreFolder / (escapedPath+'-'+str(frameTS)+'.png')

                fname = filename_format.format(relLocation,curTS,obsCode,str(frameTS)).replace('/', ':')

                destFile = destFold / fname

                copyfile(srcFile, destFile)

        # The full frame dump is only an intermediate; remove it once the
        # frames of interest have been copied
        rmtree(centreFolder)
                