#### Author: Thejasvi Beleyur
#### Last updated: 24-01-2020

This notebook will detail all the steps required to get matching audio snippets for the annotations of bat flights made for 2018-08-17 between 03:00-04:00 hours. 

### Steps carried out beforehand:
1. Generating the video sync for the video file *OrlovaChukaDome_01_20180817_03.00.00-04.00.00[R][@1e7f][2].avi* with frame-wise light intensities and time stamps

2. Video annotations of the flying horseshoe bats, done by Neetash MR and Aditya Kanjoor.


In [1]:
# Plotting, tabular data and progress-bar utilities
import matplotlib.pyplot as plt
import pandas as pd 
from tqdm import tqdm

# NOTE(review): machine-specific absolute path to the match_audio_to_video 'bin' folder --
# this must be edited before the notebook can run on another computer.
package_folder = '/home/tbeleyur/Documents/packages_dev/match_audio_to_video/bin/'
import os
import sys 
sys.path.append(package_folder) # make the modules in package_folder importable
# This import has to come after the sys.path.append above
# (presumably process_video_annotations lives in package_folder -- confirm).
from process_video_annotations import video_sync_over_annotation_block



In [2]:
%load_ext line_profiler

In [3]:
# Load the video annotations table from csv (path is relative to the working directory)
annotations = pd.read_csv('annotations/3am_4am - 3am_4am.csv')

In [4]:
annotations.head()

Unnamed: 0,video_path,annotation_id,old_annotation_id,Sampling minute,timestamp from camera,start_timestamp,start_framenumber,end_timestamp,end_framenumber,no. of flying bats,bat behaviour,strength (1 - strong,0- weak),weak in (Behaviour Be,Bat number Bn),comments,Details on the comment,Additional_details
0,F:\analysis_horseshoebat\videoanalysis\16.08.2...,2018-08-17_34_1,34_1,56,2,2018-08-17 03:56:00,15,2018-08-17 03:56:00,18,1,,1,,,,,,
1,F:\analysis_horseshoebat\videoanalysis\16.08.2...,2018-08-17_34_2,34_2,56,2,2018-08-17 03:56:00,1,2018-08-17 03:56:00,21,1,,1,,,,,,
2,F:\analysis_horseshoebat\videoanalysis\16.08.2...,2018-08-17_34_3,34_3,56,2,2018-08-17 03:57:00,19,2018-08-17 03:57:00,8,1,,1,,,,,,
3,F:\analysis_horseshoebat\videoanalysis\16.08.2...,2018-08-17_34_4,34_4,56,2,2018-08-17 03:57:00,11,2018-08-17 03:57:00,1,1,,1,,,,,,
4,F:\analysis_horseshoebat\videoanalysis\16.08.2...,2018-08-17_34_5,34_5,56,2,2018-08-17 03:56:00,1,2018-08-17 03:56:00,1,0,,1,,,,,,


In [5]:
# Keep only the annotations in which at least one flying bat was counted,
# and renumber the remaining rows from 0.
has_flying_bats = annotations['no. of flying bats'] > 0
annots_w_bats = annotations.loc[has_flying_bats].reset_index(drop=True)

### Carrying out the common-fps conversion:


# Read in the video sync csv that has the corrected timestamps
raw_sync_folder = '/home/tbeleyur/Documents/packages_dev/match_audio_to_video/experimental_testdata/horseshoebat_data/whole_data_analysis/raw_sync/2018-08-16'
video_sync_file = 'videosync_OrlovaChukaDome_01_20180817_03.00.00-04.00.00[R][@1e7f][2].avi_[corrected_timestamps].csv'
path_to_videosync = os.path.join(raw_sync_folder, video_sync_file)
video_sync = pd.read_csv(path_to_videosync)

# Keyword arguments handed to video_sync_over_annotation_block for every annotation
kwargs = {
    'timestamp_pattern': '%Y-%m-%d %H:%M:%S',
    'min_fps': 21,        # Hz
    'min_durn': 75.0,     # seconds
    'common_fps': 25,     # Hz
    'output_folder': './common_fps/2018-08-16/',
}

# Row indices (in annots_w_bats) of the annotations that could not be processed
unprocessed_annotations = []

# iterrows() is a generator without a length, so the total is passed explicitly
# to let tqdm display the overall progress.
for i, each_annotation in tqdm(annots_w_bats.iterrows(), total=len(annots_w_bats)):
    try:
        success = video_sync_over_annotation_block(each_annotation, video_sync,
                                                   **kwargs)
    # Catch only real errors: a bare 'except:' also swallows KeyboardInterrupt,
    # which makes it impossible to stop this loop by interrupting the kernel.
    except Exception as error:
        unprocessed_annotations.append(i)
        print('Unable to process annotation: %s'%each_annotation['annotation_id'])
        # keep the reason visible so the cause of each failure can be checked later
        print('    reason: %r'%(error,))


# number of annotations that failed
len(unprocessed_annotations)

Cause of the unprocessed annotations: A quick check showed me that all the unprocessed annotations had odd timestamps - either the end timestamp was before the start timestamp, or the start and end timestamps were the same.

### Matching Audio from the common_fps files

In [6]:
### Finding the matching audio snippet !! 
import glob 
import soundfile as sf
from audio_for_videoannotation import match_video_sync_to_audio, search_for_best_fit

#all_commonfps = glob.glob('common_fps_video_sync*') # get all the relevant common_fps_sync files
# collect every common-fps video sync file (relative to the working directory)
all_commonfps = glob.glob('common_fps/2018-08-16/common_fps*')
audio_folder = '/media/tbeleyur/THEJASVI_DATA_BACKUP_3/fieldwork_2018_002/horseshoe_bat/audio_non_spikey/2018-08-16/' # folder with the audio recordings (absolute, machine-specific path on an external drive)

# the output folders
# NOTE(review): nothing in this notebook creates these folders -- presumably they must exist beforehand; confirm.
audiosync_folder = './sync_audio/'
audioannotation_folder = './annotation_audio/'
fs = 250000 # sampling rate in Hz used when writing the output WAV files; change according to the recording sampling rate!! 


In [7]:
# Collects the 'crosscoef' value obtained for each file handled by generate_matching_audio
all_ccs = []
# Process the common-fps files in sorted (alphabetical) order
files_to_run = sorted(all_commonfps)
def generate_matching_audio(each_commonfps):
    '''Find and save the audio that matches one common-fps video sync file.

    The video sync csv is loaded and passed to match_video_sync_to_audio
    together with the module-level audio_folder. The two audio outputs it
    returns are then written as WAV files at the module-level sampling rate fs.

    Parameters
    ----------
    each_commonfps : str
        Path to a common-fps video sync csv file. The part of the file name
        after 'common_fps_video_sync' (without the file extension) is used
        as the annotation id in the output file names.

    Returns
    -------
    bool
        True if both WAV files were written, False if writing them failed.

    Notes
    -----
    Side effects: prints the input path, appends the third value returned by
    match_video_sync_to_audio (crosscoef) to the module-level list all_ccs,
    and writes 'matching_sync_<id>.WAV' into audiosync_folder and
    'matching_annotaudio_<id>.WAV' into audioannotation_folder.

    Errors raised while reading the csv or while matching the audio are
    not caught here and propagate to the caller.
    '''
    print(each_commonfps)
    video_sync = pd.read_csv(each_commonfps)
    best_audio, syncblock_audio, crosscoef = match_video_sync_to_audio(
        video_sync, audio_folder,
        audio_fileformat='*.WAV',
        audio_sync_spikey=False,
        file_subset=('non_spikey_T0000550', 'non_spikey_T0000629'))
    all_ccs.append(crosscoef)

    # annotation id = whatever follows 'common_fps_video_sync' in the file name,
    # with the extension removed
    fname = os.path.split(each_commonfps)[-1]
    annotation_id = os.path.splitext(fname.split('common_fps_video_sync')[-1])[0]
    audio_file_base_name = annotation_id + '.WAV'

    try:
        sf.write(os.path.join(audiosync_folder, 'matching_sync_' + audio_file_base_name),
                 syncblock_audio, fs)
        sf.write(os.path.join(audioannotation_folder, 'matching_annotaudio_' + audio_file_base_name),
                 best_audio, fs)
        return True
    # 'except Exception' rather than a bare 'except:' so that a kernel interrupt
    # still stops a long batch run.
    except Exception as error:
        # the original referred to an undefined name here (common_fps_file), which
        # turned every save failure into a NameError instead of returning False
        print('Could not save ', each_commonfps, error)
        return False



In [8]:
# Use the first common-fps file as a single test case for the profiling and timing cells below
each_commonfps = files_to_run[0]
# NOTE(review): this rebinds the module-level name video_sync, which earlier held the table
# read from path_to_videosync -- re-running the annotation loop after this cell would use this table instead.
video_sync = pd.read_csv(each_commonfps)

In [None]:

%lprun -f search_for_best_fit  match_video_sync_to_audio(video_sync, audio_folder, audio_fileformat='*.WAV', 
                          audio_sync_spikey=False, file_subset=('non_spikey_T0000550','non_spikey_T0000552'))

In [9]:
%%timeit
match_video_sync_to_audio(video_sync, audio_folder, audio_fileformat='*.WAV', 
                          audio_sync_spikey=False, file_subset=('non_spikey_T0000550','non_spikey_T0000552'))

('video_fps obtained is :', 25)
Did not find user-provided sample rate - getting it from first file that matches format!
('sampling rate is : ', 250000)
.....finding best audio segment.....
Unable to get proper audio match for video segment!


TypeError: too many arguments: expected 3, got 5

# Run the audio matching on every common-fps file and keep each True/False outcome.
# list() forces the work to happen here: under Python 3 a bare map() is lazy and
# would not process any file until its result was iterated over.
success = list(map(generate_matching_audio, files_to_run))