In [1]:
%load_ext autoreload
%autoreload 2

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))


In [2]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import h5py
from pathlib import Path
import librosa
import librosa.display
from moviepy.editor import AudioFileClip,VideoFileClip,CompositeVideoClip, VideoClip
from moviepy.video.io.bindings import mplfig_to_npimage
from itertools import cycle

import sys
sys.path.append('../')

import project_paths
import project_params
from data import BatchRawDataset

%matplotlib notebook

# Load TAU2021 validation dataset

In [3]:
# Build the raw-batch dataset from the TAU2021 index (fold 0 only, 10 s input
# windows); every other setting comes from the shared project configuration.
dataset = BatchRawDataset(
    index_path=project_paths.index_folder.joinpath('TAU2021.json'),
    root=project_paths.tauuavs2021_root,
    folds=[0],
    in_dur=10,
    sr=project_params.sr,
    batch_size=project_params.batch_size,
    num_regions=project_params.num_regions,
    labels_period=project_params.labels_period,
    fov=project_params.fov,
)

In [4]:
# Create an iterator over the dataset so that batches can be pulled one at a
# time with next().
dataset_iter = iter(dataset)

# Load batch

In [5]:
# Pull the next batch: `batch_dict` holds per-item fields (the cells below read
# 'uid', 'in_time' and 'out_time') and `batch_gt` the matching ground truth.
# NOTE: every re-run of this cell advances `dataset_iter`, so the batch changes.
batch_dict, batch_gt = next(dataset_iter)

# Visualize ground truth

In [6]:
# Position, inside the current batch, of the item to inspect.
batch_idx = 25

uid = batch_dict['uid'][batch_idx]

# Time span of this item's input window; the nan-aware min/max ignore any NaN
# entries in the time vector. The values are used below as librosa
# offset/duration, i.e. seconds of file time.
win_in_time = batch_dict['in_time'][batch_idx]
start = np.nanmin(win_in_time)
end = np.nanmax(win_in_time)
win_dur = end - start

file_index = dataset.index[uid]
file_gt = batch_gt[batch_idx]
print(f'Visualizing prediction for uid: {uid}')
print(f'start: {start:.1f} - end: {end:.1f}')

# NOTE(review): absolute, machine-specific paths. The dataset itself was built
# with root=project_paths.tauuavs2021_root, which presumably points at the same
# tree - confirm and switch to it so the notebook runs on other machines.
audio_root = Path('/nas/shared/dataset/public/tau-urban-audio-visual-scenes-2021/')
video_root = audio_root.joinpath('development', 'video')

audio_path = audio_root.joinpath(file_index['path'])
video_path = video_root.joinpath(Path(file_index['path']).stem + '.mp4')

# Load only the audio of the selected window. Because of `offset=start`, time 0
# of the loaded signal (and of both plots below) corresponds to `start` in the
# file.
file_audio, file_sr = librosa.load(audio_path, sr=None, mono=True, offset=start, duration=win_dur)

fig, axs = plt.subplots(2, 1, figsize=(9, 6), sharex='all')
wave_ax, spec_ax = axs

# Waveform (top panel).
librosa.display.waveshow(file_audio, sr=file_sr, ax=wave_ax)
wave_ax.grid(True)
wave_ax.set_title(f'uid {uid}: {start:.1f}-{end:.1f} s')

# Spectrogram (bottom panel). The data is a linear-frequency STFT, so a
# log-scaled Hz axis is requested; 'mel' would label the FFT bins with
# mel-spaced frequencies they do not correspond to.
file_audio_stft = librosa.stft(file_audio)
file_audio_specgram = librosa.amplitude_to_db(np.abs(file_audio_stft), ref=np.max)
librosa.display.specshow(file_audio_specgram, sr=file_sr, y_axis='log', x_axis='time', ax=spec_ax)

# Overlay the annotated events as translucent bars. The bars cycle through
# half-octave vertical positions (in Hz) so that overlapping events stay
# distinguishable.
bar_height = 0.4
bar_bottom_it = cycle(2**np.arange(8,15,0.5))

for event in file_index['events']:
    # Events either carry a vector of label timestamps or explicit start/end.
    if 'time' in event:
        ev_start = event['time'][0]
        ev_end = event['time'][-1] + project_params.labels_period
    else:
        ev_start = event['start']
        ev_end = event['end']

    # Skip events that do not overlap the displayed window.
    if ev_start >= end or ev_end <= start:
        continue

    # Event times are in file time while the plot axis is window-relative:
    # clip the event to the window and shift it by `start` so the bar lines up
    # with the audio it annotates.
    rel_start = max(ev_start, start) - start
    rel_end = min(ev_end, end) - start

    bottom = next(bar_bottom_it)
    height = bar_height*bottom
    spec_ax.bar(x=(rel_start+rel_end)/2, width=rel_end-rel_start, bottom=bottom, height=height, alpha=0.5)
    spec_ax.text(rel_start+0.05, bottom+height/2, f'({event["source"][:1].upper()}) {event["label"]}', color='w', va='center')

Visualizing prediction for uid: 392
start: 0.0 - end: 10.0


<IPython.core.display.Javascript object>

In [7]:
# Tick labels for the ground-truth map: one azimuth per column, one class per row.
angles = np.linspace(-dataset.fov/2,dataset.fov/2,dataset.num_regions)
classes = dataset.classes

# Open the video of the selected file. set_start/set_end are used here only so
# that `file_video.duration` equals the window length; frames are still
# requested by absolute file time inside make_frame().
file_video = VideoFileClip(str(video_path)).set_start(start).set_end(end)

fig,axs = plt.subplots(1,2,figsize=(12,3))

def make_frame(frame_time:float):
    """Render one frame of the ground-truth video.

    Draws the video frame (left) next to the ground-truth map of the closest
    label timestamp (right) on the shared figure and returns it as an image.

    Args:
        frame_time: time of the requested frame, relative to the start of the
            displayed window.

    Returns:
        The rendered figure as an array, as produced by `mplfig_to_npimage`.
    """
    # Convert window-relative time to file time.
    abs_time = frame_time + start

    # Pick the label frame closest in time, ignoring NaN entries of the
    # timestamp vector.
    win_out_time = batch_dict['out_time'][batch_idx]
    win_out_time = win_out_time[~np.isnan(win_out_time)]
    gt_time_idx = np.argmin(np.abs(abs_time-win_out_time))

    frame_ax, gt_ax = axs

    frame_ax.clear()
    frame_ax.imshow(file_video.get_frame(abs_time))

    # The map is mirrored left-right and the azimuth tick labels are reversed
    # accordingly, so every column keeps its own angle label.
    gt_ax.clear()
    gt_ax.imshow(np.fliplr(file_gt[gt_time_idx]),vmin=0,vmax=1,cmap='magma')
    gt_ax.axis('auto')
    gt_ax.set_yticks(np.arange(len(classes)))
    gt_ax.set_yticklabels(classes)
    gt_ax.set_xticks(np.arange(len(angles)))
    gt_ax.set_xticklabels([ f'{a:.0f}' for a in angles[::-1] ])
    gt_ax.set_xlabel('Azimuth [deg]')

    # Lay out this figure explicitly instead of whichever one is "current".
    fig.tight_layout()

    return mplfig_to_npimage(fig)

# Render and display.
# The audio must be trimmed with subclip(): set_start/set_end only change when a
# clip is scheduled inside a composition and do not cut the source, so the
# soundtrack would always begin at file time 0 and drift out of sync with the
# frames whenever `start` is not 0.
file_audio_clip = AudioFileClip(str(audio_path)).subclip(start, end)
file_video_pred = VideoClip(make_frame, duration=file_video.duration).set_audio(file_audio_clip)
file_video_pred.ipython_display(width=960,fps=2)

<IPython.core.display.Javascript object>

Moviepy - Building video __temp__.mp4.
MoviePy - Writing audio in __temp__TEMP_MPY_wvf_snd.mp3


                                                                                                          

MoviePy - Done.
Moviepy - Writing video __temp__.mp4



                                                                                                          

Moviepy - Done !
Moviepy - video ready __temp__.mp4


