In [9]:
from pathlib import Path
from typing import List
import numpy as np
import json
import librosa

from nemo.collections.asr.parts.utils.manifest_utils import write_manifest, read_manifest
from nemo.collections.asr.parts.utils.vad_utils import load_speech_segments_from_rttm

# Base directory under which the input manifests are looked up.
data_root = Path("/media/data/datasets/sd_eval")
# Directory that receives the rewritten manifests. The path is relative, so it
# is resolved against the current working directory; `mkdir(exist_ok=True)`
# tolerates an already-existing directory but does not create missing parents.
output_dir = Path("../manifests_sd_eval_40ms")
output_dir.mkdir(exist_ok=True)
# Read by `change_prefix`: string values in a manifest entry that start with
# `prefix_src` are rewritten to use `prefix_tgt` instead.
prefix_src = "/home/taejinp/gdrive/datasets/riva_nemo_sd_eval_data/"
prefix_tgt = "/media/data/datasets/sd_eval/"
# Frame step, in seconds, passed to `get_frame_labels` (0.04 s = 40 ms).
frame_length_secs = 0.04

def get_frame_labels(segments: List[List[float]], frame_length: float, offset: float, duration: float) -> str:
    """Return space-separated per-frame labels ('1' inside a segment, '0' outside).

    Frame ``i`` is sampled at time ``offset + i * frame_length`` and is labelled
    ``'1'`` when that instant falls inside a segment (both ends inclusive).

    Args:
        segments: ``[start, end]`` pairs. The scan only ever moves forward
            through this list, so it assumes the pairs are ordered by time
            (TODO confirm the loader guarantees this). May be empty.
        frame_length: Step between consecutive frame timestamps.
        offset: Timestamp of the first frame.
        duration: Length of the labelled region; the number of frames is
            ``ceil(duration / frame_length)``.

    Returns:
        The labels joined by single spaces, or ``''`` when there are no frames.
    """
    n_frames = int(np.ceil(duration / frame_length))

    # With no segments there is nothing to index into: every frame is '0'.
    # (Previously this case raised IndexError on ``segments[0]``.)
    if len(segments) == 0:
        return ' '.join(['0'] * n_frames)

    labels = []
    sid = 0
    last = len(segments) - 1
    for i in range(n_frames):
        t = offset + i * frame_length
        # Skip segments that ended before this frame, but never move past the
        # final segment so the membership test below always has a valid index.
        while sid < last and segments[sid][1] < t:
            sid += 1
        start, end = segments[sid][0], segments[sid][1]
        labels.append('1' if start <= t <= end else '0')
    return ' '.join(labels)

def change_prefix(manifest_entry, src_prefix=None, tgt_prefix=None):
    """Rewrite the leading path prefix of string values in a manifest entry.

    Every value that is a ``str`` and starts with the source prefix has that
    leading prefix swapped for the target prefix. Only the leading occurrence
    is replaced; later occurrences of the same text inside the value are left
    untouched. Non-string values and non-matching strings are not modified.

    Args:
        manifest_entry: Mapping to update. It is modified in place.
        src_prefix: Prefix to look for. Defaults to the module-level
            ``prefix_src`` when ``None``.
        tgt_prefix: Replacement prefix. Defaults to the module-level
            ``prefix_tgt`` when ``None``.

    Returns:
        The same ``manifest_entry`` object, for call-chaining convenience.
    """
    if src_prefix is None:
        src_prefix = prefix_src
    if tgt_prefix is None:
        tgt_prefix = prefix_tgt

    # Assigning to existing keys does not resize the dict, so updating it
    # while iterating over ``items()`` is safe here.
    for key, val in manifest_entry.items():
        if isinstance(val, str) and val.startswith(src_prefix):
            manifest_entry[key] = tgt_prefix + val[len(src_prefix):]
    return manifest_entry


In [10]:
# Add frame-level labels to every DIHARD3 domain manifest and write each result
# to `output_dir` under the same file name as its input.
input_dir = data_root / "DIHARD3" / "domain_manifests"
input_list = list(input_dir.glob("*.json"))
print(len(input_list))
for manifest_file in input_list:
    print(manifest_file)
    manifest_data = read_manifest(str(manifest_file))
    new_manifest_data = []
    for entry in manifest_data:
        # Rewrite string fields that start with `prefix_src` before any path
        # in the entry is opened.
        entry = change_prefix(entry)
        segments = load_speech_segments_from_rttm(entry['rttm_filepath'])
        # Treat a missing 'duration' key like an explicit null (the original
        # check raised KeyError when the key was absent) and measure the audio.
        # NOTE(review): the measured value is the length of the whole file even
        # if the entry carries a non-zero 'offset' — confirm that is intended.
        # NOTE(review): newer librosa releases rename `filename` to `path` —
        # confirm against the installed version.
        if entry.get('duration') is None:
            entry['duration'] = librosa.get_duration(filename=entry['audio_filepath'])
        # A missing or null 'offset' means the labels start at time zero.
        if entry.get('offset') is None:
            entry['offset'] = 0.0
        entry['label'] = get_frame_labels(segments, frame_length_secs, entry['offset'], entry['duration'])
        new_manifest_data.append(entry)

    output_file = output_dir / manifest_file.name
    write_manifest(output_file, new_manifest_data)
    

22
/media/data/datasets/sd_eval/DIHARD3/domain_manifests/dh3_eval_broadcast_interview_manifest.json
/media/data/datasets/sd_eval/DIHARD3/domain_manifests/dh3_dev_broadcast_interview_manifest.json
/media/data/datasets/sd_eval/DIHARD3/domain_manifests/dh3_eval_socio_field_manifest.json
/media/data/datasets/sd_eval/DIHARD3/domain_manifests/dh3_eval_clinical_manifest.json
/media/data/datasets/sd_eval/DIHARD3/domain_manifests/dh3_dev_clinical_manifest.json
/media/data/datasets/sd_eval/DIHARD3/domain_manifests/dh3_eval_cts_manifest.json
/media/data/datasets/sd_eval/DIHARD3/domain_manifests/dh3_dev_socio_lab_manifest.json
/media/data/datasets/sd_eval/DIHARD3/domain_manifests/dh3_dev_webvideo_manifest.json
/media/data/datasets/sd_eval/DIHARD3/domain_manifests/dh3_dev_restaurant_manifest.json
/media/data/datasets/sd_eval/DIHARD3/domain_manifests/dh3_eval_maptask_manifest.json
/media/data/datasets/sd_eval/DIHARD3/domain_manifests/dh3_dev_socio_field_manifest.json
/media/data/datasets/sd_eval/DIH

In [11]:
# Add frame-level labels to every VoxConverse example manifest and write each
# result to `output_dir` under the same file name as its input.
# The directory is derived from `data_root` (same resulting path as the
# previous hard-coded literal) so all dataset locations share one root.
input_dir = data_root / "VoxConverse_v0.3" / "example_manifest"
input_list = list(input_dir.glob("*.json"))
print(len(input_list))
for manifest_file in input_list:
    print(manifest_file)
    manifest_data = read_manifest(str(manifest_file))
    new_manifest_data = []
    for entry in manifest_data:
        # Rewrite string fields that start with `prefix_src` before any path
        # in the entry is opened.
        entry = change_prefix(entry)
        segments = load_speech_segments_from_rttm(entry['rttm_filepath'])
        # Treat a missing 'duration' key like an explicit null (the original
        # check raised KeyError when the key was absent) and measure the audio.
        # NOTE(review): the measured value is the length of the whole file even
        # if the entry carries a non-zero 'offset' — confirm that is intended.
        # NOTE(review): newer librosa releases rename `filename` to `path` —
        # confirm against the installed version.
        if entry.get('duration') is None:
            entry['duration'] = librosa.get_duration(filename=entry['audio_filepath'])
        # A missing or null 'offset' means the labels start at time zero.
        if entry.get('offset') is None:
            entry['offset'] = 0.0
        entry['label'] = get_frame_labels(segments, frame_length_secs, entry['offset'], entry['duration'])
        new_manifest_data.append(entry)

    output_file = output_dir / manifest_file.name
    write_manifest(output_file, new_manifest_data)

4
/media/data/datasets/sd_eval/VoxConverse_v0.3/example_manifest/voxconv_test_full_manifest.json
/media/data/datasets/sd_eval/VoxConverse_v0.3/example_manifest/voxconv_dev_full_manifest.short3.json
/media/data/datasets/sd_eval/VoxConverse_v0.3/example_manifest/voxconv_test_full_manifest.short3.json
/media/data/datasets/sd_eval/VoxConverse_v0.3/example_manifest/voxconv_dev_full_manifest.json
