In [1]:
import os

import numpy as np
import pandas as pd
from scipy.io.wavfile import write as wavwrite, read as wavread
from lared_dataset.constants import (processed_audio_path)

In [2]:
# Sub-directories of the processed-audio root that this notebook reads from
# (denoised audio, diarization files) or writes to (redacted audio, VAD).
denoised_audio_path, redacted_audio_path, diarizations_path, vad_path = (
    os.path.join(processed_audio_path, subdir)
    for subdir in ('denoised', 'redacted', 'diarization', 'vad')
)

In [3]:
def load_diarization(fpath):
    """Read a whitespace-separated diarization (.rttm) file into a DataFrame.

    Each line of the file is expected to hold ten whitespace-separated
    fields; only three of them are kept.

    Parameters
    ----------
    fpath : str or path-like
        Location of the diarization file.

    Returns
    -------
    pandas.DataFrame
        One row per segment with the columns ``ini`` (4th field),
        ``dur`` (5th field) and ``speaker`` (8th field).
    """
    return pd.read_csv(
        fpath,
        header=None,
        names=['x', 'y', 'z', 'ini', 'dur', 'n1', 'n2', 'speaker', 'n3', 'n4'],
        usecols=['ini', 'dur', 'speaker'],
        # `delim_whitespace=True` is deprecated since pandas 2.2;
        # a whitespace regex separator is the documented equivalent.
        sep=r'\s+',
        index_col=False)

In [4]:
# Load the diarization of participant 7 as a sample and pick its eighth
# segment (positional index 7) for inspection.
d = load_diarization(
    os.path.join(diarizations_path, '7.rttm')
)
row = d.iloc[7]

In [5]:
# Speaker labels look like 'speaker_<n>'; show the part after the underscore.
row.loc['speaker'].split('_')[1]

'1'

In [6]:
# Preview the first five parsed segments.
d.head(n=5)

Unnamed: 0,ini,dur,speaker
0,786.38,0.67,speaker_1
1,787.42,0.91,speaker_1
2,790.3,0.59,speaker_3
3,792.86,1.47,speaker_1
4,796.38,1.07,speaker_1


In [15]:
# Maps a participant id (the integer stem of the .rttm file name) to the
# diarization speaker indices (the <n> in 'speaker_<n>') whose segments are
# kept for that participant. Segments of any other speaker are dropped.
# Participants missing from this table keep the segments of every speaker.
main_speakers = {
    1: [0],
    2: [0],
    3: [0],
    4: [0],
    5: [0],
    7: [1,3],
    9: [0],
    10: [1],
    11: [0],
    12: [0,1],
    13: [1],
    14: [1],
    15: [0],
    16: [0],
    17: [0],
    18: [0], # fail: two women with same voice
    19: [0],
    20: [0],
    21: [1],
    22: [0],
    23: [1],
    24: [0],
    25: [1],
    26: [0],
    27: [2],
    29: [1,3], # check
    30: [0,3], # check
    31: [0],
    32: [0],
    33: [0],
    34: [0],
    35: [1],
    45: [0]
}

# Produce VAD

In [16]:
import numpy as np
from scipy.io.wavfile import write as wavwrite

In [17]:
def make_vad(df: pd.DataFrame, pid, size=9900,  fs=100):
    """Build a binary voice-activity signal from a diarization table.

    Parameters
    ----------
    df : pandas.DataFrame
        Segments with the columns ``ini`` (onset in seconds), ``dur``
        (duration in seconds) and ``speaker`` (label of the form
        ``'<prefix>_<int>'``).
    pid : hashable
        Participant id. When it is a key of ``main_speakers`` only the
        speakers listed there are kept; otherwise every speaker is kept.
    size : int or float
        Length of the output signal in seconds.
    fs : int or float
        Sample rate of the output signal, in samples per second.

    Returns
    -------
    numpy.ndarray
        1-D float array with ``round(size * fs)`` samples: 1 inside the kept
        segments, 0 everywhere else. Segments extending past the end are
        truncated.
    """
    # Explicit integer length so a fractional `size` or `fs` does not crash.
    n_samples = int(round(size * fs))
    vad = np.zeros(n_samples)

    for _, row in df.iterrows():
        spk = int(row['speaker'].split('_')[1])
        if pid in main_speakers and spk not in main_speakers[pid]:
            continue

        # Clamp at 0: a negative index would be interpreted relative to the
        # end of the array and silently drop or misplace the segment.
        ini = max(round(row['ini'] * fs), 0)
        end = max(round((row['ini'] + row['dur']) * fs), 0)

        vad[ini:end] = 1

    return vad

In [18]:
def store_vad(df: pd.DataFrame, pid, fname, size=9900, fs=100):
    """Compute the voice-activity signal for `pid` and save it twice.

    The signal returned by ``make_vad`` is written to ``fname + '.vad'`` as
    text (one integer per line) and to ``fname + '.wav'`` as a WAV file with
    sample rate `fs`.

    Parameters
    ----------
    df : pandas.DataFrame
        Diarization table, forwarded to ``make_vad``.
    pid : hashable
        Participant id, forwarded to ``make_vad``.
    fname : str
        Output path without extension.
    size, fs
        Forwarded to ``make_vad``; `fs` is also the WAV sample rate.
    """
    activity = make_vad(df, pid, size=size, fs=fs)

    text_target = fname + '.vad'
    np.savetxt(text_target, activity, fmt='%d')

    wav_target = fname + '.wav'
    wavwrite(wav_target, fs, activity)


In [19]:
from pathlib import Path

# The writers below do not create missing directories, so make sure the
# output folder exists before the first file is stored.
os.makedirs(vad_path, exist_ok=True)

# One VAD (.vad + .wav) per diarization file; the file stem is the
# participant id. Sorting makes the processing order reproducible.
for f in sorted(Path(diarizations_path).glob('*.rttm')):
    df = load_diarization(f)
    pid = int(f.stem)
    out_path = os.path.join(vad_path, f.stem)
    store_vad(df, pid, out_path)

# Redact audio

In [12]:
def redact_audio(audio, df, pid, fs=8000):
    """Silence everything in `audio` outside the kept speakers' segments.

    Parameters
    ----------
    audio : numpy.ndarray
        Audio samples, indexed by sample along the first axis.
    df : pandas.DataFrame
        Segments with the columns ``ini`` (onset in seconds), ``dur``
        (duration in seconds) and ``speaker`` (label of the form
        ``'<prefix>_<int>'``).
    pid : hashable
        Participant id. When it is a key of ``main_speakers`` only the
        speakers listed there are kept; otherwise every speaker is kept.
    fs : int or float
        Sample rate of `audio`, in samples per second.

    Returns
    -------
    numpy.ndarray
        New array with the shape and dtype of `audio`: the original samples
        inside the kept segments, zeros everywhere else. `audio` itself is
        not modified.
    """
    # zeros_like keeps the input dtype. A plain np.zeros(shape) is float64,
    # which turns integer PCM into a float WAV with out-of-range sample
    # values once the result is written back to disk.
    new_audio = np.zeros_like(audio)

    for _, row in df.iterrows():
        spk = int(row['speaker'].split('_')[1])
        if pid in main_speakers and spk not in main_speakers[pid]:
            continue

        # Clamp at 0: a negative index would be interpreted relative to the
        # end of the array and silently drop or misplace the segment.
        ini = max(round(row['ini'] * fs), 0)
        end = max(round((row['ini'] + row['dur']) * fs), 0)

        new_audio[ini:end] = audio[ini:end]

    return new_audio

In [13]:
from pathlib import Path

# wavwrite does not create missing directories, so make sure the output
# folder exists before the first file is stored.
os.makedirs(redacted_audio_path, exist_ok=True)

# One redacted recording per diarization file; the file stem is the
# participant id. Sorting makes the processing order reproducible.
for f in sorted(Path(diarizations_path).glob('*.rttm')):
    df = load_diarization(f)
    pid = int(f.stem)

    audio_path = os.path.join(denoised_audio_path, f'{pid}.wav')
    out_path = os.path.join(redacted_audio_path, f'{pid}.wav')

    # Use the sample rate stored in the input file for both the
    # seconds-to-samples conversion and the output header, instead of
    # assuming 8 kHz: a mismatch would misplace every segment.
    fs, audio = wavread(audio_path)
    audio = redact_audio(audio, df, pid, fs=fs)
    wavwrite(out_path, fs, audio)