# Additive noise datasets

In [3]:
from sbb_project import consts

In [16]:
# Import the data augmentation components (perturbations and audio segments) from the NeMo ASR collection
from nemo.collections.asr.parts.preprocessing import perturb, segment
import glob
import IPython.display as ipd
import torch

### Types of perturbations

In [2]:
# Display the registry that maps each perturbation name to the class implementing it.
perturb.perturbation_types

{'speed': nemo.collections.asr.parts.preprocessing.perturb.SpeedPerturbation,
 'time_stretch': nemo.collections.asr.parts.preprocessing.perturb.TimeStretchPerturbation,
 'gain': nemo.collections.asr.parts.preprocessing.perturb.GainPerturbation,
 'impulse': nemo.collections.asr.parts.preprocessing.perturb.ImpulsePerturbation,
 'shift': nemo.collections.asr.parts.preprocessing.perturb.ShiftPerturbation,
 'noise': nemo.collections.asr.parts.preprocessing.perturb.NoisePerturbation,
 'white_noise': nemo.collections.asr.parts.preprocessing.perturb.WhiteNoisePerturbation,
 'rir_noise_aug': nemo.collections.asr.parts.preprocessing.perturb.RirAndNoisePerturbation,
 'transcode_aug': nemo.collections.asr.parts.preprocessing.perturb.TranscodePerturbation,
 'random_segment': nemo.collections.asr.parts.preprocessing.perturb.RandomSegmentPerturbation}

### Obtain all audio files and define loader

In [12]:
# Resolve the manifest path of each dataset split (train / test / val) from the
# project constants; MANIFEST_FILE is a format template that takes the split name.
train_dataset, test_dataset, val_dataset = (
    consts.MANIFEST_DIR.joinpath(consts.MANIFEST_FILE.format(split))
    for split in ("train", "test", "val")
)

In [14]:
!head -n 1 {train_dataset}

{"audio_filepath": "/home/user/code/sbb_asr/data/sbb_exchange/all_samples/audios/9dae9654-d72f-4b0c-9212-f2dc8e58f1ad.wav", "text": "rangierfahrt von eins ins gleis eins drei antworten", "duration": 12.0}


In [17]:
# Collect every .wav file of the speech dataset. glob returns matches in
# arbitrary (filesystem-dependent) order, so sort them to make index-based
# access such as audio_files[0] reproducible across runs and machines.
audio_files = sorted(glob.glob(str(consts.SBB_DATA_EXCHANGE_AUDIO.joinpath('*.wav'))))

In [None]:
# Collect every .wav file of the noise dataset. glob returns matches in
# arbitrary (filesystem-dependent) order, so sort them to keep the order of
# the noise clips stable between runs.
noise_files = sorted(glob.glob(str(consts.NOISE_DIR.joinpath('*.wav'))))

In [24]:
def load_audio(filepath, sr) -> segment.AudioSegment:
    """Read an audio file into a NeMo ``AudioSegment``.

    Args:
        filepath: Location of the audio file to read.
        sr: Sample rate forwarded to ``AudioSegment.from_file`` as ``target_sr``.

    Returns:
        The ``AudioSegment`` built by ``AudioSegment.from_file``.
    """
    return segment.AudioSegment.from_file(filepath, target_sr=sr)

In [53]:
# Load the first audio file at 16 kHz as the unmodified reference clip ...
sample_segment = load_audio(audio_files[0], sr = 16000)
# ... and render an inline audio player for it (must stay the cell's last expression).
ipd.Audio(sample_segment.samples, rate = 16000)

## White Noise

In [71]:
# Load a separate copy of the first clip (16 kHz) to apply white noise to.
sample_segment_whitenoise = load_audio(audio_files[0], sr = 16000)

In [72]:
# Configure the white-noise perturbation.
# NOTE(review): min_level/max_level are presumably the bounds (in dB) of the
# randomly chosen noise level — confirm against the NeMo documentation.
white_noise = perturb.WhiteNoisePerturbation(min_level=-80, max_level=-40)

In [73]:
# Apply the perturbation. The return value is not used and the same segment is
# played back in the next cell, so perturb() presumably edits the segment in place.
white_noise.perturb(sample_segment_whitenoise)

In [74]:
# Listen to the clip after the white-noise perturbation.
ipd.Audio(sample_segment_whitenoise.samples, rate=16000)

## Shift Perturbation

In [78]:
# Load a separate copy of the first clip (16 kHz) to apply the shift to.
sample_segment_shift = load_audio(audio_files[0], sr = 16000)

In [79]:
# Configure the shift perturbation with shift bounds given in milliseconds.
# NOTE(review): presumably a random shift between the two bounds is drawn per
# call — confirm against the NeMo documentation.
shift = perturb.ShiftPerturbation(min_shift_ms=250.0, max_shift_ms=500.0)

In [80]:
# Apply the perturbation. The return value is not used and the same segment is
# played back in the next cell, so perturb() presumably edits the segment in place.
shift.perturb(sample_segment_shift)

In [81]:
# Listen to the clip after the shift perturbation.
ipd.Audio(sample_segment_shift.samples, rate=16000)

## Real World Noise Perturbation

In [84]:
# Path of the noise manifest inside the manifest directory.
# NOTE(review): the recorded run of this cell failed with AttributeError because
# sbb_project.consts does not define NOISE_MANIFEST_FILE — add that constant
# (or point this at an existing one) before running the cells that use it.
noise_manifest = consts.MANIFEST_DIR.joinpath(consts.NOISE_MANIFEST_FILE)

AttributeError: module 'sbb_project.consts' has no attribute 'NOISE_MANIFEST_FILE'

In [83]:
def _noise_segments(duration, duration_max, duration_stride):
    """Return ``(offset, length)`` pairs, in seconds, describing one clip.

    A clip no longer than ``duration_max`` yields a single segment spanning the
    whole clip. A longer clip yields windows of at most ``duration_max`` seconds
    whose start offsets advance by ``duration_stride``.
    """
    if duration <= duration_max:
        return [(0.0, duration)]

    # A non-positive stride would never advance the offset (endless loop).
    if duration_stride <= 0:
        raise ValueError(f"duration_stride must be positive, got {duration_stride}")

    segments = []
    offset = 0.0
    while offset < duration:
        # The last windows are clipped to what is left of the clip.
        segments.append((offset, min(duration_max, duration - offset)))
        offset += duration_stride
    return segments


def write_noise_manifest(filepath, manifest_file, duration_max=None, duration_stride=1.0, filter_long=True, duration_limit=15.0):
    """Write a JSON-lines noise manifest for one or several audio files.

    Each manifest line is a JSON object with the keys ``audio_filepath``,
    ``duration``, ``label`` (always ``'noise'``), ``text`` (always ``'_'``) and
    ``offset``.

    Args:
        filepath: A single audio path (``str`` or path-like) or an iterable of
            such paths.
        manifest_file: Path of the manifest to create; an existing file is
            overwritten.
        duration_max: Maximum segment length in seconds. Clips longer than this
            are split into overlapping windows. ``None`` disables splitting.
        duration_stride: Distance in seconds between the start offsets of
            consecutive windows of a split clip.
        filter_long: When true, clips longer than ``duration_limit`` are
            skipped entirely.
        duration_limit: Length threshold in seconds used by ``filter_long``.

    Raises:
        ValueError: If a clip has to be split and ``duration_stride`` is not
            positive.

    Files that cannot be read are reported and skipped; they do not abort the
    manifest.
    """
    # Imported locally because the notebook's import cell provides neither
    # module; without them the calls below fail with NameError.
    import json
    import os

    import librosa

    if duration_max is None:
        duration_max = 1e9

    # Accept a single path as well as a collection of paths (e.g. a glob result).
    if isinstance(filepath, (str, bytes, os.PathLike)):
        filepaths = [filepath]
    else:
        filepaths = list(filepath)

    with open(manifest_file, 'w', encoding='utf-8') as fout:
        for path in filepaths:
            try:
                # sr=None keeps the native sample rate: no resampling is needed
                # just to measure the clip length.
                samples, _sr = librosa.load(path, sr=None)
                # The audio must be passed by keyword; recent librosa releases
                # reject it as a positional argument.
                duration = librosa.get_duration(y=samples, sr=_sr)
            except Exception as err:
                print(f"\n>>>>>>>>> WARNING: Librosa failed to load file {path}. Skipping this file !\n")
                print(f"Reason: {err!r}")
                continue

            if filter_long and duration > duration_limit:
                print(f"Skipping sound sample {path}, exceeds duration limit of {duration_limit}")
                continue

            segments = _noise_segments(duration, duration_max, duration_stride)

            for offset, length in segments:
                metadata = {
                    # os.fspath keeps str paths unchanged and makes pathlib
                    # paths JSON-serializable.
                    'audio_filepath': os.fspath(path),
                    'duration': length,
                    'label': 'noise',
                    'text': '_',  # for compatibility with ASRAudioText collection
                    'offset': offset,
                }

                json.dump(metadata, fout)
                fout.write('\n')

            print(f"Wrote {len(segments)} segments for filename {os.path.basename(path)}")

    print("Finished preparing manifest !")

In [None]:
# Build the noise manifest at noise_manifest from the noise clips collected in noise_files.
write_noise_manifest(noise_files, noise_manifest)