In [12]:
# https://www.kaggle.com/masoudmzb/gradient-tape-tutorial-audio-proccesing-example

In [1]:
import tensorflow as tf
import os

from os.path import isfile, join
import numpy as np
import shutil
from tensorflow import keras
from pathlib import Path

from IPython.display import display, Audio
import subprocess

In [2]:
# Our variables are defined here

In [None]:
# Down load data from kaggle

In [7]:
DATASET_ROOT = "audio_train/16000_pcm_speeches"

my_audios_folder = 'audio'
my_noises_folder = 'noise'

DATASET_AUDIOS_PATH = os.path.join(DATASET_ROOT, my_audios_folder)
DATASET_NOISES_PATH = os.path.join(DATASET_ROOT, my_noises_folder)

# percentage of samples to use for validation
valid_split = 0.1

# Seed to use when shuffling the dataset and the noise
shuffle_seed = 43

# The sampling rate to use.
# This is the one used in all of the audio samples.
# We will resample all of the noise to this sampling rate.
# This will also be the output size of the audio wave samples
# (since all samples are of 1 second long)
SAMPLING_RATE = 16000

# The factor to multiply the noise with according to:
#   noisy_sample = sample + noise * prop * scale
#      where prop = sample_amplitude / noise_amplitude
SCALE = 0.5

BATCH_SIZE = 128
EPOCHS = 30

In [5]:
#    Structure of data folders :
#    
#    main_directory/
#    ...speaker_a/
#    ...speaker_b/
#    ...speaker_c/
#    ...speaker_d/
#    ...speaker_e/
#    ...other/
#    ..._background_noise_/
#    After sorting, we end up with the following structure:
#    
#    main_directory/
#    ...audio/
#    ......speaker_a/
#    ......speaker_b/
#    ......speaker_c/
#    ......speaker_d/
#    ......speaker_e/
#    ...noise/
#    ......other/
#    ......_background_noise_/

In [8]:
# If folder `audio`, does not exist, create it, otherwise do nothing
if os.path.exists(DATASET_AUDIOS_PATH) is False:
    os.makedirs(DATASET_AUDIOS_PATH)

# If folder `noise`, does not exist, create it, otherwise do nothing
# MY OWN NOTE : LET's SEE tf.io.gfile
if tf.io.gfile.exists(DATASET_NOISES_PATH) is False:
    tf.io.gfile.makedirs(DATASET_NOISES_PATH)

for folder in os.listdir(DATASET_ROOT):
    if os.path.isdir(os.path.join(DATASET_ROOT, folder)):
        if folder in [my_audios_folder, my_noises_folder]:
            # If folder is `audio` or `noise`, do nothing
            continue
        elif folder in ["other", "_background_noise_"]:
            # If folder is one of the folders that contains noise samples,
            # move it to the `noise` folder
            shutil.move(
                os.path.join(DATASET_ROOT, folder),
                os.path.join(DATASET_NOISES_PATH, folder),
            )
        else:
            # Otherwise, it should be a speaker folder, then move it to
            # `audio` folder
            shutil.move(
                os.path.join(DATASET_ROOT, folder),
                os.path.join(DATASET_AUDIOS_PATH, folder),
            )

In [9]:
# Get the list of all noise files
noise_paths = []
for subdir in tf.io.gfile.listdir(DATASET_NOISES_PATH):
    subdir_path = Path(DATASET_NOISES_PATH) / subdir
    if os.path.isdir(subdir_path):
        noise_paths += [
            os.path.join(subdir_path, filepath)
            for filepath in os.listdir(subdir_path)
            if filepath.endswith(".wav")
        ]

print(
    "Found {} files belonging to {} directories".format(
        len(noise_paths), len(os.listdir(DATASET_NOISES_PATH))
    )
)

Found 6 files belonging to 2 directories


In [10]:
command = (
    "for dir in `ls -1 " + DATASET_NOISES_PATH + "`; do "
    "for file in `ls -1 " + DATASET_NOISES_PATH + "/$dir/*.wav`; do "
    "sample_rate=`ffprobe -hide_banner -loglevel panic -show_streams "
    "$file | grep sample_rate | cut -f2 -d=`; "
    "if [ $sample_rate -ne 16000 ]; then "
    "ffmpeg -hide_banner -loglevel panic -y "
    "-i $file -ar 16000 temp.wav; "
    "mv temp.wav $file; "
    "fi; done; done"
)

os.system(command)

1

In [11]:
# Split noise into chunks of 16000 each
def load_noise_sample(path):
    sample, sampling_rate = tf.audio.decode_wav(
        tf.io.read_file(path), desired_channels=1
    )
    if sampling_rate == SAMPLING_RATE:
        # Number of slices of 16000 each that can be generated from the noise sample
        slices = int(sample.shape[0] / SAMPLING_RATE)
        sample = tf.split(sample[: slices * SAMPLING_RATE], slices)
        return sample
    else:
        print("Sampling rate for {} is incorrect. Ignoring it".format(path))
        return None


noises = []
for path in noise_paths:
    sample = load_noise_sample(path)
    if sample:
        noises.extend(sample)
noises = tf.stack(noises)

print(
    "{} noise files were split into {} noise samples where each is {} sec. long".format(
        len(noise_paths), noises.shape[0], noises.shape[1] // SAMPLING_RATE
    )
)

Sampling rate for audio_train\16000_pcm_speeches\noise\other\exercise_bike.wav is incorrect. Ignoring it
Sampling rate for audio_train\16000_pcm_speeches\noise\other\pink_noise.wav is incorrect. Ignoring it
Sampling rate for audio_train\16000_pcm_speeches\noise\_background_noise_\10convert.com_Audience-Claps_daSG5fwdA7o.wav is incorrect. Ignoring it
Sampling rate for audio_train\16000_pcm_speeches\noise\_background_noise_\doing_the_dishes.wav is incorrect. Ignoring it
Sampling rate for audio_train\16000_pcm_speeches\noise\_background_noise_\dude_miaowing.wav is incorrect. Ignoring it
Sampling rate for audio_train\16000_pcm_speeches\noise\_background_noise_\running_tap.wav is incorrect. Ignoring it


IndexError: list index out of range