# Dataset creation for sound localisation



In [9]:
from data.utils import *
from audio_utils import filter_voice, get_fft_gram, get_fbanks_gcc
import pandas as pd

# Parameters for the creation of the dataset
INPUT_LENGTH = 1000  # chunk size handed to split_audio_chunks (presumably milliseconds - confirm)
INPUT_TYPE = "gammagram"  # feature type selected in get_feature: "gammagram" or "fbank_gcc"
CSV_DATA = "./sound_angles.csv"  # CSV listing the recordings to process
CHUNCK_OVERLAP = 150  # overlap argument handed to split_audio_chunks
OUTPUT_DIR = "dataset_" + str(INPUT_LENGTH)
RESAMPLING_F = 0  # a falsy value (0) disables resampling
THRESHOLD_VOICE = 60  # threshold argument handed to filter_voice
SAVE_RAW = False  # when True, each kept chunk is also written as a wav file

# Create every sub-directory on its own: a pre-existing (or partially created)
# OUTPUT_DIR must not prevent "data" and "raw" from being created.
os.makedirs(os.path.join(OUTPUT_DIR, "data"), exist_ok=True)
os.makedirs(os.path.join(OUTPUT_DIR, "raw"), exist_ok=True)



## Process the audio files and create chunks of audio

In [10]:
def get_feature(signal, fs):
    """Compute the feature representation selected by ``INPUT_TYPE``.

    Args:
        signal: Audio chunk to convert. The callers in this file pass the two
            channels stacked along axis 1.
        fs: Sampling rate of ``signal``.

    Returns:
        The output of ``get_fft_gram`` when ``INPUT_TYPE`` is ``"gammagram"``,
        or of ``get_fbanks_gcc`` when it is ``"fbank_gcc"``.

    Raises:
        ValueError: If ``INPUT_TYPE`` is neither of the supported values.
            Previously this case silently returned ``None``, which was then
            saved to disk as if it were a feature.
    """
    if INPUT_TYPE == "gammagram":
        return get_fft_gram(signal, fs, time_window=0.015, channels=128, freq_min=120)
    if INPUT_TYPE == "fbank_gcc":
        # get_fbanks_gcc receives the transposed array
        # (presumably channels-first - confirm against audio_utils).
        return get_fbanks_gcc(np.transpose(signal), fs, win_size=1024, hop_size=512, nfbank=50)
    raise ValueError(
        f"Unsupported INPUT_TYPE {INPUT_TYPE!r}; expected 'gammagram' or 'fbank_gcc'"
    )

# Split every recording listed in CSV_DATA into chunks, keep the chunks that
# pass filter_voice, save one feature file per kept chunk and write an index
# CSV (dataset.csv) with one row per saved chunk.
data_csv = pd.read_csv(CSV_DATA)
rows = []  # one dict per saved chunk; converted to a DataFrame once at the end

file_cpt = 0
for _, item in tqdm(data_csv.iterrows(), total=data_csv.shape[0]):
    audio_filename = item['audio_filename']
    end_audio = item['stop_audio_timestamp'] - item['start_audio_timestamp']
    sample_rate, chunks_channel1, chunks_channel2 = split_audio_chunks(audio_filename, end_audio,
                                                                       size_chunks=INPUT_LENGTH,
                                                                       overlap=CHUNCK_OVERLAP)

    for signal1, signal2 in zip(chunks_channel1, chunks_channel2):

        signal1 = librosa.util.normalize(signal1 / 32768.0)
        signal2 = librosa.util.normalize(signal2 / 32768.0)

        # filter_voice always sees the un-resampled chunk, so it must be given
        # the file's own sample rate (never the resampling target).
        if not filter_voice(signal1, sample_rate, threshold=THRESHOLD_VOICE):
            continue

        filename = str(file_cpt) + '.npy'

        # Keep the rate of the saved chunk separate from the file's rate so
        # that later chunks of the same file are not affected.
        out_rate = sample_rate
        if RESAMPLING_F:
            # scipy.signal.resample expects the number of output samples, not
            # a frequency: scale the chunk length by the ratio of the rates.
            num1 = int(round(len(signal1) * RESAMPLING_F / sample_rate))
            num2 = int(round(len(signal2) * RESAMPLING_F / sample_rate))
            signal1 = np.array(scipy.signal.resample(signal1, num1))
            signal2 = np.array(scipy.signal.resample(signal2, num2))
            out_rate = RESAMPLING_F

        data = np.stack((signal1, signal2), axis=1)
        feat = get_feature(data, out_rate)

        np.save(os.path.join(OUTPUT_DIR, "data", filename), feat)
        if SAVE_RAW:
            # The raw audio goes under its own .wav name (not the .npy name).
            filename_raw = str(file_cpt) + '.wav'
            scipy.io.wavfile.write(os.path.join(OUTPUT_DIR, "raw", filename_raw), out_rate, data)

        # Copy the source row and point it at the saved feature file.
        row = item.to_dict()
        row['audio_filename'] = filename
        rows.append(row)
        file_cpt += 1

# Build the index in one go: DataFrame.append inside the loop is quadratic and
# no longer exists in pandas >= 2.0.
new_df = pd.DataFrame(rows, columns=data_csv.columns)
new_df.to_csv(os.path.join(OUTPUT_DIR, "dataset.csv"), index=False)

print(f"Created {file_cpt} samples")

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1186.0), HTML(value='')))


Created 30820 samples


## Process the background audio file and create chunks of background audio



In [11]:
# Split the background recording into chunks, save one feature file per chunk
# and write an index CSV (background.csv) in which every chunk has label -1.
background_file = "/home/icub/Documents/Jonas/dataset/data/background/mix_bg.wav"

# NOTE(review): overlap is set to the chunk size here; what that means depends
# on split_audio_chunks - confirm this yields the intended chunks.
sample_rate, chunks_channel1, chunks_channel2 = split_audio_chunks(background_file, None,
                                                                   size_chunks=INPUT_LENGTH,
                                                                   overlap=INPUT_LENGTH)
bg_rows = []  # one dict per saved chunk; converted to a DataFrame once at the end
cpt = 0
for (audio_c1, audio_c2) in tqdm(zip(chunks_channel1, chunks_channel2), total=len(chunks_channel2)):
    filename = f"{cpt}_bg.npy"

    audio_c1 = librosa.util.normalize(audio_c1 / 32768.0)
    audio_c2 = librosa.util.normalize(audio_c2 / 32768.0)

    # Keep the rate of the saved chunk separate from the file's rate: the
    # original rate is needed to resample every following chunk correctly.
    out_rate = sample_rate
    if RESAMPLING_F:
        # scipy.signal.resample expects the number of output samples, not a
        # frequency: scale the chunk length by the ratio of the rates.
        num1 = int(round(len(audio_c1) * RESAMPLING_F / sample_rate))
        num2 = int(round(len(audio_c2) * RESAMPLING_F / sample_rate))
        audio_c1 = np.array(scipy.signal.resample(audio_c1, num1))
        audio_c2 = np.array(scipy.signal.resample(audio_c2, num2))
        out_rate = RESAMPLING_F

    data = np.stack((audio_c1, audio_c2), axis=1)
    feat = get_feature(data, out_rate)
    np.save(os.path.join(OUTPUT_DIR, "data", filename), feat)
    cpt += 1
    bg_rows.append({"audio_filename": filename, "labels": -1})


# Build the index in one go: DataFrame.append inside the loop is quadratic and
# no longer exists in pandas >= 2.0.
df = pd.DataFrame(bg_rows, columns=['audio_filename', 'labels'])
df.to_csv(os.path.join(OUTPUT_DIR, "background.csv"), index=False)
print(f"Created {cpt} background samples")

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=2439.0), HTML(value='')))


Created 2439 background samples
