Load the audio samples. Before running, set `data_path` to the folder that contains them.

In [1]:
import os
import shutil
import sys
import wave

import cv2 as cv
import librosa
import numpy as np
import skimage.io
from scipy.io import wavfile

# project-local modules are resolved from ../src/, so the path must be
# extended before they are imported
sys.path.insert(0, '../src/')
import vision
import utils

In [None]:


def collect_audio_files(path):
    """Recursively collect the paths of all ``.wav`` files below ``path``.

    Parameters
    ----------
    path : str
        Directory to scan. Sub-directories are visited as well.

    Returns
    -------
    list of str
        Paths (``path`` joined with the relative location of each file) of
        every file whose name ends with ``.wav``, sorted so that the result
        does not depend on the order in which the filesystem lists entries.
        Empty if ``path`` does not exist or holds no ``.wav`` file.
    """
    wav_paths = [
        os.path.join(root, name)
        for root, _dirs, filenames in os.walk(path)
        for name in filenames
        if name.endswith('.wav')
    ]
    # os.walk yields entries in arbitrary order; sort for reproducible datasets
    wav_paths.sort()
    return wav_paths

# Root folder of the audio samples: adjust to the local setup before running.
data_path = "/home/lorenzo/Music/tape-samples/"

datasets_paths = []  # one entry per folder that directly contains files
wav_dict = {}        # label -> list of .wav paths collected for that label
labels = []          # folder names, index-aligned with datasets_paths

for (root, dirs, files) in os.walk(data_path, topdown=True):
    # sorting in place fixes the order in which os.walk descends (topdown=True)
    dirs.sort()

    if len(files) != 0:
        # join with "" appends a trailing separator only when one is missing
        datasets_paths.append(os.path.join(root, ""))

        # normpath drops a trailing separator first, otherwise the top-level
        # folder (data_path ends with "/") would get an empty label
        label_name = os.path.basename(os.path.normpath(root))
        labels.append(label_name)

print(f"{len(labels)} datasets collected")

# NOTE(review): folders sharing the same base name map to the same key, so the
# later one replaces the earlier one in wav_dict.
for label, dataset_path in zip(labels, datasets_paths):
    wav_dict[label] = collect_audio_files(dataset_path)
    print(f"Collected {len(wav_dict[label])} files for label {label}")

- Save each channel as its own .wav file
- Load each separate channel and compute its spectrogram
- Divide the spectrogram in half and save the two halves in separate folders

In [None]:
# Temporary folder receiving one .wav file per channel of every sample.
separate_channels_path = "/home/lorenzo/Music/separate-channels-tape-samples/"
utils.create_folder(separate_channels_path)

# settings
hop_length = 256 # number of samples per time-step in spectrogram
n_mels = 128 # number of bins in spectrogram. Height of image
time_steps = 512 # number of time-steps. Width of image (if there's sufficient samples)

# Temporary folders receiving the two halves of each spectrogram image.
right_half_path = '../right-half/'
left_half_path = '../left-half/'

utils.create_folder(right_half_path)
utils.create_folder(left_half_path)

for label in labels:
    # create subdirectories (trailing "" keeps the trailing separator)
    label_dir = os.path.join(separate_channels_path, label, "")
    utils.create_folder(label_dir)

    for sample in wav_dict[label]:
        fs, data = wavfile.read(sample)

        # wavfile.read returns a 1-D array for mono files; give it an explicit
        # channel axis so the per-channel loop below also works for them
        if data.ndim == 1:
            data = data.reshape(-1, 1)

        # file name without folder and extension, shared by the channel .wav
        # files and the spectrogram image
        sample_name = os.path.splitext(os.path.basename(sample))[0]

        # NOTE(review): the image name carries no channel index; if
        # vision.divide_half writes to <path>/<filename>, every channel of a
        # sample targets the same file. Confirm this is intended.
        filename = sample_name + '.png'

        # for each channel:
        for i in range(data.shape[1]):
            # save audio of channel
            channel_filename = os.path.join(label_dir, sample_name + "ch" + str(i) + ".wav")
            wavfile.write(channel_filename, fs, data[:, i])

            y, sr = librosa.load(channel_filename, offset=0.0, duration=None)

            img = vision.spectrogram_image(y, sr=sr, out="", hop_length=hop_length, n_mels=n_mels, save=False)

            # highlight split region and divide the image in half
            thresholded = vision.highlight_split(img)
            comparison_img = np.concatenate([img, thresholded])

            img_splits = vision.find_splits(comparison_img)

            # take the most central split as the middle (not always right, needs to change)
            if len(img_splits) != 0:
                vision.divide_half(img=comparison_img, filename=filename, middle=img_splits[len(img_splits)//2], left_path=left_half_path, right_path=right_half_path)

Extract multiple 128x256 segments from each half to build a dataset that will later be divided into training, validation and test sets

In [3]:
# Folders handed to vision.compute_segments alongside the two half-image
# folders (presumably the destinations of the segments; confirm in vision).
left_segments = '../dataset/c/'
right_segments = '../dataset/w/'

vision.compute_segments(
    [left_half_path, right_half_path],
    [left_segments, right_segments],
    step=64,
    window_size=256,
    overwrite=True,
    multiple=True,
    offset=64,
)

Substituted folder ../dataset/c/
Substituted folder ../dataset/w/


Delete temporary directories

In [3]:
# os.rmdir only removes empty directories and raises OSError otherwise; these
# temporary folders were filled by the cells above, so remove whole trees.
for temp_dir in (left_half_path, right_half_path, separate_channels_path):
    # skip folders that are already gone so the cell can be re-run safely
    if os.path.isdir(temp_dir):
        shutil.rmtree(temp_dir)