# This is a notebook to create my sample dataset

In [33]:
import scipy
import librosa
import os
import numpy as np
import glob2
import joblib

In [34]:
def transform_sample_tf(sample_path, tf_path, target_sr=16000):
    """Takes a sample that is not ready for use in the model,
        and transforms it to a sample that the model can use.

        The audio is loaded at `target_sr`, its STFT is computed, and the
        real and imaginary parts of the STFT are written as two separate
        joblib dumps named 'real_tfs' and 'imag_tfs' inside `tf_path`.

        @param sample_path - the path to the sample, a String
        @param tf_path - the directory to save the information of the sample;
                         it is created if it does not exist yet
        @param target_sr - the sampling rate the audio is loaded at
        @returns: tf_path, the directory holding the transformed sample"""
    signal, _ = librosa.core.load(sample_path, sr=target_sr, dtype=np.float32)
    # librosa lays multi-channel audio out as (channels, samples), so the
    # first channel is row 0. With the default mono down-mix used above the
    # signal is already 1-D and this guard is a no-op.
    if signal.ndim > 1:
        signal = signal[0]
    # complex-valued STFT of the signal
    tf = librosa.core.stft(signal)
    # make sure the output directory exists before writing into it
    # (an empty tf_path means "current directory", which needs no creation)
    if tf_path:
        os.makedirs(tf_path, exist_ok=True)
    # write it to file, separated into the real and imaginary parts
    real_p = os.path.join(tf_path, 'real_tfs')
    imag_p = os.path.join(tf_path, 'imag_tfs')
    joblib.dump(np.real(tf), real_p, compress=0)
    joblib.dump(np.imag(tf), imag_p, compress=0)
    return tf_path


In [35]:
# Directory containing the raw audio files that still need to be transformed.
unprocessed_path = "samples"
# Output directory for the 'train' split of the transformed dataset.
target_path = os.path.join('tf_samples', 'my_dataset', 'train')

In [36]:
def sep():
    """Return the path separator of the operating system running the notebook.

    Delegates to `os.sep` (a backslash on Windows, a forward slash elsewhere)
    instead of a hard-coded platform flag, so the notebook also works when it
    is not run on Windows.
    """
    return os.sep

def extract_name(path):
    """Return the file name of `path` without its directory and extension.

    A file with the path './.../folder/<name>.wav' returns '<name>'.
    Both forward slashes and backslashes are treated as directory
    separators, so Windows-style and POSIX-style paths give the same result
    on every platform. Only the final extension is removed, so a name that
    itself contains dots (e.g. 'my.song.wav') is kept intact ('my.song').

    @param path - the path to the file, a String
    @returns: the bare file name, a String
    """
    # normalise backslashes so a single split handles both separator styles
    file_name = path.replace('\\', '/').rsplit('/', 1)[-1]
    return os.path.splitext(file_name)[0]

# test: a Windows-style path should yield the bare file name, 'example'
extract_name(".\\some folder\\example.wav")

'example'

In [37]:
# Collect every entry directly inside the raw-sample directory.
# os.path.join inserts the separator of the current OS, so the pattern is
# valid on any platform rather than relying on a hard-coded separator.
sample_list = glob2.glob(os.path.join(unprocessed_path, '*'))
sample_list

['samples\\bass1.wav',
 'samples\\bass2.wav',
 'samples\\drums1.wav',
 'samples\\drums2.wav',
 'samples\\vocals1.wav']

In [38]:
# Transform every raw sample into its own sub-directory of target_path.
for sample_path in sample_list:
    name = extract_name(sample_path)
    tf_path = os.path.join(target_path, name)
    # makedirs also creates the missing parent directories on a fresh
    # checkout and is a no-op when the directory already exists
    os.makedirs(tf_path, exist_ok=True)
    transform_sample_tf(sample_path, tf_path)

### Repeat the same transformation for the *val* and *test* splits

In [39]:
# Write the transformed samples into the 'val' split directory.
target_path = os.path.join('tf_samples', 'my_dataset', 'val')
# os.path.join builds the glob pattern with the separator of the current OS
sample_list = glob2.glob(os.path.join(unprocessed_path, '*'))
for sample_path in sample_list:
    name = extract_name(sample_path)
    tf_path = os.path.join(target_path, name)
    # makedirs also creates the missing parent directories and is a no-op
    # when the directory already exists
    os.makedirs(tf_path, exist_ok=True)
    transform_sample_tf(sample_path, tf_path)

In [40]:
# Write the transformed samples into the 'test' split directory.
target_path = os.path.join('tf_samples', 'my_dataset', 'test')
# os.path.join builds the glob pattern with the separator of the current OS
sample_list = glob2.glob(os.path.join(unprocessed_path, '*'))
for sample_path in sample_list:
    name = extract_name(sample_path)
    tf_path = os.path.join(target_path, name)
    # makedirs also creates the missing parent directories and is a no-op
    # when the directory already exists
    os.makedirs(tf_path, exist_ok=True)
    transform_sample_tf(sample_path, tf_path)