# Data Loader?
- hi

In [1]:
import os
import numpy as np
import librosa
from tensorflow.keras.utils import Sequence

In [2]:
import threading, sys, os
import time
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras import optimizers
from collections import namedtuple

In [3]:
import numpy as np
from src.util.global_function import mkdir_p


In [4]:
from src.util.math_function import create_padding_mask, create_look_ahead_mask
from src.losses.custom_loss import mse_with_proper_loss, MSE_Custom_Loss_No_Length, pit_with_outputsize, pit_with_stft_trace
from src.models.Layers import TransformerSpeechSep
from src.models.Schedulers import CustomSchedule
from src.models.Real_Layers import T5Model, T5ModelNoMaskCreationModel, T5ModelYesMaskCreationModel
from src.pre_processing.data_pre_processing import load_data
from src.models.T5_variations import T5ChangedSTFT

In [160]:
Config = namedtuple('Config',  
    field_names="d_ff,     d_kv,     d_model,              dropout, feed_forward_proj, num_layers, init_factor," 
                "layer_norm_epsilon, model_type, num_heads, positional_embedding, n_epochs, vocab_size, relative_attention_num_buckets,"
                    "model_path, wav_type, size_type, train_type, loss_type, learning_rate_type,"
                    "input_size, output_size, batch_size, case, ckpt_path, tr_path, val_path, tt_path,"
                    "test_wav_dir, is_load_model")
args = Config( 2048      , 64      , 512              , 0.1 , "gated-gelu", 4       , 1.,
                1e-06    , "t5"             , 8 , "absolute" , 5     , 129   , 32,
                "CKPT", "wav8k", "min", "train-360", "mse", "inverse_root",
                40, 40, 25, 'trace', 'C:/J_and_J_Research/CKPT/gen_code', 
                'C:/J_and_J_Research/mycode/wsj0_2mix/use_this/tr/', 
                'C:/J_and_J_Research/mycode/wsj0_2mix/use_this/cv/',
                'C:/J_and_J_Research/mycode/wsj0_2mix/use_this/tt/', 
                'C:/J_and_J_Research/test_wav/gen',
                True) 

In [6]:

def create_masks(inp, tar, length=None):
    # Encoder padding mask
    enc_padding_mask = create_padding_mask(inp, length) # (batch, 1, 1, seq_len)

    # Used in the 2nd attention block in the decoder.
    # This padding mask is used to mask the encoder outputs.
    dec_padding_mask = create_padding_mask(inp, length)

    # Used in the 1st attention block in the decoder.
    # It is used to pad and mask future tokens in the input received by
    # the decoder.
    look_ahead_mask = create_look_ahead_mask(tf.shape(tar)[1]) # (seq_len, seq_len)
    dec_target_padding_mask = create_padding_mask(tar, length) # (batch, 1, 1, seq_len)
    combined_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask) # (batch, 1, seq_len, seq_len)

    return enc_padding_mask, combined_mask, dec_padding_mask

# Raw 1Conv Layer 
- 주원씨가 만든 raw data를 conv로 embedding한거 클래스임니다.

In [31]:
class RawDataGenerator(Sequence):
    def __init__(self, Mix, wav_dir, files, batch_size=10, shuffle=True):
        self.Mix = Mix
        self.wav_dir = wav_dir
        self.files = files
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()
    
    def on_epoch_end(self):
        self.indexes = np.arange(len(self.Mix))
        
        if self.shuffle:
            np.random.shuffle(self.indexes)
    
    def __audioread__(self, path, offset=0.0, duration=None, sample_rate=16000):
        signal = librosa.load(path, sr=sample_rate, mono=False, offset=offset, duration=duration)

        return signal[0]
    
    def __padding__(self, data):
        n_batch = len(data)
        max_len = max([d.shape[0] for d in data])
        pad = np.zeros((n_batch, max_len, data[0].shape[1]))
        
        for i in range(n_batch):
            pad[i, :data[i].shape[0]] = data[i]
        
        return pad
        
    def __data_generation__(self, Mix_list):
        sample_rate = 8000
        L = 40
        
        mix_wav_list = []
        label_wav_list = []
        for name in Mix_list:
            name = name.strip('\n')
            
            s1_wav_name = self.wav_dir + self.files + '/s1/' + name
            
            # ------- AUDIO READ -------
            s1_wav = (self.__audioread__(s1_wav_name,  offset=0.0, duration=None, sample_rate=sample_rate))
            # --------------------------
            
            # ------- TIME AXIS CALCULATE -------
            K = int(np.ceil(len(s1_wav) / L))
            # -----------------------------------
            
            # ------- PADDING -------
            pad_len = K * L
            pad_s1 = np.concatenate([s1_wav, np.zeros([pad_len - len(s1_wav)])])
            # -----------------------
            
            # ------- RESHAPE -------
            s1 = np.reshape(pad_s1, [K, L])
            # -----------------------
            
            # ------- CONCAT S1 S2 -------
            # ----------------------------
            
            label_wav_list.append(s1)
        
        return label_wav_list, label_wav_list

    
    def __len__(self):
        return int(np.floor(len(self.Mix) / self.batch_size))
    
    def __getitem__(self, index):
        indexes = self.indexes[index * self.batch_size : (index + 1) * self.batch_size]
        Mix_list = [self.Mix[k] for k in indexes]
        
        if self.files is not 'tt':
            mix, labels = self.__data_generation__(Mix_list)
            
            # Get Lengths(K value of each batch)
            lengths = np.array([m.shape[0] for m in mix])
            tiled = np.tile(np.expand_dims(lengths, 1), [1, labels[0].shape[1]])
            tiled = np.expand_dims(tiled, 1)
            
            # Padding
            mix_pad = self.__padding__(mix) # [Batch, Time_step, Dimension]
            label_pad = self.__padding__(labels) # [Batch, Time_step, Dimension * 2]
            
            return mix_pad, np.concatenate([label_pad, tiled], axis=1), lengths
        else:
            mix, labels = self.__data_generation__(Mix_list)
            
            # Get Lengths(K value of each batch)
            lengths = np.array([m.shape[0] for m in mix])
            tiled = np.tile(np.expand_dims(lengths, 1), [1, labels[0].shape[1]])
            tiled = np.expand_dims(tiled, 1)
            
            # Padding
            mix_pad = self.__padding__(mix) # [Batch, Time_step, Dimension]
            
            return mix_pad, tiled, lengths, Mix_list

In [32]:
WAV_DIR = './mycode/wsj0_2mix/use_this/'
LIST_DIR = './mycode/wsj0_2mix/use_this/lists/'

In [33]:
# Directory List file create
wav_dir = WAV_DIR
output_lst = LIST_DIR

for folder in ['tr', 'cv', 'tt']:
    wav_files = os.listdir(wav_dir + folder + '/mix')
    output_lst_files = output_lst + folder + '_wav.lst'
    with open(output_lst_files, 'w') as f:
        for file in wav_files:
            f.write(file + "\n")

print("Generate wav file to .lst done!")

Generate wav file to .lst done!


In [152]:
batch_size = 2

train_dataset = 0
valid_dataset = 0
test_dataset = 0

name_list = []
for files in ['tr', 'cv', 'tt']:
    # --- Lead lst file ---
    output_lst_files = LIST_DIR + files + '_wav.lst'
    fid = open(output_lst_files, 'r')
    lines = fid.readlines()
    fid.close()
    # ---------------------
    
    if files == 'tr':
        train_dataset = RawDataGenerator(lines, WAV_DIR, files, batch_size)
    elif files == 'cv':
        valid_dataset = RawDataGenerator(lines, WAV_DIR, files, batch_size)
    else:
        test_batch = 1
        test_dataset = RawDataGenerator(lines, WAV_DIR, files, 's1', test_batch)

In [35]:
def build_real_T5(input_size, output_size, args):
    inputs = (tf.keras.layers.Input(shape=(None, input_size)),
    tf.keras.layers.Input(shape=(None, output_size)),
    tf.keras.layers.Input(shape=(1)) )
    # targets, length
        
    transformer = T5ModelNoMaskCreationModel(num_layers=args.num_layers, d_model=args.d_model, num_heads=args.num_heads, d_ff=args.d_ff, d_kv = args.d_kv, vocab_size=0, feed_forward_proj = args.feed_forward_proj, 
            relative_attention_num_buckets=args.relative_attention_num_buckets, eps=args.layer_norm_epsilon, dropout=args.dropout, factor=args.init_factor,
            embed_or_dense="conv", target_size=output_size)

    inp, tar, length = inputs
    #dec_padding_mask = tf.squeeze(dec_padding_mask)
    #outputs = tf.keras.layers.Conv1D(filters=129, kernel_size=2, activation = 'sigmoid', padding='same')(inp)

    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(inp, tar, length)
    enc_padding_mask = tf.squeeze(enc_padding_mask)
    outputs = transformer(input_ids=inp, attention_mask=enc_padding_mask, 
            decoder_input_ids=tar, 
             training=False) # (batch_size, tar_seq_len, target_vocab_size)
    
    model = T5ChangedSTFT(inputs=inputs, outputs=outputs)
    model.summary()
    learning_rate = CustomSchedule(args.d_model)
    optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98,epsilon=1e-8)
    #optimizer = tfa.optimizers.AdamW(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999,epsilon=1e-8, weight_decay = 0.01)
    #model.add_metric(tf.keras.metrics.Mean(name='train_loss')(outputs))
    #model.compile(loss=mse_with_proper_loss(output_size), optimizer=optimizer)
    model.compile(loss=pit_with_stft_trace(output_size), optimizer=optimizer)
#     model.compile(loss=keras.losses.mean_squared_error, optimizer=adam)

    return model


In [36]:
ckpt_path = args.ckpt_path
mkdir_p(ckpt_path) # model check point 폴더 만드는 코드

filepath = ckpt_path + "/CKP_ep_{epoch:d}__loss_{val_loss:.5f}_.h5"

# validation loss에 대해서 좋은 것만 저장됨
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min', save_weights_only=True
)

# early stop 하는 부분인데, validation loss에 대해서 제일 좋은 모델이 저장됨
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', mode='min', verbose=1, patience=50, restore_best_weights=True
)

# Training part

epoch = args.n_epochs
strategy = tf.distribute.MirroredStrategy() # '/gpu:0','/gpu:1','/gpu:2','/gpu:4','/gpu:5','/gpu:6','/gpu:7'
#physical_devices = tf.config.list_physical_devices('GPU')
#tf.config.set_visible_devices(physical_devices[0:7], 'GPU')
#strategy =  tf.distribute.MultiWorkerMirroredStrategy()
print('장치의 수: {}'.format(strategy.num_replicas_in_sync))

with strategy.scope():
    # 사용 안할 때는 load_model 주석 처리 하자
#     model = load_model('./CKPT/CKP_ep_29__loss_102.63367_.h5', custom_objects={'pit_loss': pit_with_outputsize(OUTPUT_SIZE)})

    model = build_real_T5(args.input_size, args.output_size, args)
    #if args.is_load_model is True:

    tf.executing_eagerly()

history = model.fit(
    train_dataset,
    epochs=epoch,
    validation_data=valid_dataset,
    shuffle=True,
    callbacks=[checkpoint_cb, early_stopping_cb],
)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)
장치의 수: 1
Model: "t5_changed_stft_5"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_16 (InputLayer)           [(None, None, 40)]   0                                            
__________________________________________________________________________________________________
input_18 (InputLayer)           [(None, 1)]          0                                            
__________________________________________________________________________________________________
tf.compat.v1.shape_20 (TFOpLamb (3,)                 0           input_16[0][0]                   
__________________________________________________________________________________________________
tf.compat.v1.squeeze_20 (TFOpLa None                 0           input

KeyboardInterrupt: 

In [37]:
# Show training and validation loss graph

def graph_util(history):
    fig = plt.figure()
    fig = matplotlib.pyplot.gcf()
    fig.set_size_inches(18.5, 10.5)
    plt.plot(history.history['loss'], c='b')
    plt.plot(history.history['val_loss'], c='r')
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['training loss', 'validation loss'], loc='upper left')
    
    plt.show()

In [38]:
graph_util(history)

NameError: name 'history' is not defined

In [39]:

import matplotlib
import matplotlib.pyplot as plt

# Predict
- 만들어봅시다. token by token

In [63]:
from tqdm.auto import tqdm

In [65]:
def evaluate(inp, transformer, output, length=None, max_length=1800):
    # inp sentence is portuguese, hence adding the start and end token
    encoder_input = inp
    max_length = inp.shape[1]
    cur_length = output.shape[1]
    batch_size = inp.shape[0]
    
    # as the target is english, the first word to the transformer should be the
    # english start token.
    #startMask = tf.cast(tf.fill([1,258],-1),dtype=tf.float32)
    #output = tf.expand_dims(startMask, 0)
    #output = tf.repeat(output, batch_size, 0)
    #zero_clipping = tf.constant([0.])
    output = tf.cast(output,tf.float32)
    print("output:",output)

    progress_bar = tqdm(range(max_length-cur_length))
    for i in range(max_length-cur_length):
        # predictions.shape == (batch_size, seq_len, vocab_size)
        predictions = transformer((encoder_input, output, length), training=False)

        # select the last word from the seq_len dimension
        predictions = predictions[:, -1:, :]  # (batch_size, 1, vocab_size)
        #predictions = tf.math.maximum(predictions, zero_clipping)
        predicted_id = predictions

        # concatentate the predicted_id to the output which is given to the decoder
        # as its input.
        output = tf.concat([output, predicted_id], axis=1)
        progress_bar.update(1)

    return output

In [97]:
mkdir_p(args.test_wav_dir) # Result wav 폴더 만드는 코드

sample_rate = 8000
window_size = 256
window_shift = 128


In [61]:

with tf.device('/gpu:0'):
    ckpt_path = args.ckpt_path
    model_path = ckpt_path + "/CKP_ep_8__loss_0.00011_.h5"

    # T5 load
    if True:
        model = build_real_T5(args.input_size, args.output_size, args)
        model.load_weights(model_path)
    # baseline
    if False:
        model = load_model(model_path, custom_objects={'pit_loss': pit_with_outputsize(OUTPUT_SIZE)})
    cnt = 0
    check = 0
       

Model: "t5_changed_stft_7"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_22 (InputLayer)           [(None, None, 40)]   0                                            
__________________________________________________________________________________________________
input_24 (InputLayer)           [(None, 1)]          0                                            
__________________________________________________________________________________________________
tf.compat.v1.shape_28 (TFOpLamb (3,)                 0           input_22[0][0]                   
__________________________________________________________________________________________________
tf.compat.v1.squeeze_28 (TFOpLa None                 0           input_24[0][0]                   
__________________________________________________________________________________

In [49]:
tf.slice(mix_pad,[0,0,0],[-1,1,-1])

<tf.Tensor: shape=(2, 1, 40), dtype=float64, numpy=
array([[[-1.52587891e-04,  9.15527344e-05,  1.22070312e-04,
         -1.83105469e-04, -9.15527344e-05, -6.10351562e-05,
         -3.05175781e-04, -3.05175781e-05, -9.15527344e-05,
         -2.44140625e-04, -1.83105469e-04, -1.83105469e-04,
          3.05175781e-05,  3.05175781e-05, -9.15527344e-05,
         -5.49316406e-04, -6.71386719e-04, -1.22070312e-03,
         -5.79833984e-04, -5.79833984e-04, -3.35693359e-04,
         -5.49316406e-04, -3.05175781e-04, -1.52587891e-04,
         -2.44140625e-04,  6.10351562e-05,  4.27246094e-04,
          6.71386719e-04,  3.05175781e-04,  0.00000000e+00,
         -3.05175781e-05, -2.74658203e-04, -6.10351562e-05,
         -3.05175781e-04, -6.10351562e-05,  9.15527344e-05,
         -3.96728516e-04, -3.96728516e-04, -6.10351562e-04,
         -6.71386719e-04]],

       [[-1.52587891e-04,  9.15527344e-05,  1.52587891e-04,
         -2.13623047e-04, -1.22070312e-04, -9.15527344e-05,
         -3.6621093

In [72]:
for batch in test_dataset:
    mix_pad, tiled, lengths, names = batch
    # input_batch, angle_batch, label_batch, name, length
    tf.executing_eagerly() # requires r1.7
    
    
    #startMask = tf.cast(tf.fill([tf.shape(label_batch)[0], 1, tf.shape(label_batch)[-1]],-1),dtype=tf.float32)
    #tar = tf.concat([startMask, label_batch],1)
    max_length = 132
    tar_inp = mix_pad[:, :-2, :]
    cnt = 0
    check = 0
    results = tf.reshape(result,[mix_pad.shape[0],-1,1])
    sample_rate = 8000
    output = tf.slice(mix_pad,[0,0,0],[-1,1,-1])
    result = evaluate(mix_pad, model, output, lengths, max_length)
    for i in range(mix_pad.shape[0]):
        wav_name = names[i]
        wav_name1 = args.test_wav_dir + '/' + wav_name + '_s1.wav'
        wav = tf.squeeze(results[i,:,:]).numpy()
        audiowrite(wav, wav_name1, sample_rate, True, True)
    if check == -1:
        break

    if (cnt + 1) % 10 == 0:
        print((cnt + 1) * args.batch_size)

    cnt += 1

output: tf.Tensor(
[[[-1.5258789e-04  9.1552734e-05  1.5258789e-04 -2.1362305e-04
   -9.1552734e-05 -9.1552734e-05 -3.3569336e-04 -3.0517578e-05
   -9.1552734e-05 -2.4414062e-04 -2.1362305e-04 -2.1362305e-04
    3.0517578e-05  3.0517578e-05 -9.1552734e-05 -6.1035156e-04
   -7.3242188e-04 -1.3427734e-03 -6.1035156e-04 -6.4086914e-04
   -3.6621094e-04 -6.1035156e-04 -3.3569336e-04 -1.5258789e-04
   -2.7465820e-04  9.1552734e-05  4.5776367e-04  7.3242188e-04
    3.3569336e-04  0.0000000e+00 -3.0517578e-05 -3.0517578e-04
   -6.1035156e-05 -3.3569336e-04 -6.1035156e-05  9.1552734e-05
   -4.2724609e-04 -4.2724609e-04 -6.7138672e-04 -7.6293945e-04]]

 [[ 3.6621094e-04 -9.1552734e-05 -9.1552734e-05 -6.1035156e-04
   -3.9672852e-04 -1.5258789e-04 -5.1879883e-04 -5.4931641e-04
   -4.2724609e-04  0.0000000e+00  3.6621094e-04 -2.7465820e-04
   -3.6621094e-04 -2.4414062e-04 -3.9672852e-04  3.3569336e-04
   -1.5258789e-04 -3.9672852e-04 -6.1035156e-05  2.4414062e-04
    1.2207031e-04 -6.4086914e-04 

HBox(children=(FloatProgress(value=0.0, max=1411.0), HTML(value='')))

KeyboardInterrupt: 

In [168]:
cnt = 0
check = 0
results = tf.reshape(result,[mix_pad.shape[0],-1,1])
sample_rate = 8000
for i in range(mix_pad.shape[0]):
    wav_name = names[i].strip()
    wav_name1 = args.test_wav_dir + '/' + wav_name + '_s1.wav'
    wav = tf.squeeze(results[i,:,:]).numpy()
    wav = wav[:lengths[i]*40]
    audiowrite(wav, wav_name1, sample_rate, True, True)


In [147]:
print(result[0][1][0:11])
signal[0][40:51]

tf.Tensor(
[-0.18641873 -0.18021329 -0.1403747  -0.09617954 -0.16668871 -0.12911767
 -0.11958054 -0.14608869 -0.1580817  -0.15201487 -0.1519129 ], shape=(11,), dtype=float32)


array([-1.2207031e-03, -1.0681152e-03, -7.6293945e-04, -3.6621094e-04,
       -6.7138672e-04, -4.5776367e-04, -9.1552734e-05, -3.3569336e-04,
       -5.7983398e-04, -2.7465820e-04, -9.1552734e-05], dtype=float32)

In [142]:
results = tf.reshape(result,[mix_pad.shape[0],-1,1])
print(tf.squeeze(results[0,:,:]).numpy()[41:51])

array([-0.18021329, -0.1403747 , -0.09617954, -0.16668871, -0.12911767,
       -0.11958054, -0.14608869, -0.1580817 , -0.15201487, -0.1519129 ],
      dtype=float32)

In [125]:
sample_rate = 8000
audiowrite(tf.squeeze(results[0,:,:]).numpy(), "C:/J_and_J_Research/test_wav/gen/test.wav", sample_rate, False, False)

0

In [None]:
    
    
for i in range(args.batch_size):
    if i >= input_batch.shape[0]:
        check = -1
        break
    else:
        wav_name = name[i][0].numpy().decode('utf-8')

        wav_name1 = args.test_wav_dir + '/' + wav_name + '_s1.wav'
        wav_name2 = args.test_wav_dir + '/' + wav_name + '_s2.wav'
        wav1 = istft(spec1[i, 0:input_batch[i].shape[0], :], size=window_size, shift=window_shift)
        wav2 = istft(spec2[i, 0:input_batch[i].shape[0], :], size=window_size, shift=window_shift)
        audiowrite(wav1, wav_name1, sample_rate, True, True)
        audiowrite(wav2, wav_name2, sample_rate, True, True)

if check == -1:
    break

if (cnt + 1) % 10 == 0:
    print((cnt + 1) * args.batch_size)

cnt += 1

In [122]:
from scipy.io.wavfile import write as wav_write


In [158]:
write("example2.wav", sample_rate, tf.squeeze(train_test[1,:,:]).numpy())

In [111]:
tf.squeeze(results[0,:,:]).numpy().min()

-0.20463917

In [113]:
signal = librosa.load("C:/J_and_J_Research/mycode/wsj0_2mix/use_this/tt/s1/447o0302_0.62948_441c0212_-0.62948.wav", sr=sample_rate, mono=False, offset=0.0, duration=None)

In [157]:

train_test = tf.reshape(next(iter(train_dataset))[0],[mix_pad.shape[0],-1,1])
train_test

<tf.Tensor: shape=(2, 56480, 1), dtype=float64, numpy=
array([[[-1.52587891e-04],
        [ 9.15527344e-05],
        [ 1.52587891e-04],
        ...,
        [ 0.00000000e+00],
        [ 0.00000000e+00],
        [ 0.00000000e+00]],

       [[-1.52587891e-04],
        [ 9.15527344e-05],
        [ 1.52587891e-04],
        ...,
        [-6.40869141e-04],
        [ 7.62939453e-04],
        [-6.22558594e-03]]])>

In [119]:

write("test.wav", sample_rate, signal[0])

In [150]:
def audiowrite(data, path, samplerate=16000, normalize=False, threaded=True):
    """ Write the audio data ``data`` to the wav file ``path``
    The file can be written in a threaded mode. In this case, the writing
    process will be started at a separate thread. Consequently, the file will
    not be written when this function exits.
    :param data: A numpy array with the audio data
    :param path: The wav file the data should be written to
    :param samplerate: Samplerate of the audio data
    :param normalize: Normalize the audio first so that the values are within
        the range of [INTMIN, INTMAX]. E.g. no clipping occurs
    :param threaded: If true, the write process will be started as a separate
        thread
    :return: The number of clipped samples
    """
    data = data.copy()
    int16_max = np.iinfo(np.int16).max
    int16_min = np.iinfo(np.int16).min

    if normalize:
        if not data.dtype.kind == 'f':
            data = data.astype(np.float)
        data /= np.max(np.abs(data))

    if data.dtype.kind == 'f':
        data *= int16_max

    sample_to_clip = np.sum(data > int16_max)
    if sample_to_clip > 0:
        print('Warning, clipping {} samples'.format(sample_to_clip))
    data = np.clip(data, int16_min, int16_max)
    data = data.astype(np.int16)

    if threaded:
        threading.Thread(target=wav_write, args=(path, samplerate, data)).start()
    else:
        wav_write(path, samplerate, data)

    return sample_to_clip