# Imported Libraries

In [2]:
# Standard Libraries
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
# PyTorch
import torch
import torchaudio
import torchaudio.transforms as transforms
# Audio Playback
from IPython.display import Audio

# Dataset Variables

In [19]:
# --- Audio parameters ---
SAMPLE_RATE = 44_100        # Hz; default rate handed to get_random_effects
final_SAMPLE_RATE = 6_000   # Hz; rate of the augmented files written by generate_data
sample_duration = 8         # seconds; fixed length enforced by cut_pad

# --- Dataset locations and size ---
path_do_data_dir = 'DATA/'
path_to_augmented_data = "AUGMENTED_DATA/"
size_augmented_dataset = 960  # number of generated samples for each instrument

# Original Dataset

In [4]:
# Get the list of files in the 'Piano Loops' directory using os.listdir().
# os.listdir() returns entries in arbitrary order, so the names are sorted to make
# row 0 (and any seeded random pick) refer to the same file on every run.
# Hidden entries (names starting with '.', e.g. '.DS_Store') are skipped so they
# are never handed to the audio loader.
piano_file_list = [path_do_data_dir + 'Piano Loops/' + filename
                   for filename in sorted(os.listdir(path_do_data_dir + 'Piano Loops'))
                   if not filename.startswith('.')]

# Create a pandas DataFrame with the "path_to_data" and "Instrument" columns for "Piano" entries
piano_df = pd.DataFrame({"path_to_data": piano_file_list, "Instrument": "Piano"})

# Same listing for the 'Guitar Loops' directory (sorted, hidden entries skipped)
guitar_file_list = [path_do_data_dir + 'Guitar Loops/' + filename
                    for filename in sorted(os.listdir(path_do_data_dir + 'Guitar Loops'))
                    if not filename.startswith('.')]

# Create a DataFrame with the "path_to_data" and "Instrument" columns for "Guitar" entries
guitar_df = pd.DataFrame({"path_to_data": guitar_file_list, "Instrument": "Guitar"})

# Auxiliary Functions

In [5]:
# -> THIS FUNCTION HAS BEEN TESTED

# waveform has to be torch tensor
def cut_pad(waveform, SAMPLE_RATE, sample_duration=sample_duration):
    """Force ``waveform`` to last exactly ``sample_duration`` seconds.

    A waveform that is too long is cropped at a random start position; one that
    is too short is zero-padded, with the padding split randomly between the
    beginning and the end. A waveform of the right length is returned unchanged.

    Args:
        waveform: 2-D torch tensor laid out as (rows, samples); rows are treated
            as channels.
        SAMPLE_RATE: sample rate of ``waveform`` in Hz.
        sample_duration: target duration in seconds (may be fractional).

    Returns:
        Tensor with the same number of rows and
        ``round(sample_duration * SAMPLE_RATE)`` samples per row.
    """
    num_rows, sig_len = waveform.shape
    # The target length must be an integer sample count: a fractional product
    # (e.g. 2.5 s) would otherwise reach random.randint / torch.zeros as a float.
    max_len = int(round(sample_duration * SAMPLE_RATE))

    if sig_len > max_len:  # cut waveform in random position
        start_pos = random.randint(0, sig_len - max_len)
        return waveform[:, start_pos:start_pos + max_len]

    if sig_len < max_len:  # pad waveform
        pad_begin_len = random.randint(0, max_len - sig_len)
        pad_end_len = max_len - sig_len - pad_begin_len

        # new_zeros keeps the padding on the waveform's dtype and device, so the
        # concatenation neither promotes the dtype nor mixes devices.
        pad_begin = waveform.new_zeros((num_rows, pad_begin_len))
        pad_end = waveform.new_zeros((num_rows, pad_end_len))
        return torch.cat((pad_begin, waveform, pad_end), 1)

    return waveform

In [None]:
# -> THIS FUNCTION HAS BEEN TESTED

def get_random_effects(SAMPLE_RATE=SAMPLE_RATE, reverb_prob=0.5, filter_prob=0.5):
    """Build a random SoX effect chain for audio augmentation.

    The chain always contains ``speed`` followed by ``rate``; a single-pole
    ``lowpass`` and a ``reverb`` are added at random.

    Args:
        SAMPLE_RATE: rate (Hz) the ``rate`` effect resamples back to after the
            speed change.
        reverb_prob: probability of appending the reverb effect.
        filter_prob: probability of prepending the lowpass filter.

    Returns:
        List of effects, each a list of string arguments, in the order
        lowpass (optional), speed, rate, reverb (optional).
    """
    speed = np.random.uniform(0.7, 1.7)  # Random speed between 0.7 and 1.7
    filter_value = np.random.randint(5000) + 1  # lowpass argument drawn from 1..5000

    # Each optional effect is *included* when its draw falls below its probability,
    # so both probability parameters mean the same thing.
    use_reverb = np.random.rand() < reverb_prob
    use_filter = np.random.rand() < filter_prob

    effects = []
    if use_filter:
        effects.append(["lowpass", "-1", str(filter_value)])  # apply single-pole lowpass filter
    effects.append(["speed", str(speed)])  # reduce or increase the speed
    # `speed` only changes the sample rate, so the `rate` effect with the original
    # sample rate must follow it. It is kept whether or not reverb is used.
    effects.append(["rate", f"{SAMPLE_RATE}"])
    if use_reverb:
        effects.append(["reverb", "-w"])  # reverb with the "-w" option
    return effects

In [17]:
# -> THIS FUNCTION HAS BEEN TESTED

def change_sample_rate(waveform, original_SAMPLE_RATE, final_SAMPLE_RATE):
    """Resample ``waveform`` from ``original_SAMPLE_RATE`` to ``final_SAMPLE_RATE``.

    Args:
        waveform: tensor to resample.
        original_SAMPLE_RATE: current sample rate of ``waveform``.
        final_SAMPLE_RATE: desired sample rate.

    Returns:
        Tuple ``(resampled_waveform, final_SAMPLE_RATE)``.
    """
    # Build the torchaudio Resample transform for this pair of rates and apply it.
    resampler = transforms.Resample(orig_freq=original_SAMPLE_RATE,
                                    new_freq=final_SAMPLE_RATE)
    return resampler(waveform), final_SAMPLE_RATE

# Test Functions

In [8]:
# Load the first piano loop; it is the waveform used by the checks in this section.
# Note: this rebinds the global SAMPLE_RATE to the rate reported for that file.
SAMPLE_WAV = piano_df.loc[0, 'path_to_data']
waveform, SAMPLE_RATE = torchaudio.load(SAMPLE_WAV, normalize=True)

In [9]:
# Play back the unmodified sample at its own sample rate
Audio(data=waveform.detach().numpy(), rate=SAMPLE_RATE)

In [10]:
# Duration = number of samples per row divided by the sample rate
duration_seconds = round(waveform.shape[1] / SAMPLE_RATE, 2)
print(f"Audio Duration: {duration_seconds} seconds")

Audio Duration: 7.5 seconds


In [11]:
# Check change_sample_rate: resample the loaded sample to final_SAMPLE_RATE and listen to it
check, check_SAMPLE_RATE = change_sample_rate(
    waveform=waveform,
    original_SAMPLE_RATE=SAMPLE_RATE,
    final_SAMPLE_RATE=final_SAMPLE_RATE,
)

Audio(data=check.detach().numpy(), rate=check_SAMPLE_RATE)

In [12]:
# check cut_pad function: each printed duration should equal the requested one

# 50 s exercises the pad branch, 5 s exercises the cut branch
for requested_duration in (50, 5):
    check = cut_pad(waveform, SAMPLE_RATE=SAMPLE_RATE, sample_duration=requested_duration)
    print(check.shape[1]/SAMPLE_RATE)

# TODO: plot to check random position....

50.0
5.0


In [13]:
# check get_random_effects function: draw one random chain (reverb only 10% of the time)

effects = get_random_effects(reverb_prob=0.1, SAMPLE_RATE=SAMPLE_RATE)

# Apply effects and listen at the sample rate reported for the processed signal
check, check_SAMPLE_RATE = torchaudio.sox_effects.apply_effects_tensor(
    tensor=waveform,
    sample_rate=SAMPLE_RATE,
    effects=effects,
)
Audio(check, rate=check_SAMPLE_RATE)

# Augment Dataset

In [14]:
def generate_data(instrument, instrument_df, file_name, check=False, final_SAMPLE_RATE=final_SAMPLE_RATE):
    """Create one augmented audio file from a randomly chosen source recording.

    Steps: pick a random row of ``instrument_df``, load its file, apply a random
    effect chain, resample to ``final_SAMPLE_RATE``, cut/pad to the global
    ``sample_duration`` and save the result as
    ``<path_to_augmented_data><instrument> Loops/<file_name>.wav``.

    Args:
        instrument: instrument name; selects the output sub-directory.
        instrument_df: DataFrame with a "path_to_data" column of source files.
        file_name: output file name without extension.
        check: when True, print a progress message after every step.
        final_SAMPLE_RATE: sample rate of the saved file.

    Returns:
        None. The augmented waveform is written to disk.
    """
    # randomly choose original source waveform; the draw is a *position*, so use
    # .iloc, which also works when the frame does not have a default 0..n-1 index
    ind = random.randrange(len(instrument_df))
    source_wav = instrument_df.iloc[ind]['path_to_data']
    waveform, SAMPLE_RATE = torchaudio.load(source_wav, normalize=True)
    if check: print('succesfully loaded audio signal')

    # randomly process audio signal
    effects = get_random_effects(SAMPLE_RATE=SAMPLE_RATE, reverb_prob=0.1)
    waveform, new_SAMPLE_RATE = torchaudio.sox_effects.apply_effects_tensor(waveform, SAMPLE_RATE, effects)
    if check: print('succesfully processed audio signal')

    # change sample rate (from whatever rate the effect chain reported)
    waveform, final_SAMPLE_RATE = change_sample_rate(waveform=waveform,
                                                     original_SAMPLE_RATE=new_SAMPLE_RATE,
                                                     final_SAMPLE_RATE=final_SAMPLE_RATE)
    if check: print('succesfully resampled audio signal')

    # set size -> has to be at end not to have sparsity mess with resample function
    waveform = cut_pad(waveform, SAMPLE_RATE=final_SAMPLE_RATE, sample_duration=sample_duration)
    if check: print('succesfully set audio signal size')

    # save to dataset; create the target directory first so a fresh checkout
    # does not fail on a missing folder
    output_dir = path_to_augmented_data + instrument + ' Loops/'
    os.makedirs(output_dir, exist_ok=True)
    output_path = output_dir + str(file_name) + '.wav'
    torchaudio.save(output_path, waveform, final_SAMPLE_RATE)
    if check: print('succesfully saved audio signal')

In [14]:
# Smoke test: generate a single augmented piano sample with step-by-step messages enabled
generate_data('Piano', piano_df, file_name='piano_aug_1', check=True)

succesfully loaded audio signal
succesfully processed audio signal
succesfully resampled audio signal
succesfully set audio signal size
succesfully saved audio signal


In [None]:
# generate piano data
# NOTE(review): file numbering starts at 68 -- presumably to continue after files
# produced by an earlier run; confirm before re-running from scratch.
for sample_idx in range(size_augmented_dataset):
    piano_file_name = f"piano_aug_{sample_idx + 68}"
    generate_data(instrument='Piano', instrument_df=piano_df,
                  file_name=piano_file_name, check=False)
    print(f"generated {sample_idx + 1} samples")

generated 1 samples
generated 2 samples
generated 3 samples
generated 4 samples
generated 5 samples
generated 6 samples
generated 7 samples
generated 8 samples
generated 9 samples
generated 10 samples
generated 11 samples
generated 12 samples
generated 13 samples
generated 14 samples
generated 15 samples
generated 16 samples
generated 17 samples
generated 18 samples
generated 19 samples
generated 20 samples
generated 21 samples
generated 22 samples
generated 23 samples
generated 24 samples
generated 25 samples
generated 26 samples
generated 27 samples


In [None]:
# generate guitar data
# The source recordings must come from guitar_df: sampling from piano_df would
# save piano audio under the 'Guitar Loops' directory with guitar file names.
for i in range(size_augmented_dataset):
    generate_data(instrument='Guitar', 
                  instrument_df=guitar_df, 
                  file_name='guitar_aug_' + str(i + 1), 
                  check=False)