# Load data in

In [None]:
import os
from pathlib import Path

# Locate the dataset directory and the two text files that list the
# training and testing clips (one path per line, relative to data_dir).
current_dir = Path('./')
data_dir = current_dir / 'speech_commands_v0.01'
train_file_path = data_dir / "train_digit_list.txt"
test_file_path = data_dir / "testing_digit_list.txt"

# Read each list inside a context manager so the file handle is closed
# as soon as the paths have been collected.
with open(train_file_path, "r") as train_file:
    training_list = [data_dir / x for x in train_file.read().splitlines()]

with open(test_file_path, "r") as test_file:
    testing_list = [data_dir / x for x in test_file.read().splitlines()]

In [None]:
import librosa.feature as lf
import scipy.io.wavfile as wav
import numpy as np
from tqdm.notebook import tnrange

#initialize lists: raw one-second waveforms, mel-spectrogram features, word labels
all_train_wav_list = []
all_train_feat_list = []
all_train_labels = []

#loop through all audio files listed in the text file
for i in tnrange(len(training_list), desc='Load in files'):
    #load in the given audio file (fs is the sample rate, audio the samples)
    fs, audio = wav.read(training_list[i])

    if audio.size > fs:
        # if an audio file is longer than a second, clip it to a second
        z = audio[:fs]
    else:
        #if an audio file is a second or less, add zeros to it to make it a second
        z = np.zeros((fs,))
        z[:audio.size] = audio
    signal = z.astype('float')

    #here, we use the melspectrogram as a feature. You can use other features like
    #LPCs, mfccs, or whatever you find.  The Librosa library has more features,
    #and you can explore other libraries
    # The audio must be passed as y=...: librosa >= 0.10 made it keyword-only.
    feat = lf.melspectrogram(y=signal, sr=fs)
    all_train_wav_list.append(signal)
    # add a leading axis of length 1 in front of the 2-D spectrogram
    all_train_feat_list.append(feat.reshape(1, feat.shape[0], feat.shape[1]))

    # get labels from the file name (ie which word is in the audio file):
    # the label is the name of the directory that contains the clip
    which_word = str(training_list[i].parent.name)
    all_train_labels.append(which_word)

In [None]:
#initialize lists: raw one-second waveforms, mel-spectrogram features, word labels
all_test_wav_list = []
all_test_feat_list = []
all_test_labels = []

#loop through all audio files listed in the text file
for i in tnrange(len(testing_list), desc='Load in files'):
    #load in the given audio file (fs is the sample rate, audio the samples)
    fs, audio = wav.read(testing_list[i])

    if audio.size > fs:
        # if an audio file is longer than a second, clip it to a second
        z = audio[:fs]
    else:
        #if an audio file is a second or less, add zeros to it to make it a second
        z = np.zeros((fs,))
        z[:audio.size] = audio
    signal = z.astype('float')

    #here, we use the melspectrogram as a feature. You can use other features like
    #LPCs, mfccs, or whatever you find.  The Librosa library has more features,
    #and you can explore other libraries
    # The audio must be passed as y=...: librosa >= 0.10 made it keyword-only.
    feat = lf.melspectrogram(y=signal, sr=fs)
    all_test_wav_list.append(signal)
    # add a leading axis of length 1 in front of the 2-D spectrogram
    all_test_feat_list.append(feat.reshape(1, feat.shape[0], feat.shape[1]))

    # get labels from the file name (ie which word is in the audio file):
    # the label is the name of the directory that contains the clip
    which_word = str(testing_list[i].parent.name)
    all_test_labels.append(which_word)

# Generate original signal

In [None]:
from sklearn import preprocessing
import scipy as sp

# Stack the one-second training clips into a 2-D array (one row per clip)
# and append a trailing axis of length 1.
X_train_sig = np.vstack(all_train_wav_list)
X_train_sig = X_train_sig.reshape(X_train_sig.shape[0], X_train_sig.shape[1], 1)


# Map each word label to an integer class id
le = preprocessing.LabelEncoder()
le.fit(all_train_labels)
encoded_labels = le.transform(all_train_labels)

# One-hot encode the integer class ids (the encoder expects a column vector)
oh_enc = preprocessing.OneHotEncoder()
oh_enc.fit(encoded_labels.reshape(-1,1))

y_train = oh_enc.transform(encoded_labels.reshape(-1,1))
# Densify via the result's own toarray() method rather than the unbound
# sp.sparse.csr_matrix.toarray, which only resolves if scipy.sparse has
# already been imported by some other module.
y_train = y_train.toarray()

In [None]:
# Persist the clean training signals and their one-hot labels as .npy files
np.save(file="X_train_original.npy", arr=X_train_sig)
np.save(file="y_train.npy", arr=y_train)

In [None]:
# Stack the one-second test clips into a 2-D array (one row per clip)
# and append a trailing axis of length 1.
X_test_sig = np.vstack(all_test_wav_list)
X_test_sig = X_test_sig.reshape(X_test_sig.shape[0], X_test_sig.shape[1], 1)


# Reuse the encoders that were fitted on the training labels (`le`, `oh_enc`)
# instead of fitting fresh ones on the test labels. Refitting on the test set
# can assign different class ids / one-hot columns whenever the set of test
# labels differs from the training one, silently misaligning y_test with
# y_train. With the shared encoders an unseen test label raises instead.
encoded_labels = le.transform(all_test_labels)

y_test = oh_enc.transform(encoded_labels.reshape(-1,1))
y_test = y_test.toarray()

In [None]:
# Persist the clean TEST signals and labels. The previous version wrote the
# training arrays (X_train_sig / y_train) under the test file names.
np.save("X_test_original.npy", X_test_sig)
np.save("y_test.npy", y_test)

# Generate echo signal from one configuration

In [None]:
from tqdm.notebook import tnrange
import pyroomacoustics as pra
import scipy.io.wavfile as wav
import numpy as np
import librosa.feature as lf

# Outputs of this cell: reverberant one-second signals, their mel-spectrogram
# features, and the word labels, for every training clip.
all_train_reverb_signals =[]
all_train_reverb_feat=[]
trial_train_labels = []

# The desired reverberation time and dimensions of the room
rt60 = 1.0  # seconds
room_dim = [20, 30, 10]  # meters

# Derive the wall absorption and maximum reflection order from rt60 and the room size
e_absorption, max_order = pra.inverse_sabine(rt60, room_dim)

for i in tnrange(len(training_list)):

    fs, audio = wav.read(training_list[i])

    # Create a fresh room for every clip
    room = pra.ShoeBox(
        room_dim, fs=fs, materials=pra.Material(e_absorption), max_order=max_order
    )

    # place the source in the room (fixed position, playback delayed by 1.3 s)
    room.add_source([2.5, 3.73, 1.76], signal=audio, delay=1.3)

    # single microphone at a fixed position
    mic_locs = np.c_[
        [10, 1, 1],
    ]

    # finally place the array in the room
    room.add_microphone_array(mic_locs)

    # Run the simulation (this will also build the RIR automatically)
    room.simulate()

    # Flatten the recorded microphone signals to 1-D and keep the window
    # between 1.5 s and 2.5 s. The slice is shorter than fs samples if the
    # simulated signal ends before 2.5 s.
    mics_signals = room.mic_array.signals
    mics_signals = mics_signals.reshape(mics_signals.size,)
    z = mics_signals[int(1.5*fs):int(2.5*fs)]
    signal = z.astype('float')

    # The audio must be passed as y=...: librosa >= 0.10 made it keyword-only.
    feat = lf.melspectrogram(y=signal, sr=fs)
    all_train_reverb_signals.append(signal)
    all_train_reverb_feat.append(feat.reshape(1, feat.shape[0], feat.shape[1]))

    # get labels from the file name (ie which word is in the audio file)
    which_word = training_list[i].parent.name
    trial_train_labels.append(which_word)
  

In [None]:
# Stack the reverberant training clips row-wise, then append a trailing
# axis of length 1. vstack requires every clip to have the same length.
stacked_train_reverb = np.vstack(all_train_reverb_signals)
n_clips, n_samples = stacked_train_reverb.shape
X_train_reverb_sig = stacked_train_reverb.reshape(n_clips, n_samples, 1)
print(X_train_reverb_sig.shape)

In [None]:
# Persist the fixed-configuration reverberant training signals
np.save(file="X_train_reverb.npy", arr=X_train_reverb_sig)

In [None]:
# Outputs of this cell: reverberant one-second signals, their mel-spectrogram
# features, and the word labels, for every test clip.
all_test_reverb_signals =[]
all_test_reverb_feat=[]
trial_test_labels = []

# The desired reverberation time and dimensions of the room
rt60 = 1.0  # seconds
room_dim = [20, 30, 10]  # meters

# Derive the wall absorption and maximum reflection order from rt60 and the room size
e_absorption, max_order = pra.inverse_sabine(rt60, room_dim)

for i in tnrange(len(testing_list)):

    fs, audio = wav.read(testing_list[i])

    # Create a fresh room for every clip
    room = pra.ShoeBox(
        room_dim, fs=fs, materials=pra.Material(e_absorption), max_order=max_order
    )

    # place the source in the room (fixed position, playback delayed by 1.3 s)
    room.add_source([2.5, 3.73, 1.76], signal=audio, delay=1.3)

    # single microphone at a fixed position
    mic_locs = np.c_[
        [10, 1, 1],
    ]

    # finally place the array in the room
    room.add_microphone_array(mic_locs)

    # Run the simulation (this will also build the RIR automatically)
    room.simulate()

    # Flatten the recorded microphone signals to 1-D and keep the window
    # between 1.5 s and 2.5 s. The slice is shorter than fs samples if the
    # simulated signal ends before 2.5 s.
    mics_signals = room.mic_array.signals
    mics_signals = mics_signals.reshape(mics_signals.size,)
    z = mics_signals[int(1.5*fs):int(2.5*fs)]
    signal = z.astype('float')

    # The audio must be passed as y=...: librosa >= 0.10 made it keyword-only.
    feat = lf.melspectrogram(y=signal, sr=fs)
    all_test_reverb_signals.append(signal)
    all_test_reverb_feat.append(feat.reshape(1, feat.shape[0], feat.shape[1]))

    # get labels from the file name (ie which word is in the audio file)
    which_word = testing_list[i].parent.name
    trial_test_labels.append(which_word)

In [None]:
# Stack the reverberant test clips row-wise, then append a trailing
# axis of length 1. vstack requires every clip to have the same length.
stacked_test_reverb = np.vstack(all_test_reverb_signals)
n_clips, n_samples = stacked_test_reverb.shape
X_test_reverb_sig = stacked_test_reverb.reshape(n_clips, n_samples, 1)
print(X_test_reverb_sig.shape)

In [None]:
# Persist the fixed-configuration reverberant test signals
np.save(file="X_test_reverb.npy", arr=X_test_reverb_sig)

# Generate echo signal from a random source position (fixed room)

In [None]:
from tqdm.notebook import tnrange
import pyroomacoustics as pra
import scipy.io.wavfile as wav
import numpy as np
import librosa.feature as lf

# Outputs of this cell: reverberant one-second training signals (and their
# mel-spectrogram features) simulated with a randomly placed source.
# NOTE(review): no random seed is set in this cell, so the generated data
# differs between runs unless the RNG is seeded elsewhere.
all_train_reverb_random_signals =[]
all_train_reverb_random_feat=[]

# The desired reverberation time and dimensions of the room
rt60 = 1.0  # seconds
room_dim = [20, 30, 10]  # meters

# Derive the wall absorption and maximum reflection order from rt60 and the room size
e_absorption, max_order = pra.inverse_sabine(rt60, room_dim)

for i in tnrange(len(training_list)):

    fs, audio = wav.read(training_list[i])

    # Create a fresh room for every clip
    room = pra.ShoeBox(
        room_dim, fs=fs, materials=pra.Material(e_absorption), max_order=max_order
    )

    # place the source at a uniformly random position at least 1 m from every wall
    room.add_source([np.random.uniform(1,19), np.random.uniform(1,29), np.random.uniform(1,9)], signal=audio, delay=1.3)

    # single microphone at a fixed position
    mic_locs = np.c_[
        [10, 1, 1],
    ]

    # finally place the array in the room
    room.add_microphone_array(mic_locs)

    # Run the simulation (this will also build the RIR automatically)
    room.simulate()

    # Flatten the recorded microphone signals to 1-D and keep the window
    # between 1.5 s and 2.5 s.
    mics_signals = room.mic_array.signals
    mics_signals = mics_signals.reshape(mics_signals.size,)
    z = mics_signals[int(1.5*fs):int(2.5*fs)]
    signal = z.astype('float')

    # The audio must be passed as y=...: librosa >= 0.10 made it keyword-only.
    feat = lf.melspectrogram(y=signal, sr=fs)
    all_train_reverb_random_signals.append(signal)
    all_train_reverb_random_feat.append(feat.reshape(1, feat.shape[0], feat.shape[1]))

In [None]:
# Stack the random-source training clips row-wise and append a trailing axis
# of length 1. The previous version reshaped X_train_reverb_sig (the
# fixed-configuration data) here, so the random-source signals were discarded
# and the wrong array was saved afterwards.
X_train_reverb_random_sig = np.vstack(all_train_reverb_random_signals)
X_train_reverb_random_sig = X_train_reverb_random_sig.reshape(X_train_reverb_random_sig.shape[0], X_train_reverb_random_sig.shape[1], 1)
print(X_train_reverb_random_sig.shape)

In [None]:
# Persist the random-source reverberant training signals
np.save(file="X_train_reverb_random.npy", arr=X_train_reverb_random_sig)

In [None]:
from tqdm.notebook import tnrange
import pyroomacoustics as pra
import scipy.io.wavfile as wav
import numpy as np
import librosa.feature as lf

# Outputs of this cell: reverberant one-second test signals (and their
# mel-spectrogram features) simulated with a randomly placed source.
# NOTE(review): no random seed is set in this cell, so the generated data
# differs between runs unless the RNG is seeded elsewhere.
all_test_reverb_random_signals =[]
all_test_reverb_random_feat=[]

# The desired reverberation time and dimensions of the room
rt60 = 1.0  # seconds
room_dim = [20, 30, 10]  # meters

# Derive the wall absorption and maximum reflection order from rt60 and the room size
e_absorption, max_order = pra.inverse_sabine(rt60, room_dim)

for i in tnrange(len(testing_list)):

    fs, audio = wav.read(testing_list[i])

    # Create a fresh room for every clip
    room = pra.ShoeBox(
        room_dim, fs=fs, materials=pra.Material(e_absorption), max_order=max_order
    )

    # place the source at a uniformly random position at least 1 m from every wall
    room.add_source([np.random.uniform(1,19), np.random.uniform(1,29), np.random.uniform(1,9)], signal=audio, delay=1.3)

    # single microphone at a fixed position
    mic_locs = np.c_[
        [10, 1, 1],
    ]

    # finally place the array in the room
    room.add_microphone_array(mic_locs)

    # Run the simulation (this will also build the RIR automatically)
    room.simulate()

    # Flatten the recorded microphone signals to 1-D and keep the window
    # between 1.5 s and 2.5 s.
    mics_signals = room.mic_array.signals
    mics_signals = mics_signals.reshape(mics_signals.size,)
    z = mics_signals[int(1.5*fs):int(2.5*fs)]
    signal = z.astype('float')

    # The audio must be passed as y=...: librosa >= 0.10 made it keyword-only.
    feat = lf.melspectrogram(y=signal, sr=fs)
    all_test_reverb_random_signals.append(signal)
    all_test_reverb_random_feat.append(feat.reshape(1, feat.shape[0], feat.shape[1]))

In [None]:
# Stack the random-source test clips row-wise and append a trailing axis of
# length 1. The previous version reshaped X_test_reverb_sig (the
# fixed-configuration data) here, so the random-source signals were discarded
# and the wrong array was saved afterwards.
X_test_reverb_random_sig = np.vstack(all_test_reverb_random_signals)
X_test_reverb_random_sig = X_test_reverb_random_sig.reshape(X_test_reverb_random_sig.shape[0], X_test_reverb_random_sig.shape[1], 1)
print(X_test_reverb_random_sig.shape)

In [None]:
# Persist the random-source reverberant test signals
np.save(file="X_test_reverb_random.npy", arr=X_test_reverb_random_sig)

In [None]:
# Sanity check: smallest sample value in the random-source test set.
# ndarray.min() reduces inside NumPy instead of iterating over every
# element with the Python builtin min().
print(X_test_reverb_random_sig.min())

# Generate echo signal from a random room configuration (random room size, RT60, and source position)

In [None]:
from tqdm.notebook import tnrange
import pyroomacoustics as pra
import scipy.io.wavfile as wav
import numpy as np
import librosa.feature as lf

# Outputs of this cell: reverberant training signals (and their mel-spectrogram
# features) simulated in a room whose size, reverberation time and source
# position are all drawn at random for every clip.
# NOTE(review): no random seed is set in this cell, so the generated data
# differs between runs unless the RNG is seeded elsewhere.
all_train_reverb_random_signals =[]
all_train_reverb_random_feat=[]


for i in tnrange(len(training_list)):

    # Random reverberation time (seconds) and room dimensions (meters).
    # The dimensions get their own names so the room height is no longer
    # stored in `z`, which holds the extracted signal window further down.
    rt60 = np.random.uniform(0.5, 1.0)
    room_x = np.random.uniform(10, 20)
    room_y = np.random.uniform(10, 30)
    room_z = np.random.uniform(5, 10)
    room_dim = [room_x, room_y, room_z]  # meters
    e_absorption, max_order = pra.inverse_sabine(rt60, room_dim)

    fs, audio = wav.read(training_list[i])

    # Create the room
    room = pra.ShoeBox(
        room_dim, fs=fs, materials=pra.Material(e_absorption), max_order=max_order
    )

    # place the source at a uniformly random position at least 1 m from every wall
    room.add_source([np.random.uniform(1,room_x-1), np.random.uniform(1,room_y-1), np.random.uniform(1,room_z-1)], signal=audio, delay=1.3)

    # NOTE(review): the microphone stays at x=10 while the room's x extent is
    # drawn from [10, 20), so it can sit arbitrarily close to that wall.
    # Confirm this is intended.
    mic_locs = np.c_[
        [10, 1, 1],
    ]

    # finally place the array in the room
    room.add_microphone_array(mic_locs)

    # Run the simulation (this will also build the RIR automatically)
    room.simulate()

    # Flatten the recorded microphone signals to 1-D and keep the window
    # between 1.5 s and 2.5 s. The slice is shorter than fs samples if the
    # simulated signal ends before 2.5 s.
    mics_signals = room.mic_array.signals
    mics_signals = mics_signals.reshape(mics_signals.size,)
    z = mics_signals[int(1.5*fs):int(2.5*fs)]
    signal = z.astype('float')

    # The audio must be passed as y=...: librosa >= 0.10 made it keyword-only.
    feat = lf.melspectrogram(y=signal, sr=fs)
    all_train_reverb_random_signals.append(signal)
    all_train_reverb_random_feat.append(feat.reshape(1, feat.shape[0], feat.shape[1]))

In [None]:
# Debug inspection: dump the simulated signal of training clip 410.
# NOTE(review): the index is hard-coded and raises IndexError if the training
# list has fewer than 411 entries — presumably a leftover check; confirm.
print(all_train_reverb_random_signals[410])

In [None]:
# Zero-pad (at the end) every simulated training clip whose length is not
# 16000 samples, printing the index of each clip that gets padded.
for i, clip in enumerate(all_train_reverb_random_signals):
    n_missing = 16000 - len(clip)
    if n_missing == 0:
        continue
    print(i)
    padding = np.full(n_missing, 0.0e+00)
    all_train_reverb_random_signals[i] = np.concatenate((clip, padding))

In [None]:
# Stack the random-room training clips row-wise, then append a trailing
# axis of length 1.
stacked_train_random = np.vstack(all_train_reverb_random_signals)
n_clips, n_samples = stacked_train_random.shape
X_train_reverb_random_sig = stacked_train_random.reshape(n_clips, n_samples, 1)
print(X_train_reverb_random_sig.shape)

In [None]:
# Persist the random-room reverberant training signals
np.save(file="X_train_reverb_room_random.npy", arr=X_train_reverb_random_sig)

In [None]:
from tqdm.notebook import tnrange
import pyroomacoustics as pra
import scipy.io.wavfile as wav
import numpy as np
import librosa.feature as lf

# Outputs of this cell: reverberant test signals (and their mel-spectrogram
# features) simulated in a room whose size, reverberation time and source
# position are all drawn at random for every clip.
# NOTE(review): no random seed is set in this cell, so the generated data
# differs between runs unless the RNG is seeded elsewhere.
all_test_reverb_random_signals =[]
all_test_reverb_random_feat=[]


for i in tnrange(len(testing_list)):

    # The desired reverberation time and dimensions of the room, drawn at
    # random. The dimensions get their own names so the room height is no
    # longer stored in `z`, which holds the extracted signal window below.
    rt60 = np.random.uniform(0.5, 1.0)
    room_x = np.random.uniform(10, 20)
    room_y = np.random.uniform(10, 30)
    room_z = np.random.uniform(5, 10)
    room_dim = [room_x, room_y, room_z]  # meters

    e_absorption, max_order = pra.inverse_sabine(rt60, room_dim)

    fs, audio = wav.read(testing_list[i])

    # Create the room
    room = pra.ShoeBox(
        room_dim, fs=fs, materials=pra.Material(e_absorption), max_order=max_order
    )

    # place the source randomly in the room, at least 1 m from every wall
    room.add_source([np.random.uniform(1,room_x-1), np.random.uniform(1,room_y-1), np.random.uniform(1,room_z-1)], signal=audio, delay=1.3)

    # NOTE(review): the microphone stays at x=10 while the room's x extent is
    # drawn from [10, 20), so it can sit arbitrarily close to that wall.
    # Confirm this is intended.
    mic_locs = np.c_[
        [10, 1, 1],
    ]

    # finally place the array in the room
    room.add_microphone_array(mic_locs)

    # Run the simulation (this will also build the RIR automatically)
    room.simulate()

    # Flatten the recorded microphone signals to 1-D and keep the window
    # between 1.5 s and 2.5 s. The slice is shorter than fs samples if the
    # simulated signal ends before 2.5 s.
    mics_signals = room.mic_array.signals
    mics_signals = mics_signals.reshape(mics_signals.size,)
    z = mics_signals[int(1.5*fs):int(2.5*fs)]
    signal = z.astype('float')

    # The audio must be passed as y=...: librosa >= 0.10 made it keyword-only.
    feat = lf.melspectrogram(y=signal, sr=fs)
    all_test_reverb_random_signals.append(signal)
    all_test_reverb_random_feat.append(feat.reshape(1, feat.shape[0], feat.shape[1]))

In [None]:
# Zero-pad (at the end) every simulated test clip whose length is not
# 16000 samples so that all clips can be stacked into one array.
for i, clip in enumerate(all_test_reverb_random_signals):
    n_missing = 16000 - len(clip)
    if n_missing == 0:
        continue
    padding = np.full(n_missing, 0.0e+00)
    all_test_reverb_random_signals[i] = np.concatenate((clip, padding))

In [None]:
# Stack the random-room test clips row-wise, then append a trailing
# axis of length 1.
stacked_test_random = np.vstack(all_test_reverb_random_signals)
n_clips, n_samples = stacked_test_random.shape
X_test_reverb_random_sig = stacked_test_random.reshape(n_clips, n_samples, 1)
print(X_test_reverb_random_sig.shape)

In [None]:
# Persist the random-room reverberant test signals
np.save(file="X_test_reverb_room_random.npy", arr=X_test_reverb_random_sig)