In [75]:
import polars as pl
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
from tensorflow.keras.layers import LSTM, Dense
import helpers.input_processor as ip
import tqdm
import tensorflow as tf
from tensorflow.data import Dataset as tfds
import tensorflow_io as tfio
import math, random
import numpy as np

from matplotlib import pyplot as plt
import tensorflow.python.util as util

In [76]:
def pl_to_tf(df):
    #convert to tensorflow dataset
    ds = df.select(pl.all().map(lambda s: s.to_numpy())).row(0)
    ds = tfds.from_tensor_slices(ds)
    return ds


In [77]:
# #split into train and test sets
# split_ratio = 0.8
# total_size = df.height
# train_size = round(split_ratio * total_size)
# test_size = total_size - train_size

# df = df.sample(frac=1.0, shuffle=True)  #shuffle rows in dataframe
# train_df = df.head(train_size)
# test_df = df.tail(test_size)

# #convert train and test sets from polars dataframe to tensorflow dataset
# train_ds = pl_to_tf(train_df)
# test_ds = pl_to_tf(test_df)

Andres tensorflow code

In [78]:
import torch
import torchaudio
import polars as pl
import matplotlib.pyplot as plt
import os
import numpy as np
import tqdm
from torchaudio import transforms
from IPython.display import Audio
import math, random

import helpers.input_processor as ip

In [79]:
# x = path to audio file
# samp_rate = sample rate of the signal
# duration = length of time (in seconds) to which the signal is resized
# do_augmentation = whether to perform audio and image augmentation on the signal
# n_freq_masks = number of frequency masks
# n_time_masks = number of time masks
# remaining keyword argument are passed to transforms.MelSpectrogram()
def preprocessAudio(x, samp_rate=4000, duration=25, do_augmentation=True, n_freq_masks=1, n_time_masks=1, n_mels=128, n_fft=1024, hop_len=None):
    # read and load audio file in .wav format
    sig, sr = torchaudio.load(x)

    # Check that audio is mono (has 1 audio channel)
    num_channels = sig.shape[0]
    if num_channels != 1:
        raise Exception('The provided audio file \'%s\' has %s channels, when 1 was expected' % (x, num_channels))
    
    # resize sample, either by padding it with silence or truncating it
    num_rows, sig_len = sig.shape
    max_len = sr * duration
    if (sig_len > max_len):
        # Truncate the signal to the given length
        sig = sig[:,:max_len]
    elif (sig_len < max_len):
        # Pad with zeroes at the beginning and end of the signal
        pad_begin_len = random.randint(0, max_len - sig_len)
        pad_end_len = max_len - sig_len - pad_begin_len
        pad_begin = torch.zeros((num_rows, pad_begin_len))
        pad_end = torch.zeros((num_rows, pad_end_len))
        sig = torch.cat((pad_begin, sig, pad_end), 1)

    # Audio Augmentation    --------\
    if do_augmentation == True:
        # time shift signal to the left or right by a random percent of its original length (max 99%)
        _, sig_len = sig.shape
        max_shift = 0.99
        sig = sig.roll(int(random.random() * max_shift * sig_len))
    #-------------------------------/

    # get Mel spectrogram
    top_db = 80
    melSpec = torchaudio.transforms.MelSpectrogram(sr, n_fft=n_fft, hop_length=hop_len, n_mels=n_mels)(sig)
    melSpec = torchaudio.transforms.AmplitudeToDB(top_db=top_db)(melSpec)

    # Image Augmentation    --------\
    if do_augmentation == True:
        # Apply time and frequency mask
        max_mask_pct=0.1
        n_steps = melSpec.shape[2]
        mask_value = melSpec.mean()
        for i in range(n_freq_masks):
            melSpec = torchaudio.transforms.FrequencyMasking(max_mask_pct * n_mels)(melSpec, mask_value)
        for i in range(n_time_masks):
            melSpec = torchaudio.transforms.TimeMasking(max_mask_pct * n_steps)(melSpec, mask_value)
    #-------------------------------/

    out = melSpec.numpy()
    e3, e2, e1 = out.shape
    out = out.reshape(e2,e1)
    
    return out

In [80]:
def splitDataframe(df, split_ratio=0.8):
    total_size = df.height
    train_size = round(split_ratio * total_size)
    test_size = total_size - train_size

    df = df.sample(frac=1.0, shuffle=True)
    trainSet = df.head(train_size)
    testSet = df.tail(test_size)
    return trainSet, testSet

In [81]:
import numpy as np
import keras, keras.utils

class DataGenerator(keras.utils.Sequence):
    # data is represented as a sequence of batches
    def __init__(self, audioPaths, labels, preProcessor, batchSize = 32, shuffle=True):
        self.audioPaths = audioPaths
        self.labels = labels
        self.preProcessor = preProcessor
        self.batchSize = batchSize
        self.shuffle = shuffle
        self.classes = np.unique(self.labels)
        self.numClasses = len(self.classes)
        self.on_epoch_end()

    def __len__(self):
        # Get total number of batches
        return int(np.floor(len(self.audioPaths) / self.batchSize))

    def __getitem__(self, index):
        # Get the batch at position 'index'
        batch = self.batches[index]
        # Apply preprocessing function to audio file paths and get the labels
        X = np.array([self.preProcessor(self.audioPaths[x]) for x in batch])
        # Y = keras.utils.to_categorical([self.labels[x] for x in batch], num_classes=self.numClasses)
        Y = np.array([self.labels[x] for x in batch])
        return X, Y

    def on_epoch_end(self):
        # Get array of indices used to access data
        dataIndices = np.arange(len(self)*self.batchSize)
        if self.shuffle == True:
            np.random.shuffle(dataIndices)
        # Reshape into an array of batches, where each batch is an array of indices used to access data
        self.batches = np.reshape(dataIndices, (len(self), self.batchSize))


In [82]:
#load data from dataframe
data_dir = "data/raw_training/training_data/"
target_label = 'murmur_in_recording'
df = (
    ip.loadTrainingData(data_dir)
    .filter(pl.col(target_label) != 'Unknown')
    .pipe(ip.encodeData)
    .select([
        pl.col('audio_file').apply(lambda x: os.path.join(data_dir, x)),
        pl.col(target_label)
    ])
)

loading data from save file:  cache/ingested_data.json


In [83]:
# #balance the data so that there is an equal number of murmur positive and murmur negative samples
# #do this by duplicating random rows of whichever group (pos or neg) is smaller
# neg_df = df.filter(pl.col(target_label)==0.0)
# pos_df = df.filter(pl.col(target_label)==1.0)
# numNeg = neg_df.height
# numPos = pos_df.height

# while numNeg != numPos:
#     if numNeg < numPos:
#         df.vstack(neg_df.sample(n=min(numPos-numNeg, neg_df.height), shuffle=True), in_place=True)
#     else: 
#         df.vstack(pos_df.sample(n=min(numNeg-numPos, pos_df.height), shuffle=True), in_place=True)
#     numNeg = df.filter(pl.col(target_label)==0.0).height
#     numPos = df.filter(pl.col(target_label)==1.0).height

# #reshuffle rows
# df = df.sample(frac=1.0, shuffle=True)

# #check number of positive and negative samples
# numNeg = df.filter(pl.col(target_label)==0.0).height
# numPos = df.filter(pl.col(target_label)==1.0).height
# print('Total Samples:       ', df.height)
# print('Positive Samples:    ', numPos)
# print('Negative Samples:    ', numNeg)
# print('Percent Positive Samples:    ', numPos/(numPos+numNeg))


# method 2:

#balance the data so that there is an equal number of murmur positive and murmur negative samples
#do this by duplicating random rows of whichever group (pos or neg) is smaller
neg_df = df.filter(pl.col(target_label)==0.0)
pos_df = df.filter(pl.col(target_label)==1.0)
numNeg = neg_df.height
numPos = pos_df.height

if numNeg < numPos:
    df = neg_df.vstack(pos_df.sample(n=numNeg))
elif numPos < numNeg:
    df = pos_df.vstack(neg_df.sample(n=numPos))
else:
    df = neg_df.vstack(pos_df)

#reshuffle rows
df = df.sample(frac=1.0, shuffle=True)

#check number of positive and negative samples
numNeg = df.filter(pl.col(target_label)==0.0).height
numPos = df.filter(pl.col(target_label)==1.0).height
print('Total Samples:       ', df.height)
print('Positive Samples:    ', numPos)
print('Negative Samples:    ', numNeg)
print('Percent Positive Samples:    ', numPos/(numPos+numNeg))

Total Samples:        998
Positive Samples:     499
Negative Samples:     499
Percent Positive Samples:     0.5


In [84]:
train_df, test_df = splitDataframe(df)
classes = df.get_column(target_label).unique().to_list()

train_gen = DataGenerator(
    audioPaths=train_df.get_column('audio_file').to_list(),
    labels=train_df.get_column(target_label).to_list(),
    preProcessor=preprocessAudio
)
test_gen = DataGenerator(
    audioPaths=test_df.get_column('audio_file').to_list(),
    labels=test_df.get_column(target_label).to_list(),
    preProcessor=preprocessAudio
)

In [91]:
test_gen.__getitem__(0)[0].shape

(32, 128, 196)

In [86]:
import keras as ks
from keras.models import Sequential
from keras.layers import Dense, InputLayer, Flatten, Conv2D

# a sequential model is a model that is made up of layers
model = Sequential()
# the input layer is the first layer in the model
model.add(InputLayer(input_shape=(201, 201, 1)))
# try modifying the number of nodes in the hidden layer to see how it affects the model
# you can also try changing the activation function to see how it affects the model
# adding more layers to the model may also help

model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

In [87]:
model.compile(optimizer='adam', loss='bce', metrics=['accuracy']) # we will keep track of the mean squared error using mse
# hist = model.fit(train_gen, steps_per_epoch=np.ceil(float(len(train_labels)) / float(batch_size)), validation_data=test_gen, validation_steps=25, epochs=25) # changing the number of epochs may help the model!
model.fit(train_gen)

2022-07-19 20:32:52.005931: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8401
2022-07-19 20:32:52.628877: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory


InvalidArgumentError: Graph execution error:

Detected at node 'sequential_6/flatten_4/Reshape' defined at (most recent call last):
    File "/usr/lib/python3.8/runpy.py", line 194, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/usr/lib/python3.8/runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "/home/tiaan/.local/lib/python3.8/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/home/tiaan/.local/lib/python3.8/site-packages/traitlets/config/application.py", line 976, in launch_instance
      app.start()
    File "/home/tiaan/.local/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 712, in start
      self.io_loop.start()
    File "/home/tiaan/.local/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "/usr/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
      self._run_once()
    File "/usr/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
      handle._run()
    File "/usr/lib/python3.8/asyncio/events.py", line 81, in _run
      self._context.run(self._callback, *self._args)
    File "/home/tiaan/.local/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "/home/tiaan/.local/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "/home/tiaan/.local/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell
      await result
    File "/home/tiaan/.local/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 730, in execute_request
      reply_content = await reply_content
    File "/home/tiaan/.local/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 383, in do_execute
      res = shell.run_cell(
    File "/home/tiaan/.local/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/home/tiaan/.local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2881, in run_cell
      result = self._run_cell(
    File "/home/tiaan/.local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2936, in _run_cell
      return runner(coro)
    File "/home/tiaan/.local/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/home/tiaan/.local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3135, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/home/tiaan/.local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3338, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/home/tiaan/.local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3398, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_492690/1405856346.py", line 3, in <cell line: 3>
      model.fit(train_gen)
    File "/home/tiaan/.local/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/tiaan/.local/lib/python3.8/site-packages/keras/engine/training.py", line 1409, in fit
      tmp_logs = self.train_function(iterator)
    File "/home/tiaan/.local/lib/python3.8/site-packages/keras/engine/training.py", line 1051, in train_function
      return step_function(self, iterator)
    File "/home/tiaan/.local/lib/python3.8/site-packages/keras/engine/training.py", line 1040, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/tiaan/.local/lib/python3.8/site-packages/keras/engine/training.py", line 1030, in run_step
      outputs = model.train_step(data)
    File "/home/tiaan/.local/lib/python3.8/site-packages/keras/engine/training.py", line 889, in train_step
      y_pred = self(x, training=True)
    File "/home/tiaan/.local/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/tiaan/.local/lib/python3.8/site-packages/keras/engine/training.py", line 490, in __call__
      return super().__call__(*args, **kwargs)
    File "/home/tiaan/.local/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/tiaan/.local/lib/python3.8/site-packages/keras/engine/base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/tiaan/.local/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/home/tiaan/.local/lib/python3.8/site-packages/keras/engine/sequential.py", line 374, in call
      return super(Sequential, self).call(inputs, training=training, mask=mask)
    File "/home/tiaan/.local/lib/python3.8/site-packages/keras/engine/functional.py", line 458, in call
      return self._run_internal_graph(
    File "/home/tiaan/.local/lib/python3.8/site-packages/keras/engine/functional.py", line 596, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/home/tiaan/.local/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/tiaan/.local/lib/python3.8/site-packages/keras/engine/base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/tiaan/.local/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/home/tiaan/.local/lib/python3.8/site-packages/keras/layers/reshaping/flatten.py", line 98, in call
      return tf.reshape(inputs, flattened_shape)
Node: 'sequential_6/flatten_4/Reshape'
Input to reshape is a tensor with 24379392 values, but the requested shape requires a multiple of 1241888
	 [[{{node sequential_6/flatten_4/Reshape}}]] [Op:__inference_train_function_5621]

In [89]:
import os
path = os.path.join(data_dir, '85349_TV.wav')
x = preprocessAudio(path)
x.shape


(128, 196)