In [13]:
import torch
import torchaudio
import polars as pl
import matplotlib.pyplot as plt
import os

import helpers.input_processor as ip

In [14]:
#build the working dataframe: one row per recording, holding the path to the
#audio file and the encoded target label
data_dir = "data/raw_training/training_data/"
target_label = 'murmur_in_recording'

#drop the rows whose label is the literal string 'Unknown' before encoding
labelled_rows = ip.loadTrainingData(data_dir).filter(pl.col(target_label) != 'Unknown')

#encodeData comes from the project helper module; the later cells compare the
#target column against 0.0 / 1.0, so it presumably yields numeric labels (confirm in helpers)
encoded_rows = ip.encodeData(labelled_rows)

#keep only the two columns that are needed, prefixing each file name with the data directory
df = encoded_rows.select([
    pl.col('audio_file').apply(lambda x: os.path.join(data_dir, x)),
    pl.col(target_label)
])

loading data from save file:  cache/ingested_data.json


In [16]:
#balance the data so that there is an equal number of murmur positive and murmur negative samples
#do this by duplicating random rows of whichever group (pos or neg) is smaller
#NOTE(review): the duplication happens before the train/test split performed later in this
#notebook, so copies of the same recording can land on both sides of that split
neg_df = df.filter(pl.col(target_label)==0.0)
pos_df = df.filter(pl.col(target_label)==1.0)
numNeg = neg_df.height
numPos = pos_df.height

#an empty class can never be grown by duplicating its rows; without this check the
#loop below would append zero rows on every pass and never terminate
if numNeg == 0 or numPos == 0:
    raise ValueError(
        f"cannot balance classes by duplication: {numPos} positive and {numNeg} negative samples"
    )

while numNeg != numPos:
    #always draw from the original (un-duplicated) rows of the smaller class
    minority_df = neg_df if numNeg < numPos else pos_df
    deficit = abs(numPos - numNeg)
    #a single draw cannot exceed the size of the minority frame, so large gaps take several passes
    df.vstack(minority_df.sample(n=min(deficit, minority_df.height), shuffle=True), in_place=True)
    numNeg = df.filter(pl.col(target_label)==0.0).height
    numPos = df.filter(pl.col(target_label)==1.0).height

#reshuffle rows
df = df.sample(frac=1.0, shuffle=True)

#check number of positive and negative samples
numNeg = df.filter(pl.col(target_label)==0.0).height
numPos = df.filter(pl.col(target_label)==1.0).height
print('Total Samples:       ', df.height)
print('Positive Samples:    ', numPos)
print('Negative Samples:    ', numNeg)
print('Percent Positive Samples:    ', numPos/(numPos+numNeg))

Total Samples:        5328
Positive Samples:     2664
Negative Samples:     2664
Percent Positive Samples:     0.5


In [18]:
import librosa
import math, random
import numpy as np

# x = path to audio file
# duration = length of time (in seconds) to which the signal is resized
# sr = sample rate of the signal
def preprocessAudio(x, duration, sr=4000, augment=True):
    """Load an audio file and turn it into a dB-scaled Mel spectrogram.

    Parameters
    ----------
    x : path to the audio file
    duration : length of time (in seconds) to which the signal is resized;
        may be fractional
    sr : sample rate used when loading the signal. Passing None keeps the
        file's native rate (librosa.load behaviour); the resize below then
        uses the rate that librosa reports back.
    augment : when True (the default, matching the previous behaviour) the
        signal is circularly shifted by a random amount. Pass False to get a
        deterministic spectrogram, e.g. for validation/test data.

    Returns
    -------
    The Mel spectrogram of the resized signal, converted to decibels.
    """
    #read and load the audio file, resampling to `sr` when one is given
    sig, samp_rate = librosa.load(x, sr=sr)

    #resize sample, either by padding it with zeros or truncating it.
    #use the rate actually returned by the loader and force an integer length
    #so that fractional durations and sr=None both work
    target_len = int(round(samp_rate * duration))
    sig = librosa.util.fix_length(sig, size=target_len)

    #implement audio augmentation:  ------------
    if augment:
        #circularly shift the signal by a random fraction of its length (max 99%);
        #samples rolled off the end wrap around to the start
        sig_len = sig.shape[0]
        max_shift = 0.99
        sig = np.roll(sig, round(random.random() * max_shift * sig_len))
    #-------------------------------------------

    #get Mel spectrogram
    melSpec = librosa.feature.melspectrogram(y=sig, sr=samp_rate)
    #melspectrogram returns a power spectrogram by default, so the matching
    #conversion is power_to_db; amplitude_to_db would square the values again
    melSpec = librosa.power_to_db(melSpec)

    #implement image augmentation   ------------
    #-------------------------------------------

    return melSpec

In [19]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

class AudioDataset(Dataset):
    """Dataset that turns audio file paths into (spectrogram, label) pairs on access.

    Parameters
    ----------
    audioPaths : sequence of paths to the audio files
    labels : sequence of labels, aligned index-for-index with audioPaths
    duration : length in seconds that every recording is resized to before the
        spectrogram is computed; forwarded to preprocessAudio.
        NOTE(review): the original code passed no duration at all (which made
        every item access fail), so this default is a placeholder -- set it to
        suit the recording lengths in the data.
    sr : sample rate forwarded to preprocessAudio

    Raises
    ------
    ValueError : if audioPaths and labels differ in length
    """

    def __init__(self, audioPaths, labels, duration=10, sr=4000):
        if len(audioPaths) != len(labels):
            raise ValueError(
                f"audioPaths and labels must have the same length, got {len(audioPaths)} and {len(labels)}"
            )
        self.audioPaths = audioPaths
        self.labels = labels
        self.duration = duration
        self.sr = sr

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the spectrogram computed from the idx-th audio file and its label."""
        #preprocessAudio requires the target duration; it is computed on every
        #access rather than cached
        spec = preprocessAudio(self.audioPaths[idx], self.duration, sr=self.sr)
        label = self.labels[idx]
        return spec, label

In [32]:
def splitDataset(ds, split_ratio=0.8, seed=0):
    """Randomly split a dataset into a training part and a test part.

    Parameters
    ----------
    ds : dataset to split; only its length is needed here
    split_ratio : fraction of the samples assigned to the training part,
        between 0 and 1 inclusive
    seed : seed for the generator that drives the random split, so the same
        seed always produces the same partition (default 0, as before)

    Returns
    -------
    (trainSet, testSet) as produced by torch.utils.data.random_split

    Raises
    ------
    ValueError : if split_ratio lies outside [0, 1]
    """
    #a ratio outside [0, 1] would produce a negative size for one of the parts
    if not 0.0 <= split_ratio <= 1.0:
        raise ValueError(f"split_ratio must be between 0 and 1, got {split_ratio}")

    total_size = len(ds)
    train_size = round(split_ratio * total_size)
    #the test part takes whatever is left, so the two sizes always sum to total_size
    test_size = total_size - train_size

    generator = torch.Generator().manual_seed(seed)
    trainSet, testSet = torch.utils.data.random_split(ds, [train_size, test_size], generator=generator)
    return trainSet, testSet

In [34]:
#pull the file-path and label columns out of the dataframe as plain Python lists
audioPaths, labels = (
    df.get_column(column_name).to_list()
    for column_name in ('audio_file', target_label)
)

#wrap the lists in a dataset, then carve it into training and test subsets
ds = AudioDataset(audioPaths, labels)
trainset, testset = splitDataset(ds)



4262 1066
