In [16]:
from pipeline.dl_models import MLP, CombinedMLP
from pipeline.preprocessing import feature_mfcc, feature_bandpower_struct, remove_high_frequencies
from pipeline.dataloader import PhonocardiogramAudioDataset, PhonocardiogramByIDDatasetOnlyResult
from pipeline.utils import compose_feature_label, audio_random_windowing

from tqdm import tqdm
import numpy as np

from pathlib import Path
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
import re, random

In [17]:
file = Path(".") / "assets" / "the-circor-digiscope-phonocardiogram-dataset-1.0.3"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [18]:
# Training on actual patient audio files

def augmentation(data :np.ndarray, sr : int=4000, window_length_hz :int =200, window_len_sec :float=5.) ->np.ndarray:
    x = data
    # x = energy_band_augmentation_random_win(x, sr=sr, window_hz_length=window_length_hz)
    # x = np.fft.ifft(x).real
    x = audio_random_windowing(x, window_len_sec)
    return x

In [19]:
lookup = PhonocardiogramByIDDatasetOnlyResult(str(file / "training_data.csv"))

# Feature functions
features_fn = [
    feature_mfcc, 
    # feature_chromagram, 
    # feature_melspectrogram,
    feature_bandpower_struct(4000,200,0.7),
    # NMF, # found -> takes around 0.1s per file
    ]    

In [20]:
def dset_trans(f : str): # each takes ~0.1s

    result = compose_feature_label(
        f,
        lookup, 
        features_fn,
        lambda ary_data : remove_high_frequencies(augmentation(ary_data,4000,200,3.), sample_rate=4000,cutoff_frequency=450).real,
        dim=2,
        is_np=False
    )

    return result

In [21]:
      
def create_MLPs():
    rand_sample = np.random.random((4000 * 10,)) # 10 sec sample for 4000sr
    feature_space = [f_fn(rand_sample) for f_fn in features_fn]
    
    feat_sizes = [feat_matx.shape[0] for feat_matx in feature_space]
    mlps = [
        MLP([
            feat_size, 
            64,
            64 * 2, 
            1,] , torch.nn.ReLU)
        for feat_size in feat_sizes
    ]
    
    return mlps, feature_space
        

In [22]:

        
feature_based_mlps, example_feature = create_MLPs()
combinedMLP = CombinedMLP(feature_based_mlps)
    
print([f.__qualname__ for f in features_fn])
dset = PhonocardiogramAudioDataset(
    file / "clear_training_data",
    ".wav",
    "*", # Everything
    transform=dset_trans,
    balancing=True,
    csvfile=str(file / "training_data.csv"),
    shuffle=True,
)
    

            

['feature_mfcc', 'feature_bandpower_struct.<locals>.feature_bandpower']


In [23]:
train_size = int(0.8 * len(dset))
test_size = len(dset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=64,shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64,shuffle=False)

# training
combinedMLP.to(device)


criterion = nn.BCELoss()
optimizer = optim.Adam(combinedMLP.parameters(), lr=0.00001)

num_epoch = 5



In [24]:

from time import time

combinedMLP.train()
for epoch in range(num_epoch):
    for X,y in tqdm(train_loader):

        X = [x_sub.to(device) for x_sub in X]
        y = y.to(device)

        optimizer.zero_grad()
        
        out = combinedMLP(X)

        # LATER
        loss = criterion(out.squeeze(), y.float())

        loss.backward()
        optimizer.step()

        

    print(f'Epoch [{epoch+1}/{num_epoch}], Loss: {loss.item():.4f}')

100%|██████████| 39/39 [00:21<00:00,  1.83it/s]


Epoch [1/5], Loss: 0.6722


100%|██████████| 39/39 [00:25<00:00,  1.56it/s]


Epoch [2/5], Loss: 0.7437


100%|██████████| 39/39 [00:29<00:00,  1.34it/s]


Epoch [3/5], Loss: 0.7043


100%|██████████| 39/39 [00:18<00:00,  2.06it/s]


Epoch [4/5], Loss: 0.6821


100%|██████████| 39/39 [00:18<00:00,  2.16it/s]

Epoch [5/5], Loss: 0.6995





In [53]:
# Testing
combinedMLP.eval()
acc = []

with torch.no_grad():
    for Xtrain, ytrain in tqdm(train_loader):
        Xtrain = [x_sub.to(device) for x_sub in Xtrain]
        ytrain = y.to(device)


        out = combinedMLP(Xtrain)
        print(out, ytrain)
        pred = (out.squeeze() > 0.5).float()  # Convert probabilities to binary predictions
        accu = (pred == ytrain).float().mean().item()
        acc.append(accu)
    print(f'Training set Accuracy: {sum(acc)/len(acc):.4f}')
    
    for Xtest, ytest in tqdm(test_loader):
        Xtest = [x_sub.to(device) for x_sub in Xtest]
        ytest = y.to(device)


        out = combinedMLP(Xtest)
        pred = (out.squeeze() > 0.5).float()  # Convert probabilities to binary predictions
        accu = (pred == ytest).float().mean().item()
        acc.append(accu)
    print(f'Testing Accuracy: {sum(acc)/len(acc):.4f}')

  0%|          | 0/39 [00:00<?, ?it/s]

tensor([[0.4944],
        [0.4588],
        [0.4817],
        [0.4712],
        [0.5320],
        [0.4783],
        [0.5081],
        [0.4868],
        [0.4577],
        [0.4575],
        [0.5396],
        [0.4973],
        [0.4756],
        [0.4551],
        [0.4898],
        [0.4766],
        [0.5305],
        [0.4907],
        [0.4252],
        [0.4564],
        [0.5019],
        [0.4743],
        [0.4497],
        [0.5045],
        [0.5014],
        [0.4706],
        [0.5454],
        [0.5246],
        [0.4854],
        [0.4894],
        [0.4579],
        [0.4686],
        [0.4676],
        [0.4767],
        [0.5109],
        [0.4959],
        [0.4571],
        [0.4753],
        [0.4610],
        [0.5017],
        [0.4595],
        [0.4334],
        [0.5050],
        [0.4730],
        [0.4763],
        [0.5021],
        [0.4448],
        [0.4606],
        [0.4665],
        [0.4242],
        [0.4582],
        [0.5090],
        [0.4526],
        [0.4904],
        [0.5178],
        [0




RuntimeError: The size of tensor a (64) must match the size of tensor b (14) at non-singleton dimension 0