# Setup

In [1]:
# Project root directory, as a path relative to the current working directory.
# The setup cell below chdir()s into it when `pipeline` is not found in the
# current directory.
project_root = ".."

In [2]:
import random, os
import numpy as np

# Make sure the working directory is the one that contains the `pipeline`
# package: if it is not visible from here, move to project_root and verify.
# The guard keeps the cell idempotent — re-running it does not chdir twice.
if "pipeline" not in os.listdir():
    os.chdir(project_root)
    assert "pipeline" in os.listdir(), (
        f"'pipeline' not found in {os.getcwd()!r}; "
        f"check that project_root ({project_root!r}) points at the project root"
    )

from pipeline.preprocessing import \
    build_feature_extractor, \
    TCDPdata, \
    gen_datesets, \
    cross_train

# Constant: dataset directory, given as a relative path. It is resolved against
# the working directory, which the chdir guard above sets to the folder
# containing `pipeline`.
dataset_root = "assets/the-circor-digiscope-phonocardiogram-dataset-1.0.3"

# Variables

In [3]:
from cnn2d import CNN2D

class args:
    """Experiment configuration, used as a plain namespace of class attributes.

    The notebook never instantiates this class; every setting is read as
    ``args.<name>``.
    """

    # Cutoff frequency passed to build_feature_extractor.
    cutoff_frequency = 2000 # use 0 to disable bandpass filter

    # Names of the features passed to build_feature_extractor.
    use_features = [
        "mel_2d"
    ]

    # Dataset variants passed to gen_datesets; each one is trained on separately.
    use_X = [
        "raw",
        "scaled",
        "minmax",
    ]

    # Train split size passed to gen_datesets.
    train_size = 0.8

    # Seed shared by the global RNGs and gen_datesets.
    random_state = 2024

    # Models passed to cross_train: name -> {"class": ..., "kwargs": {...}}.
    use_models = {
        "CNN2D": {
            "class": CNN2D,
            "kwargs": {
                # The time_frames entry stays None until set_n_time_frames()
                # fills it in from the generated datasets.
                "input_shape": [128, None, 1],  # (n_mels, time_frames, channels)
                "num_classes": 2,
                "learning_rate": 0.001,
                "epochs": 10,
                "batch_size": 32,
            }
        }
    }

    @staticmethod
    def set_n_time_frames(n_time_frames: int) -> None:
        """Fill in the time-frame entry of the CNN2D ``input_shape``.

        Mutates, in place, the list stored under
        ``args.use_models["CNN2D"]["kwargs"]["input_shape"]``.

        Declared as a static method so that it can be called on the class
        (``args.set_n_time_frames(n)``, as the notebook does) as well as on
        an instance without the instance being bound to ``n_time_frames``.

        Args:
            n_time_frames: value written to index 1 of ``input_shape``.
        """
        args.use_models["CNN2D"]["kwargs"]['input_shape'][1] = n_time_frames

# Seed the stdlib and NumPy global RNGs with the experiment-wide random_state.
# NOTE(review): the backend used by CNN2D is not seeded here, so training
# results may still vary between runs — confirm whether that is intended.
for _seed_rng in (random.seed, np.random.seed):
    _seed_rng(args.random_state)

# Extract Features

In [5]:
# Build the extractor from the configured feature names and cutoff frequency,
# then run it over the dataset located at dataset_root.
extract_features = build_feature_extractor(args.use_features, args.cutoff_frequency)
features, labels = TCDPdata(dataset_root).getXy(extract_features)

## defensive checking (currently disabled)
# assert len(labels) == 3159
# assert sum(labels) == 1632

# Quick sanity report: size of axis 1 of the feature array and the label mean.
n_features = features.shape[1]
label_mean = labels.mean()
print('n features:', n_features)
print('mean of labels:', label_mean)

100%|██████████| 3159/3159 [02:33<00:00, 20.64it/s]


n features: 128
mean of labels: 0.51661918328585


# Generate Dataset

In [7]:
# The features array is 3-D: axis 0 indexes files, axis 1 channels, axis 2 time.
# Every channel (axis 1) is to be normalized on its own, so the normalizer
# has to run along the two remaining axes: normalize_axis=(0, 2).
X, y = gen_datesets(
    features,
    labels,
    args.use_X,
    args.train_size,
    args.random_state,
    normalize_axis=(0, 2),
)

# Report the train/test shapes of every generated dataset variant.
for x_type in X.keys():
    splits = X[x_type]
    print(x_type, splits['train'].shape, splits['test'].shape)

raw (2527, 128, 505) (632, 128, 505)
scaled (2527, 128, 505) (632, 128, 505)
minmax (2527, 128, 505) (632, 128, 505)


In [8]:
# Read the time dimension (axis 2) from the first generated dataset variant
# instead of a hard-coded 'raw' key, so this cell keeps working when "raw" is
# removed from args.use_X. In the recorded run all variants share one shape.
first_x_type = next(iter(X.keys()))
n_time_frames = X[first_x_type]['train'].shape[2]

# Write the value into the CNN2D input_shape held in args.use_models.
args.set_n_time_frames(n_time_frames)
print(f"Model has aligned its input layer with n_time_frames: {n_time_frames}")

Model has aligned its input layer with n_time_frames: 505


# Train Models

In [9]:
"""
Run this cell before the next one whenever you start a new experiment.
The next cell refuses to run unless `models` and `scores` are empty,
so running it again by accident cannot wipe out the results you already have.
"""
# Fresh, independent containers for the trained models and their scores.
# The training cell asserts that both are empty before it starts.
models, scores = {}, {}

In [10]:
# Refuse to train over the results of a previous run: both containers must
# still be empty, i.e. the reset cell above has to be executed first.
assert len(scores) == 0 and len(models) == 0, "rerun the cell above to start a new experiment"

# There must be something to train on and something to train.
assert len(args.use_X) > 0, "at least one pair of train/test sets and one model is required"
assert len(args.use_models) > 0, "at least one pair of train/test sets and one model is required"

# Train the configured models on the generated datasets; cross_train returns
# the models and their scores.
models, scores = cross_train(X, y, args.use_models)

Dataset: raw, Model: CNN2D, Training...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 2s/step - accuracy: 0.5155 - loss: 2.5553
Epoch 2/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 2s/step - accuracy: 0.5571 - loss: 0.6758
Epoch 3/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 2s/step - accuracy: 0.6008 - loss: 0.6576
Epoch 4/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 1s/step - accuracy: 0.6335 - loss: 0.6233
Epoch 5/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 1s/step - accuracy: 0.6715 - loss: 0.6233
Epoch 6/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 1s/step - accuracy: 0.7469 - loss: 0.5294
Epoch 7/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 1s/step - accuracy: 0.7677 - loss: 0.4662
Epoch 8/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 1s/step - accuracy: 0.7731 - loss: 0.5517
Epoch 9/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 1s/step - accuracy: 0.4936 - loss: 0.6972
Epoch 2/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 2s/step - accuracy: 0.5205 - loss: 0.6925
Epoch 3/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 1s/step - accuracy: 0.5654 - loss: 0.6814
Epoch 4/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m118s[0m 1s/step - accuracy: 0.5720 - loss: 0.6679
Epoch 5/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 2s/step - accuracy: 0.5788 - loss: 0.6471
Epoch 6/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 1s/step - accuracy: 0.5855 - loss: 0.6301
Epoch 7/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m147s[0m 1s/step - accuracy: 0.6353 - loss: 0.5959
Epoch 8/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 1s/step - accuracy: 0.6585 - loss: 0.5800
Epoch 9/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 1s/step - accuracy: 0.5406 - loss: 1.0656
Epoch 2/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 1s/step - accuracy: 0.5859 - loss: 0.6755
Epoch 3/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 1s/step - accuracy: 0.6106 - loss: 0.6446
Epoch 4/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 1s/step - accuracy: 0.6686 - loss: 0.5932
Epoch 5/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m102s[0m 1s/step - accuracy: 0.7037 - loss: 0.5504
Epoch 6/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 1s/step - accuracy: 0.7366 - loss: 0.5154
Epoch 7/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 1s/step - accuracy: 0.7602 - loss: 0.4611
Epoch 8/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 1s/step - accuracy: 0.7890 - loss: 0.4720
Epoch 9/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━

In [11]:
# Display the scores returned by cross_train: accuracy, F1 and AUC on the
# train and test splits for every (dataset variant, model) pair.
# NOTE(review): in the recorded output the train accuracy is 0.71-0.85 while
# the test accuracy stays around 0.55 (the mean label is ~0.52), so the models
# barely beat a constant prediction on held-out data.
scores

{'raw': {'CNN2D': {'train': {'accuracy': 0.7981796860694885,
    'f1': 0.7788378143972244,
    'auc': 0.8030785302864888},
   'test': {'accuracy': 0.5601266026496887,
    'f1': 0.49637681159420277,
    'auc': 0.5605327725201542}}},
 'scaled': {'CNN2D': {'train': {'accuracy': 0.7127028107643127,
    'f1': 0.7624345549738221,
    'auc': 0.7053420170914431},
   'test': {'accuracy': 0.5537974834442139,
    'f1': 0.6299212598425197,
    'auc': 0.5531520705022281}}},
 'minmax': {'CNN2D': {'train': {'accuracy': 0.8452710509300232,
    'f1': 0.8508202975963373,
    'auc': 0.8451589930856204},
   'test': {'accuracy': 0.5458860993385315,
    'f1': 0.5480314960629921,
    'auc': 0.5458765209553854}}}}