In [43]:
# docker shenanigans, please ignore 
#%cd /app/notebooks/enhance_crt_net/

### CRT-Net MIT-BIH Simple (200 samples, 2 leads, 5 classes)

In [44]:
import tensorflow as tf
from importlib import reload
from src import crtnet_models
# Re-import the module so that any edits made to it since the first import take effect.
reload(crtnet_models)

# Reset Keras' global state before constructing the model.
tf.keras.backend.clear_session()

model = crtnet_models.crt_net_original(
    n_classes=5,
    input_shape=(200, 2),
    n_vgg_blocks=1,
    # Multilabel switch: setting it to True disables softmax and categorical
    # cross entropy. MIT-BIH is not multilabel, so it stays off here.
    binary=False,
    # Enables focal cross entropy to address the significant class imbalance.
    use_focal=True,
    # F1 may be the better metric to evaluate on when early stopping is used.
    metrics=['accuracy', 'f1'],
    # Default feature dimension size (d_ffn is set to 2*d_model).
    d_model=128,
)
model.summary()

# Only the printed summary is wanted from this cell, so drop the model again.
del model


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 200, 2)]          0         
                                                                 
 vgg_net (VGGNet)            (None, 100, 128)          50176     
                                                                 
 bi_gru (BiGRU)              (None, 100, 256)          198144    
                                                                 
 stacked_transformer_encode  (None, 100, 256)          1583104   
 r (StackedTransformerEncod                                      
 er)                                                             
                                                                 
 global_average_pooling1d (  (None, 256)               0         
 GlobalAveragePooling1D)                                         
                                                             

### CRT-Net MIT-BIH Alternate (200 samples, 2 leads, 5 classes)

The provided CRT-Net models.py has some alterations which may be the result of tuning the model:
- Leaky ReLU (alpha=0.3) activation instead of ReLU.
- Dropout (rate=0.2) after every VGG block and the BiGRU layer.
- Sine position encoding uses a max position encoding of 2048 instead of the default 10000.
- Additional dropout between transformer encoders and global pooling
- Additional dense layer before the output layer (units=4*n_classes, SELU activation).

In [45]:
import tensorflow as tf
from importlib import reload
from src import crtnet_models
# Re-import the module so that any edits made to it since the first import take effect.
reload(crtnet_models)

# Reset Keras' global state before constructing the model.
tf.keras.backend.clear_session()

model = crtnet_models.crt_net_original_alt(
    n_classes=5,
    input_shape=(200, 2),
    n_vgg_blocks=1,
    # Multilabel switch: setting it to True disables softmax and categorical
    # cross entropy. MIT-BIH is not multilabel, so it stays off here.
    binary=False,
    # Enables focal cross entropy to address the significant class imbalance.
    use_focal=True,
    # F1 may be the better metric to evaluate on when early stopping is used.
    metrics=['accuracy', 'f1'],
    # Default feature dimension size (d_ffn is set to 2*d_model).
    d_model=128,
)
model.summary()

# Only the printed summary is wanted from this cell, so drop the model again.
del model

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 200, 2)]          0         
                                                                 
 vgg_net (VGGNet)            (None, 100, 128)          443520    
                                                                 
 bi_gru (BiGRU)              (None, 100, 256)          198144    
                                                                 
 stacked_transformer_encode  (None, 100, 256)          1583104   
 r (StackedTransformerEncod                                      
 er)                                                             
                                                                 
 dropout_2 (Dropout)         (None, 100, 256)          0         
                                                                 
 global_average_pooling1d (  (None, 256)               0     

### CRT-Net CPSC Simple (3000 samples, 12 leads, 9 classes)

In [46]:
import tensorflow as tf
from importlib import reload
from src import crtnet_models
# Re-import the module so that any edits made to it since the first import take effect.
reload(crtnet_models)

# Reset Keras' global state before constructing the model.
tf.keras.backend.clear_session()

model = crtnet_models.crt_net_original(
    n_classes=9,
    input_shape=(3000, 12),
    # The signal is longer, so more CNN blocks are used to downsample it
    # (3000 / 2**5 -> 94).
    n_vgg_blocks=5,
    # Multilabel switch: setting it to True disables softmax and categorical
    # cross entropy. CPSC can be multilabel, so it is turned on here.
    binary=True,
    # Enables focal cross entropy to address the significant class imbalance.
    use_focal=True,
    # F1 may be the better metric to evaluate on when early stopping is used.
    metrics=['accuracy', 'f1'],
    # Default feature dimension size (d_ffn is set to 2*d_model).
    d_model=128,
)
model.summary()

# Only the printed summary is wanted from this cell, so drop the model again.
del model


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 3000, 12)]        0         
                                                                 
 vgg_net (VGGNet)            (None, 94, 128)           448256    
                                                                 
 bi_gru (BiGRU)              (None, 94, 256)           198144    
                                                                 
 stacked_transformer_encode  (None, 94, 256)           1583104   
 r (StackedTransformerEncod                                      
 er)                                                             
                                                                 
 global_average_pooling1d (  (None, 256)               0         
 GlobalAveragePooling1D)                                         
                                                             

### CRT-Net CPSC Alternate (3000 samples, 12 leads, 9 classes)

The provided CRT-Net models.py has some alterations which may be the result of tuning the model:
- Leaky ReLU (alpha=0.3) activation instead of ReLU.
- Dropout (rate=0.2) after every VGG block and the BiGRU layer.
- Sine position encoding uses a max position encoding of 2048 instead of the default 10000.
- Additional dropout between transformer encoders and global pooling
- Additional dense layer before the output layer (units=4*n_classes, SELU activation).

In [49]:
import tensorflow as tf
from importlib import reload
from src import crtnet_models
# Re-import the module so that any edits made to it since the first import take effect.
reload(crtnet_models)

# Reset Keras' global state before constructing the model.
tf.keras.backend.clear_session()

model = crtnet_models.crt_net_original_alt(
    n_classes=9,
    input_shape=(3000, 12),
    # The signal is longer, so more CNN blocks are used to downsample it
    # (3000 / 2**5 -> 94).
    n_vgg_blocks=5,
    # Multilabel switch: setting it to True disables softmax and categorical
    # cross entropy. CPSC can be multilabel, so it is turned on here.
    binary=True,
    # Enables focal cross entropy to address the significant class imbalance.
    use_focal=True,
    # F1 may be the better metric to evaluate on when early stopping is used.
    metrics=['accuracy', 'f1'],
    # Default feature dimension size (d_ffn is set to 2*d_model).
    d_model=128,
)
model.summary()

# Only the printed summary is wanted from this cell, so drop the model again.
del model


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 3000, 12)]        0         
                                                                 
 vgg_net (VGGNet)            (None, 94, 128)           2414976   
                                                                 
 bi_gru (BiGRU)              (None, 94, 256)           198144    
                                                                 
 stacked_transformer_encode  (None, 94, 256)           1583104   
 r (StackedTransformerEncod                                      
 er)                                                             
                                                                 
 dropout_6 (Dropout)         (None, 94, 256)           0         
                                                                 
 global_average_pooling1d (  (None, 256)               0     