In [1]:
import os
import sys

PROJECT_ROOT = os.path.abspath(os.path.join(
                  os.path.dirname("test-mode1"), 
                  os.pardir)
)
sys.path.append(PROJECT_ROOT)

import pandas as pd
import numpy as np
import torch
import json
import random
import torch.nn as nn
import torch.nn.functional as F
from train import train, evaluate
from src.dataset import create_dataloader
from src.utils import feature_extraction_pipeline, read_features_files, choose_model, read_feature
from src.data_augmentation import Mixup, Cutmix, Specmix
from src.models.utils import SaveBestModel, weight_init
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR
from typing import Dict, Tuple, List
from sklearn.metrics import classification_report

# making sure the experiments are reproducible
seed = 2109
random.seed(seed)
np.random.seed(seed)
torch.cuda.manual_seed(seed)
os.environ["PYTHONHASHSEED"] = str(seed)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

def seed_worker(worker_id: int):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

g = torch.Generator()
g.manual_seed(seed)

<torch._C.Generator at 0x7f45a6abc890>

In [2]:
features_path = "../features/propor2022/"

# loading training features
X_train = read_feature(path=features_path, fold="0", name="X_train.pth")
y_train = read_feature(path=features_path, fold="0", name="y_train.pth")
print(f"Train: {X_train.shape}, {y_train.shape}")

# loading validation features
X_valid = read_feature(path=features_path, fold="0", name="X_valid.pth")
y_valid = read_feature(path=features_path, fold="0", name="y_valid.pth")
print(f"Valid: {X_valid.shape}, {y_valid.shape}")

# loading testing features
X_test = read_feature(path=features_path, fold=None, name="X_test.pth")
y_test = read_feature(path=features_path, fold=None, name="y_test.pth")
print(f"Test: {X_test.shape}, {y_test.shape}")

Train: torch.Size([500, 1, 128000]), torch.Size([500, 3])
Valid: torch.Size([125, 1, 128000]), torch.Size([125, 3])
Test: torch.Size([308, 1, 128000]), torch.Size([308, 3])


## CNN 1

In [3]:
# reading the parameters configuration file
params = json.load(open("../config/mode_2.json", "r"))

# parameters defination
k_fold = None
max_seconds = 16

if "kfold" in params.keys():
    k_fold = params["kfold"]["num_k"]

max_samples = max_seconds * int(params["sample_rate"])

feature_config = params["feature"]
feature_config["sample_rate"] = int(params["sample_rate"])
data_augmentation_config = params["data_augmentation"]
dataset = params["dataset"]
wavelet_config = params["wavelet"]

model_config = params["model"]
model_config["name"] = "cnn"
model_config["use_gpu"] = True

mode = params["mode"]

feat_path = os.path.join(params["output_path"], params["dataset"])

if dataset == "propor2022":
    if data_augmentation_config["target"] == "majority":
        data_augment_target = [0]
    elif data_augmentation_config["target"] == "minority":
        data_augment_target = [1, 2]
    elif data_augmentation_config["target"] == "all":
        data_augment_target = [0, 1, 2]
    else:
        raise ValueError("Invalid arguments for target. Should be 'all', 'majority' or 'minority")
else:
    raise NotImplementedError

In [4]:
# creating and defining the model
device = torch.device("cuda" if torch.cuda.is_available and model_config["use_gpu"] else "cpu")

model = choose_model(
    mode=mode,
    model_name=model_config["name"],
    device=device
)

optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=model_config["learning_rate"],
    betas=(0.9, 0.98),
    eps=1e-9,
    weight_decay=0
)
loss = torch.nn.CrossEntropyLoss()
scheduler = None
mixer = None

# creating the model checkpoint object
sbm = SaveBestModel(
    output_dir=os.path.join(model_config["output_path"], dataset, mode, model_config["name"]),
    model_name=model_config["name"]
)

if model_config["use_lr_scheduler"]:
    print("\nWARNING: Using learning rate scheduler!\n")
    scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

if "mixup" in data_augmentation_config["techniques"].keys():
    print("\nWARNING: Using mixup data augmentation technique!\n")
    mixer = Mixup(
        alpha=data_augmentation_config["techniques"]["mixup"]["alpha"]
    )

if "specmix" in data_augmentation_config["techniques"].keys():
    print("\nWARNING: Using specmix data augmentation technique!\n")
    mixer = Specmix(
        p=data_augmentation_config["p"],
        min_band_size=data_augmentation_config["techniques"]["specmix"]["min_band_size"],
        max_band_size=data_augmentation_config["techniques"]["specmix"]["max_band_size"],
        max_frequency_bands=data_augmentation_config["techniques"]["specmix"]["max_frequency_bands"],
        max_time_bands=data_augmentation_config["techniques"]["specmix"]["max_time_bands"],
        device=device
    )

if "cutmix" in data_augmentation_config["techniques"].keys():
    print("\nWARNING: Using cutmix data augmentation technique!\n")
    mixer = Cutmix(
        alpha=data_augmentation_config["techniques"]["cutmix"]["alpha"],
        p=data_augmentation_config["p"]
    )

# creating the training dataloader
training_dataloader = create_dataloader(
    X=X_train,
    y=y_train,
    feature_config=feature_config,
    wavelet_config=wavelet_config,
    data_augmentation_config=data_augmentation_config,
    num_workers=0,
    mode=mode,
    shuffle=False,
    training=True,
    batch_size=model_config["batch_size"],
    data_augment_target=data_augment_target,
    worker_init_fn=seed_worker,
    generator=g
)

# creating the validation dataloader
validation_dataloader = create_dataloader(
    X=X_valid,
    y=y_valid,
    feature_config=feature_config,
    wavelet_config=wavelet_config,
    data_augmentation_config=data_augmentation_config,
    num_workers=0,
    mode=mode,
    shuffle=False,
    training=False,
    batch_size=model_config["batch_size"],
    data_augment_target=data_augment_target,
    worker_init_fn=seed_worker,
    generator=g
)

# training loop
for epoch in range(1, model_config["epochs"] + 1):
    print(f"Epoch: {epoch}/{model_config['epochs']}")

    train_f1, train_loss = train(
        device=device,
        dataloader=training_dataloader,
        optimizer=optimizer,
        model=model,
        loss=loss,
        mixer=mixer
    )

    valid_f1, valid_loss = evaluate(
        device=device,
        dataloader=validation_dataloader,
        model=model,
        loss=loss
    )

    print(f"\nEpoch: {epoch}")
    print(f"Train F1-Score: {train_f1:1.6f}")
    print(f"Train Loss: {train_loss:1.6f}")
    print(f"Validation F1-Score: {valid_f1:1.6f}")
    print(f"Validation Loss: {valid_loss:1.6f}\n")

    # updating learning rate
    if not scheduler is None:
        scheduler.step()



Epoch: 1/100

Epoch: 1
Train F1-Score: 0.317514
Train Loss: 6.330465
Validation F1-Score: 0.294643
Validation Loss: 2.466162

Epoch: 2/100

Epoch: 2
Train F1-Score: 0.357496
Train Loss: 1.220314
Validation F1-Score: 0.285728
Validation Loss: 1.627631

Epoch: 3/100

Epoch: 3
Train F1-Score: 0.346884
Train Loss: 1.431178
Validation F1-Score: 0.294643
Validation Loss: 0.870709

Epoch: 4/100

Epoch: 4
Train F1-Score: 0.378746
Train Loss: 1.138878
Validation F1-Score: 0.294643
Validation Loss: 1.611214

Epoch: 5/100

Epoch: 5
Train F1-Score: 0.359964
Train Loss: 0.933236
Validation F1-Score: 0.294643
Validation Loss: 0.687110

Epoch: 6/100

Epoch: 6
Train F1-Score: 0.450343
Train Loss: 0.770722
Validation F1-Score: 0.294643
Validation Loss: 0.767883

Epoch: 7/100

Epoch: 7
Train F1-Score: 0.434742
Train Loss: 0.875190
Validation F1-Score: 0.294643
Validation Loss: 1.778136

Epoch: 8/100

Epoch: 8
Train F1-Score: 0.477555
Train Loss: 0.893147
Validation F1-Score: 0.362293
Validation Loss: 


Epoch: 65
Train F1-Score: 0.864051
Train Loss: 0.328679
Validation F1-Score: 0.294643
Validation Loss: 0.788029

Epoch: 66/100

Epoch: 66
Train F1-Score: 0.780291
Train Loss: 0.371931
Validation F1-Score: 0.292975
Validation Loss: 0.777221

Epoch: 67/100

Epoch: 67
Train F1-Score: 0.847011
Train Loss: 0.362661
Validation F1-Score: 0.294643
Validation Loss: 0.972381

Epoch: 68/100

Epoch: 68
Train F1-Score: 0.801813
Train Loss: 0.380822
Validation F1-Score: 0.294643
Validation Loss: 1.034332

Epoch: 69/100

Epoch: 69
Train F1-Score: 0.788347
Train Loss: 0.352996
Validation F1-Score: 0.292975
Validation Loss: 0.849804

Epoch: 70/100

Epoch: 70
Train F1-Score: 0.859703
Train Loss: 0.299168
Validation F1-Score: 0.294643
Validation Loss: 0.811231

Epoch: 71/100

Epoch: 71
Train F1-Score: 0.850833
Train Loss: 0.314636
Validation F1-Score: 0.325943
Validation Loss: 0.823040

Epoch: 72/100

Epoch: 72
Train F1-Score: 0.820433
Train Loss: 0.324193
Validation F1-Score: 0.294643
Validation Loss: 

## CNN 2

In [5]:
model_config["name"] = "cnn2"

# creating and defining the model
device = torch.device("cuda" if torch.cuda.is_available and model_config["use_gpu"] else "cpu")

model = choose_model(
    mode=mode,
    model_name=model_config["name"],
    device=device
)

optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=model_config["learning_rate"],
    betas=(0.9, 0.98),
    eps=1e-9,
    weight_decay=0
)
loss = torch.nn.CrossEntropyLoss()
scheduler = None
mixer = None

# creating the model checkpoint object
sbm = SaveBestModel(
    output_dir=os.path.join(model_config["output_path"], dataset, mode, model_config["name"]),
    model_name=model_config["name"]
)

if model_config["use_lr_scheduler"]:
    print("\nWARNING: Using learning rate scheduler!\n")
    scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

if "mixup" in data_augmentation_config["techniques"].keys():
    print("\nWARNING: Using mixup data augmentation technique!\n")
    mixer = Mixup(
        alpha=data_augmentation_config["techniques"]["mixup"]["alpha"]
    )

if "specmix" in data_augmentation_config["techniques"].keys():
    print("\nWARNING: Using specmix data augmentation technique!\n")
    mixer = Specmix(
        p=data_augmentation_config["p"],
        min_band_size=data_augmentation_config["techniques"]["specmix"]["min_band_size"],
        max_band_size=data_augmentation_config["techniques"]["specmix"]["max_band_size"],
        max_frequency_bands=data_augmentation_config["techniques"]["specmix"]["max_frequency_bands"],
        max_time_bands=data_augmentation_config["techniques"]["specmix"]["max_time_bands"],
        device=device
    )

if "cutmix" in data_augmentation_config["techniques"].keys():
    print("\nWARNING: Using cutmix data augmentation technique!\n")
    mixer = Cutmix(
        alpha=data_augmentation_config["techniques"]["cutmix"]["alpha"],
        p=data_augmentation_config["p"]
    )
    
# creating the training dataloader
training_dataloader = create_dataloader(
    X=X_train,
    y=y_train,
    feature_config=feature_config,
    wavelet_config=wavelet_config,
    data_augmentation_config=data_augmentation_config,
    num_workers=0,
    mode=mode,
    shuffle=False,
    training=True,
    batch_size=model_config["batch_size"],
    data_augment_target=data_augment_target,
    worker_init_fn=seed_worker,
    generator=g
)

# creating the validation dataloader
validation_dataloader = create_dataloader(
    X=X_valid,
    y=y_valid,
    feature_config=feature_config,
    wavelet_config=wavelet_config,
    data_augmentation_config=data_augmentation_config,
    num_workers=0,
    mode=mode,
    shuffle=False,
    training=False,
    batch_size=model_config["batch_size"],
    data_augment_target=data_augment_target,
    worker_init_fn=seed_worker,
    generator=g
)

# training loop
for epoch in range(1, model_config["epochs"] + 1):
    print(f"Epoch: {epoch}/{model_config['epochs']}")

    train_f1, train_loss = train(
        device=device,
        dataloader=training_dataloader,
        optimizer=optimizer,
        model=model,
        loss=loss,
        mixer=mixer
    )

    valid_f1, valid_loss = evaluate(
        device=device,
        dataloader=validation_dataloader,
        model=model,
        loss=loss
    )

    print(f"\nEpoch: {epoch}")
    print(f"Train F1-Score: {train_f1:1.6f}")
    print(f"Train Loss: {train_loss:1.6f}")
    print(f"Validation F1-Score: {valid_f1:1.6f}")
    print(f"Validation Loss: {valid_loss:1.6f}\n")

    # updating learning rate
    if not scheduler is None:
        scheduler.step()



Epoch: 1/100

Epoch: 1
Train F1-Score: 0.353489
Train Loss: 93.962838
Validation F1-Score: 0.294643
Validation Loss: 34.902069

Epoch: 2/100

Epoch: 2
Train F1-Score: 0.332722
Train Loss: 36.308141
Validation F1-Score: 0.294643
Validation Loss: 20.631414

Epoch: 3/100

Epoch: 3
Train F1-Score: 0.331723
Train Loss: 27.883729
Validation F1-Score: 0.294643
Validation Loss: 63.848333

Epoch: 4/100

Epoch: 4
Train F1-Score: 0.292883
Train Loss: 36.450043
Validation F1-Score: 0.289593
Validation Loss: 24.012514

Epoch: 5/100

Epoch: 5
Train F1-Score: 0.340602
Train Loss: 15.378846
Validation F1-Score: 0.101062
Validation Loss: 18.073466

Epoch: 6/100

Epoch: 6
Train F1-Score: 0.337718
Train Loss: 13.433275
Validation F1-Score: 0.412565
Validation Loss: 5.312002

Epoch: 7/100

Epoch: 7
Train F1-Score: 0.352801
Train Loss: 9.426174
Validation F1-Score: 0.294643
Validation Loss: 6.970740

Epoch: 8/100

Epoch: 8
Train F1-Score: 0.369211
Train Loss: 15.766314
Validation F1-Score: 0.294643
Valid


Epoch: 65
Train F1-Score: 0.715804
Train Loss: 0.414286
Validation F1-Score: 0.294643
Validation Loss: 0.885978

Epoch: 66/100

Epoch: 66
Train F1-Score: 0.748971
Train Loss: 0.404490
Validation F1-Score: 0.294643
Validation Loss: 0.925589

Epoch: 67/100

Epoch: 67
Train F1-Score: 0.792358
Train Loss: 0.362832
Validation F1-Score: 0.294643
Validation Loss: 0.877715

Epoch: 68/100

Epoch: 68
Train F1-Score: 0.666393
Train Loss: 0.405764
Validation F1-Score: 0.294643
Validation Loss: 0.864035

Epoch: 69/100

Epoch: 69
Train F1-Score: 0.840385
Train Loss: 0.356207
Validation F1-Score: 0.291291
Validation Loss: 0.941510

Epoch: 70/100

Epoch: 70
Train F1-Score: 0.737186
Train Loss: 0.397550
Validation F1-Score: 0.292975
Validation Loss: 0.802837

Epoch: 71/100

Epoch: 71
Train F1-Score: 0.808457
Train Loss: 0.353529
Validation F1-Score: 0.294643
Validation Loss: 0.845431

Epoch: 72/100

Epoch: 72
Train F1-Score: 0.860048
Train Loss: 0.358148
Validation F1-Score: 0.294643
Validation Loss: 

## CNN 3

In [6]:
model_config["name"] = "cnn3"

# creating and defining the model
device = torch.device("cuda" if torch.cuda.is_available and model_config["use_gpu"] else "cpu")

model = choose_model(
    mode=mode,
    model_name=model_config["name"],
    device=device
)

optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=model_config["learning_rate"],
    betas=(0.9, 0.98),
    eps=1e-9,
    weight_decay=0
)
loss = torch.nn.CrossEntropyLoss()
scheduler = None
mixer = None

# creating the model checkpoint object
sbm = SaveBestModel(
    output_dir=os.path.join(model_config["output_path"], dataset, mode, model_config["name"]),
    model_name=model_config["name"]
)

if model_config["use_lr_scheduler"]:
    print("\nWARNING: Using learning rate scheduler!\n")
    scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

if "mixup" in data_augmentation_config["techniques"].keys():
    print("\nWARNING: Using mixup data augmentation technique!\n")
    mixer = Mixup(
        alpha=data_augmentation_config["techniques"]["mixup"]["alpha"]
    )

if "specmix" in data_augmentation_config["techniques"].keys():
    print("\nWARNING: Using specmix data augmentation technique!\n")
    mixer = Specmix(
        p=data_augmentation_config["p"],
        min_band_size=data_augmentation_config["techniques"]["specmix"]["min_band_size"],
        max_band_size=data_augmentation_config["techniques"]["specmix"]["max_band_size"],
        max_frequency_bands=data_augmentation_config["techniques"]["specmix"]["max_frequency_bands"],
        max_time_bands=data_augmentation_config["techniques"]["specmix"]["max_time_bands"],
        device=device
    )

if "cutmix" in data_augmentation_config["techniques"].keys():
    print("\nWARNING: Using cutmix data augmentation technique!\n")
    mixer = Cutmix(
        alpha=data_augmentation_config["techniques"]["cutmix"]["alpha"],
        p=data_augmentation_config["p"]
    )

# creating the training dataloader
training_dataloader = create_dataloader(
    X=X_train,
    y=y_train,
    feature_config=feature_config,
    wavelet_config=wavelet_config,
    data_augmentation_config=data_augmentation_config,
    num_workers=0,
    mode=mode,
    shuffle=False,
    training=True,
    batch_size=model_config["batch_size"],
    data_augment_target=data_augment_target,
    worker_init_fn=seed_worker,
    generator=g
)

# creating the validation dataloader
validation_dataloader = create_dataloader(
    X=X_valid,
    y=y_valid,
    feature_config=feature_config,
    wavelet_config=wavelet_config,
    data_augmentation_config=data_augmentation_config,
    num_workers=0,
    mode=mode,
    shuffle=False,
    training=False,
    batch_size=model_config["batch_size"],
    data_augment_target=data_augment_target,
    worker_init_fn=seed_worker,
    generator=g
)

# training loop
for epoch in range(1, model_config["epochs"] + 1):
    print(f"Epoch: {epoch}/{model_config['epochs']}")

    train_f1, train_loss = train(
        device=device,
        dataloader=training_dataloader,
        optimizer=optimizer,
        model=model,
        loss=loss,
        mixer=mixer
    )

    valid_f1, valid_loss = evaluate(
        device=device,
        dataloader=validation_dataloader,
        model=model,
        loss=loss
    )

    print(f"\nEpoch: {epoch}")
    print(f"Train F1-Score: {train_f1:1.6f}")
    print(f"Train Loss: {train_loss:1.6f}")
    print(f"Validation F1-Score: {valid_f1:1.6f}")
    print(f"Validation Loss: {valid_loss:1.6f}\n")

    # updating learning rate
    if not scheduler is None:
        scheduler.step()



Epoch: 1/100

Epoch: 1
Train F1-Score: 0.301057
Train Loss: 1.130783
Validation F1-Score: 0.294643
Validation Loss: 1.317630

Epoch: 2/100

Epoch: 2
Train F1-Score: 0.304798
Train Loss: 0.716349
Validation F1-Score: 0.294643
Validation Loss: 0.816354

Epoch: 3/100

Epoch: 3
Train F1-Score: 0.308403
Train Loss: 0.714403
Validation F1-Score: 0.294643
Validation Loss: 0.782345

Epoch: 4/100

Epoch: 4
Train F1-Score: 0.300814
Train Loss: 0.681102
Validation F1-Score: 0.294643
Validation Loss: 0.754517

Epoch: 5/100

Epoch: 5
Train F1-Score: 0.292975
Train Loss: 0.681549
Validation F1-Score: 0.294643
Validation Loss: 0.721367

Epoch: 6/100

Epoch: 6
Train F1-Score: 0.292975
Train Loss: 0.681105
Validation F1-Score: 0.294643
Validation Loss: 0.746654

Epoch: 7/100

Epoch: 7
Train F1-Score: 0.292975
Train Loss: 0.678336
Validation F1-Score: 0.294643
Validation Loss: 0.716881

Epoch: 8/100

Epoch: 8
Train F1-Score: 0.292975
Train Loss: 0.672442
Validation F1-Score: 0.294643
Validation Loss: 


Epoch: 65
Train F1-Score: 0.301893
Train Loss: 0.586562
Validation F1-Score: 0.294643
Validation Loss: 0.736954

Epoch: 66/100

Epoch: 66
Train F1-Score: 0.310991
Train Loss: 0.594652
Validation F1-Score: 0.333001
Validation Loss: 0.692203

Epoch: 67/100

Epoch: 67
Train F1-Score: 0.328394
Train Loss: 0.610910
Validation F1-Score: 0.294643
Validation Loss: 0.723877

Epoch: 68/100

Epoch: 68
Train F1-Score: 0.349831
Train Loss: 0.585623
Validation F1-Score: 0.422779
Validation Loss: 0.698401

Epoch: 69/100

Epoch: 69
Train F1-Score: 0.368566
Train Loss: 0.589349
Validation F1-Score: 0.410127
Validation Loss: 0.732743

Epoch: 70/100

Epoch: 70
Train F1-Score: 0.384208
Train Loss: 0.573465
Validation F1-Score: 0.294643
Validation Loss: 0.740673

Epoch: 71/100

Epoch: 71
Train F1-Score: 0.363607
Train Loss: 0.564964
Validation F1-Score: 0.415636
Validation Loss: 0.738931

Epoch: 72/100

Epoch: 72
Train F1-Score: 0.420208
Train Loss: 0.578099
Validation F1-Score: 0.333001
Validation Loss: 