In [1]:
import os
import sys

# Project root = parent of the current working directory
# (assumes the kernel is started in the notebook's folder — TODO confirm).
PROJECT_ROOT = os.path.abspath(os.pardir)
# make the project-local packages (`train`, `src`) importable
sys.path.append(PROJECT_ROOT)

import pandas as pd
import numpy as np
import torch
import json
import random
import torch.nn as nn
import torch.nn.functional as F
from train import train, evaluate
from src.dataset import create_dataloader
from src.utils import feature_extraction_pipeline, read_features_files, choose_model, read_feature
from src.models.utils import SaveBestModel, weight_init
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR
from typing import Dict, Tuple, List
from sklearn.metrics import classification_report

# making sure the experiments are reproducible
seed = 2109
random.seed(seed)
np.random.seed(seed)
# torch.manual_seed seeds torch's default generator (used by CPU-side random
# ops); calling only torch.cuda.manual_seed leaves that generator unseeded.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
# also seed every visible GPU, not only the current device
torch.cuda.manual_seed_all(seed)
# NOTE: setting PYTHONHASHSEED here only affects processes started afterwards;
# the hash seed of the already-running kernel cannot be changed.
os.environ["PYTHONHASHSEED"] = str(seed)
# disable cuDNN auto-tuning and force deterministic cuDNN kernels
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

def seed_worker(worker_id: int):
    """Re-seed NumPy and `random` from torch's current initial seed.

    Used as the `worker_init_fn` of the dataloaders.

    Args:
        worker_id: index of the worker being initialised; accepted to match
            the callback signature but not used.
    """
    # fold torch's seed into 32 bits before handing it to the other RNGs
    derived_seed = torch.initial_seed() % (1 << 32)
    for reseed in (np.random.seed, random.seed):
        reseed(derived_seed)

# explicitly seeded CPU generator, handed to the dataloaders as `generator`
g = torch.Generator(device="cpu")
g.manual_seed(seed)

<torch._C.Generator at 0x7f10998fa870>

In [2]:
features_path = "../features/propor2022/"

# training features are read from fold "0"
X_train, y_train = (
    read_feature(path=features_path, fold="0", name=file_name)
    for file_name in ("X_train.pth", "y_train.pth")
)
print(f"Train: {X_train.shape}, {y_train.shape}")

# validation features are read from the same fold
X_valid, y_valid = (
    read_feature(path=features_path, fold="0", name=file_name)
    for file_name in ("X_valid.pth", "y_valid.pth")
)
print(f"Valid: {X_valid.shape}, {y_valid.shape}")

# test features are requested without a fold (fold=None)
X_test, y_test = (
    read_feature(path=features_path, fold=None, name=file_name)
    for file_name in ("X_test.pth", "y_test.pth")
)
print(f"Test: {X_test.shape}, {y_test.shape}")

Train: torch.Size([500, 1, 128000]), torch.Size([500, 3])
Valid: torch.Size([125, 1, 128000]), torch.Size([125, 3])
Test: torch.Size([308, 1, 128000]), torch.Size([308, 3])


In [3]:
# reading the parameters configuration file
# (context manager so the file handle is closed as soon as parsing finishes)
with open("../config/mode_1.json", "r") as config_file:
    params = json.load(config_file)

# parameters definition
k_fold = None
max_seconds = 16

# the k-fold section is optional in the configuration
if "kfold" in params:
    k_fold = params["kfold"]["num_k"]

# maximum number of audio samples, derived from the configured sample rate
max_samples = max_seconds * int(params["sample_rate"])

# "mode_3" starts from an empty feature configuration; every other mode
# reuses the "feature" section of the configuration file
if params["mode"] != "mode_3":
    feature_config = params["feature"]
else:
    feature_config = {}

feature_config["sample_rate"] = int(params["sample_rate"])
data_augmentation_config = params["data_augmentation"]
dataset = params["dataset"]
wavelet_config = params["wavelet"]

# notebook-specific overrides of the model section
# (model_config is the same dict object as params["model"])
model_config = params["model"]
model_config["use_lr_scheduler"] = False
model_config["epochs"] = 200
model_config["batch_size"] = 16

mode = params["mode"]

feat_path = os.path.join(params["output_path"], params["dataset"])

# translate the augmentation target name into the list of class indices
if dataset == "propor2022":
    if data_augmentation_config["target"] == "majority":
        data_augment_target = [0]
    elif data_augmentation_config["target"] == "minority":
        data_augment_target = [1, 2]
    elif data_augmentation_config["target"] == "all":
        data_augment_target = [0, 1, 2]
    else:
        raise ValueError("Invalid arguments for target. Should be 'all', 'majority' or 'minority'")
else:
    raise NotImplementedError(f"Dataset '{dataset}' is not supported")

In [4]:
class CNN(nn.Module):
    """Convolutional classifier built as a single `nn.Sequential`.

    Layer stack: two stages of Conv2d(12x12) -> ReLU -> MaxPool2d(2x2) ->
    Dropout(0.1) with 64 and then 128 output channels, followed by Flatten,
    Linear(linear_input_features -> 128), Dropout(0.1) and
    Linear(128 -> num_classes). The network returns the output of the last
    linear layer unchanged (no softmax is applied).

    Args:
        input_channels: number of channels of the input passed to the first
            convolution. Defaults to 1.
        linear_input_features: size of the flattened feature map fed to the
            first linear layer. It must match the spatial size of the input
            after both conv/pool stages. Defaults to 185856, the value
            previously hard-coded in this class.
        num_classes: number of output units of the last linear layer.
            Defaults to 3.
    """

    def __init__(
        self,
        input_channels: int = 1,
        linear_input_features: int = 185856,
        num_classes: int = 3
    ) -> None:
        super().__init__()
        self.input_channels = input_channels
        self.linear_input_features = linear_input_features

        self.model = nn.Sequential(
            nn.Conv2d(
                in_channels=self.input_channels,
                out_channels=64,
                kernel_size=(12, 12)
            ),
            nn.ReLU(),
            nn.MaxPool2d(
                kernel_size=(2, 2)
            ),
            nn.Dropout(p=0.1),
            nn.Conv2d(
                in_channels=64,
                out_channels=128,
                kernel_size=(12, 12)
            ),
            nn.ReLU(),
            nn.MaxPool2d(
                kernel_size=(2, 2)
            ),
            nn.Dropout(p=0.1),
            nn.Flatten(),
            nn.Linear(
                in_features=self.linear_input_features,
                out_features=128
            ),
            nn.Dropout(p=0.1),
            nn.Linear(
                in_features=128,
                out_features=num_classes
            )
        )

        # apply the project's weight initialisation to every submodule
        self.model.apply(weight_init)

    def forward(
        self,
        X: torch.Tensor
    ) -> torch.Tensor:
        """Run `X` through the sequential stack and return its output.

        Args:
            X: input tensor; the Conv2d front-end implies a layout of
                (batch, input_channels, height, width).

        Returns:
            The output of the final linear layer.
        """
        return self.model(X)

In [5]:
# creating and defining the model
# NOTE: `torch.cuda.is_available` has to be *called*; the bare function object
# is always truthy, which would select "cuda" even on a machine without a GPU.
device = torch.device(
    "cuda" if model_config["use_gpu"] and torch.cuda.is_available() else "cpu"
)

model = CNN().to(device=device)

optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=model_config["learning_rate"],
    betas=(0.9, 0.98),
    eps=1e-9
)
loss = torch.nn.CrossEntropyLoss()
scheduler = None

# creating the model checkpoint object
sbm = SaveBestModel(
    output_dir=os.path.join(model_config["output_path"], dataset, mode, model_config["name"]),
    model_name=model_config["name"]
)

# the learning-rate scheduler is optional and stays None unless enabled
if model_config["use_lr_scheduler"]:
    scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

# keyword arguments shared by the training and validation dataloaders
# NOTE(review): shuffle=False is used for the training loader as well —
# confirm that iterating the training set in a fixed order is intended.
loader_kwargs = dict(
    feature_config=feature_config,
    wavelet_config=wavelet_config,
    data_augmentation_config=data_augmentation_config,
    num_workers=0,
    mode=mode,
    shuffle=False,
    batch_size=model_config["batch_size"],
    data_augment_target=data_augment_target,
    worker_init_fn=seed_worker,
    generator=g
)

# creating the training dataloader
training_dataloader = create_dataloader(
    X=X_train,
    y=y_train,
    training=True,
    **loader_kwargs
)

# creating the validation dataloader
validation_dataloader = create_dataloader(
    X=X_valid,
    y=y_valid,
    training=False,
    **loader_kwargs
)

# training loop: one training pass and one validation pass per epoch
# NOTE(review): the `sbm` checkpoint object is never called inside this loop,
# so no best-model checkpoint appears to be written from here — confirm.
num_epochs = model_config["epochs"]

for epoch in range(1, num_epochs + 1):
    print(f"Epoch: {epoch}/{model_config['epochs']}")

    # optimisation pass over the training set
    train_metrics = train(
        device=device,
        dataloader=training_dataloader,
        optimizer=optimizer,
        model=model,
        loss=loss
    )
    train_f1, train_loss = train_metrics

    # evaluation pass over the validation set
    valid_metrics = evaluate(
        device=device,
        dataloader=validation_dataloader,
        model=model,
        loss=loss
    )
    valid_f1, valid_loss = valid_metrics

    # per-epoch report
    print(f"\nEpoch: {epoch}")
    print(f"Train F1-Score: {train_f1:1.6f}")
    print(f"Train Loss: {train_loss:1.6f}")
    print(f"Validation F1-Score: {valid_f1:1.6f}")
    print(f"Validation Loss: {valid_loss:1.6f}\n")

    # updating learning rate (only when a scheduler was configured)
    if scheduler is not None:
        scheduler.step()

Epoch: 1/200

Epoch: 1
Train F1-Score: 0.292975
Train Loss: 0.690344
Validation F1-Score: 0.294643
Validation Loss: 0.745476

Epoch: 2/200

Epoch: 2
Train F1-Score: 0.292975
Train Loss: 0.664315
Validation F1-Score: 0.294643
Validation Loss: 0.738709

Epoch: 3/200

Epoch: 3
Train F1-Score: 0.292975
Train Loss: 0.655119
Validation F1-Score: 0.294643
Validation Loss: 0.738622

Epoch: 4/200

Epoch: 4
Train F1-Score: 0.292975
Train Loss: 0.669426
Validation F1-Score: 0.294643
Validation Loss: 0.742769

Epoch: 5/200

Epoch: 5
Train F1-Score: 0.292975
Train Loss: 0.660821
Validation F1-Score: 0.294643
Validation Loss: 0.736576

Epoch: 6/200

Epoch: 6
Train F1-Score: 0.292975
Train Loss: 0.665350
Validation F1-Score: 0.294643
Validation Loss: 0.737303

Epoch: 7/200

Epoch: 7
Train F1-Score: 0.292975
Train Loss: 0.668242
Validation F1-Score: 0.294643
Validation Loss: 0.735579

Epoch: 8/200

Epoch: 8
Train F1-Score: 0.292975
Train Loss: 0.667920
Validation F1-Score: 0.294643
Validation Loss: 0.

KeyboardInterrupt: 