In [1]:
import os
import sys

PROJECT_ROOT = os.path.abspath(os.path.join(
                  os.path.dirname("test-pretrained"), 
                  os.pardir)
)
sys.path.append(PROJECT_ROOT)

import pandas as pd
import numpy as np
import torch
import json
import random
import torch.nn as nn
import torch.nn.functional as F
from copy import deepcopy
from src.features import extract_wavelet_from_raw_audio
from src.utils import feature_extraction_pipeline, read_features_files, choose_model, read_feature, pad_features
from src.models.utils import SaveBestModel, weight_init
from src.models.cnn3 import *
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import StepLR
from typing import Dict, Tuple, List, Union, Iterable
from sklearn.metrics import classification_report

# making sure the experiments are reproducible
seed = 2109
random.seed(seed)
np.random.seed(seed)
# torch.manual_seed seeds PyTorch's default generator; seeding only the CUDA
# generator leaves CPU-side randomness (e.g. weight initialisation) unseeded.
torch.manual_seed(seed)
# seeds every visible GPU, not only the current one (no-op without CUDA)
torch.cuda.manual_seed_all(seed)
# NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so setting it
# here presumably only affects child processes, not this kernel - confirm.
os.environ["PYTHONHASHSEED"] = str(seed)
# disable cuDNN auto-tuning and force deterministic cuDNN kernels
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

def seed_worker(worker_id: int):
    """
    Re-seeds NumPy and Python's `random` from the current torch seed.

    Meant to be passed as `worker_init_fn` to a DataLoader.

    Args:
        worker_id (int): the worker index (unused).
    """
    # fold torch's seed into the 32-bit range before handing it over
    derived_seed = torch.initial_seed() % (1 << 32)
    for reseed in (np.random.seed, random.seed):
        reseed(derived_seed)

# Dedicated torch generator seeded with the global `seed`; it is handed to the
# DataLoaders created further down in this notebook.
g = torch.Generator()
# Kept as the cell's last expression: manual_seed returns the Generator, which
# is what the notebook echoes as this cell's output.
g.manual_seed(seed)

<torch._C.Generator at 0x7f0f500d6c10>

In [2]:
def train(
    model: nn.Module,
    dataloader: DataLoader,
    optimizer: torch.optim.Adam,
    loss: torch.nn.CrossEntropyLoss,
    device: torch.device
) -> Tuple[float, float]:
    """
    Runs one training epoch over `dataloader` and updates the model weights.

    Each batch must be a dict with a "features" and a "labels" entry; the model
    must return a dict containing "clipwise_output". Both the labels and the
    model output are reduced with argmax over the last axis to obtain class
    indices for the F1 computation.

    Args:
        model (nn.Module): the created model.
        dataloader (DataLoader): the training dataloader.
        optimizer (torch.optim.Adam): the optimizer used.
        loss (torch.nn.CrossEntropyLoss): the loss function used.
        device (torch.device): which device to use.

    Returns:
        Tuple[float, float]: the training macro f1 and the mean batch loss,
        respectively.

    Raises:
        ValueError: if the dataloader yields no batches.
    """
    model.train()
    predictions = []
    targets = []
    train_loss = 0.0
    num_batches = 0

    for batch in dataloader:
        data = batch["features"].to(device=device, dtype=torch.float32)
        target = batch["labels"].to(device=device, dtype=torch.float32)

        optimizer.zero_grad()
        clipwise_output = model(data)["clipwise_output"]

        batch_loss = loss(clipwise_output, target)
        batch_loss.backward()
        optimizer.step()

        train_loss += batch_loss.item()
        num_batches += 1

        # class indices kept as column vectors, one row per sample
        prediction = clipwise_output.argmax(dim=-1, keepdim=True).to(dtype=torch.int)
        predictions.extend(prediction.detach().cpu().numpy().tolist())

        target = target.argmax(dim=-1, keepdim=True).to(dtype=torch.int)
        targets.extend(target.detach().cpu().numpy().tolist())

    # an explicit counter avoids relying on a loop variable that is unbound
    # when the dataloader is empty
    if num_batches == 0:
        raise ValueError("The training dataloader did not yield any batch.")

    train_loss = train_loss / num_batches
    report = classification_report(
        targets,
        predictions,
        digits=6,
        output_dict=True,
        zero_division=0.0
    )
    train_f1 = report["macro avg"]["f1-score"]
    return train_f1, train_loss

def evaluate(
    model: nn.Module,
    dataloader: DataLoader,
    loss: torch.nn.CrossEntropyLoss,
    device: torch.device
) -> Tuple[float, float]:
    """
    Evaluates the model over `dataloader` without updating its weights.

    Each batch must be a dict with a "features" and a "labels" entry; the model
    must return a dict containing "clipwise_output". Both the labels and the
    model output are reduced with argmax over the last axis to obtain class
    indices for the F1 computation.

    Args:
        model (nn.Module): the created model.
        dataloader (DataLoader): the validation dataloader.
        loss (torch.nn.CrossEntropyLoss): the loss function used.
        device (torch.device): which device to use.

    Returns:
        Tuple[float, float]: the validation macro f1 and the mean batch loss,
        respectively.

    Raises:
        ValueError: if the dataloader yields no batches.
    """
    model.eval()
    predictions = []
    targets = []
    validation_loss = 0.0
    num_batches = 0

    with torch.inference_mode():
        for batch in dataloader:
            data = batch["features"].to(device=device, dtype=torch.float32)
            target = batch["labels"].to(device=device, dtype=torch.float32)

            clipwise_output = model(data)["clipwise_output"]

            validation_loss += loss(clipwise_output, target).item()
            num_batches += 1

            # class indices kept as column vectors, one row per sample
            prediction = clipwise_output.argmax(dim=-1, keepdim=True).to(dtype=torch.int)
            predictions.extend(prediction.detach().cpu().numpy().tolist())

            target = target.argmax(dim=-1, keepdim=True).to(dtype=torch.int)
            targets.extend(target.detach().cpu().numpy().tolist())

    if num_batches == 0:
        raise ValueError("The validation dataloader did not yield any batch.")

    # divide by the number of batches; a zero-based enumerate index would be
    # one short (and zero for a single batch)
    validation_loss = validation_loss / num_batches
    report = classification_report(
        targets,
        predictions,
        digits=6,
        output_dict=True,
        zero_division=0.0
    )
    validation_f1 = report["macro avg"]["f1-score"]
    return validation_f1, validation_loss

In [3]:
class Custom_Dataset(Dataset):
    """
    Dataset that converts stored audio into wavelet features on the fly.

    For every item the audio is (optionally) augmented, turned into a mel
    spectrogram or MFCC, (optionally) augmented again at feature level, and
    finally passed through `extract_wavelet_from_spectrogram`.

    NOTE(review): `extract_melspectrogram`, `extract_mfcc`,
    `extract_wavelet_from_spectrogram`, `_apply_augmentation_raw_audio` and
    `_apply_augmentation_feature` are not imported explicitly in this
    notebook; presumably they come from a star import - confirm.
    """
    def __init__(
        self,
        X: torch.Tensor,
        y: torch.Tensor,
        feature_config: Dict,
        wavelet_config: Dict,
        data_augmentation_config: Union[Dict, None],
        training: bool,
        data_augment_target: Union[str, None]
    ) -> None:
        """
        Args:
            X (torch.Tensor): the audios; indexed as X[index, :, :] and each
                item must be 2-D with a leading dimension of size 1.
            y (torch.Tensor): the labels; argmax over the last axis gives the
                class index of each sample.
            feature_config (Dict): needs "name" ("mel_spectrogram" or "mfcc"),
                "sample_rate", "n_fft", "hop_length" and "n_mels"/"n_mfcc".
            wavelet_config (Dict): needs "name", "level", "type" and "mode".
            data_augmentation_config (Union[Dict, None]): its "mode" entry
                ("raw_audio" or "feature") selects the augmentation stage.
            training (bool): augmentation is only applied when True.
            data_augment_target (Union[str, None]): the classes to augment, or
                None to disable augmentation. NOTE(review): annotated as str
                but tested for membership with an integer class index -
                presumably a collection of class indices; confirm.
        """
        self.X = X
        self.y = y
        self.feature_config = feature_config
        self.wavelet_config = wavelet_config
        self.data_augmentation_config = data_augmentation_config
        self.training = training
        self.data_augment_target = data_augment_target
        
    def __len__(self):
        return len(self.y)

    def _should_augment(self, index: int, mode: str) -> bool:
        """Tells whether sample `index` must be augmented at stage `mode`."""
        if self.data_augment_target is None:
            return False

        label = self.y[index].argmax(dim=-1, keepdim=False).item()
        return (
            label in self.data_augment_target
            and self.training
            and self.data_augmentation_config["mode"] == mode
        )
        
    def __getitem__(
        self,
        index: int
    ) -> Dict:
        """
        Builds the features of one sample.

        Args:
            index (int): the sample index.

        Returns:
            Dict: {"features": wavelet output with a new leading axis,
            "labels": self.y[index]}.

        Raises:
            ValueError: if feature_config["name"] is not supported.
        """
        batch = {}
        # clone() gives an independent copy of just this sample, so the
        # augmentation can never modify the tensor stored in self.X
        audio = self.X[index, :, :].clone()
        
        if self._should_augment(index, "raw_audio"):
            audio = _apply_augmentation_raw_audio(
                audio=audio,
                data_augmentation_config=self.data_augmentation_config,
                feature_config=self.feature_config
            )
        
        assert audio.ndim == 2 and audio.shape[0] == 1
                
        feature_name = self.feature_config["name"]
        if feature_name == "mel_spectrogram":
            feat = extract_melspectrogram(
                audio=audio,
                sample_rate=self.feature_config["sample_rate"],
                n_fft=self.feature_config["n_fft"],
                hop_length=self.feature_config["hop_length"],
                n_mels=self.feature_config["n_mels"]
            )
        elif feature_name == "mfcc":
            feat = extract_mfcc(
                audio=audio,
                sample_rate=self.feature_config["sample_rate"],
                n_fft=self.feature_config["n_fft"],
                hop_length=self.feature_config["hop_length"],
                n_mfcc=self.feature_config["n_mfcc"]
            )
        else:
            # fail with a clear message instead of an unbound `feat` below
            raise ValueError(
                f"Unsupported feature name: {feature_name!r}. "
                "Expected 'mel_spectrogram' or 'mfcc'."
            )
        
        assert feat.ndim == 3 and feat.shape[0] == 1

        if self._should_augment(index, "feature"):
            feat = _apply_augmentation_feature(
                audio=feat,
                data_augmentation_config=self.data_augmentation_config,
                feature_config=self.feature_config
            )
        
        assert feat.ndim == 3 and feat.shape[0] == 1
        
        feat = feat.permute(0, 2, 1) # time and frequency axis permutation
        
        X, _ = extract_wavelet_from_spectrogram(
            spectrogram=feat.squeeze(0),
            wavelet=self.wavelet_config["name"],
            maxlevel=self.wavelet_config["level"],
            type=self.wavelet_config["type"],
            mode=self.wavelet_config["mode"]
        )

        assert X.ndim == 2

        batch["features"] = X.unsqueeze(0)
        batch["labels"] = self.y[index]
        return batch
    
def create_dataloader(
    X: torch.Tensor,
    y: torch.Tensor,
    batch_size: int,
    wavelet_config: Dict,
    worker_init_fn: Iterable,
    generator: torch.Generator,
    num_workers: int = 0,
    shuffle: bool = True,
    training: bool = True,
    feature_config: Union[Dict, None] = None,
    data_augmentation_config: Union[Dict, None] = None,
    data_augment_target: Union[str, None] = None
) -> DataLoader:
    """
    Wraps the data in a `Custom_Dataset` and returns its DataLoader.

    Args:
        X (torch.Tensor): the audios.
        y (torch.Tensor): the labels.
        batch_size (int): the batch size.
        wavelet_config (Dict): the wavelet configuration.
        worker_init_fn (Iterable): forwarded to DataLoader's `worker_init_fn`
            (in practice a callable such as `seed_worker`).
        generator (torch.Generator): forwarded to DataLoader's `generator`.
        num_workers (int): number of loading workers. Defaults to 0.
        shuffle (bool): whether to shuffle the samples. Defaults to True.
        training (bool): forwarded to the dataset. Defaults to True.
        feature_config (Union[Dict, None]): the feature extraction
            configuration. Mandatory in practice; it only defaults to None so
            the original positional signature keeps working.
        data_augmentation_config (Union[Dict, None]): the augmentation
            configuration, if any. Defaults to None.
        data_augment_target (Union[str, None]): the classes to augment, if any.
            Defaults to None.

    Returns:
        DataLoader: the dataloader (never drops the last batch).

    Raises:
        ValueError: if `feature_config` is not provided.
    """
    # the dataset reads feature_config["name"] for every item, so fail early
    # with a clear message instead of inside the first batch
    if feature_config is None:
        raise ValueError(
            "feature_config is required to build the dataset; pass "
            "feature_config=... to create_dataloader."
        )
    
    # creating the custom dataset
    dataset = Custom_Dataset(
        X=X,
        y=y,
        feature_config=feature_config,
        wavelet_config=wavelet_config,
        data_augmentation_config=data_augmentation_config,
        training=training,
        data_augment_target=data_augment_target
    )
    
    # creating the dataloader
    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=shuffle,
        drop_last=False,
        worker_init_fn=worker_init_fn,
        generator=generator
    )
    
    return dataloader

In [4]:
class Transfer_Cnn6(nn.Module):
    def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin, 
        fmax, classes_num, freeze_base):
        """Classifier for a new task using pretrained Cnn6 as a sub module.

        Args:
            sample_rate, window_size, hop_size, mel_bins, fmin, fmax:
                forwarded unchanged to the Cnn6 base model.
            classes_num (int): number of classes of the new task.
            freeze_base (bool): when True the base parameters are frozen and
                only the transfer layer is trained.

        NOTE(review): `Cnn6` and `init_layer` are not defined or explicitly
        imported in this notebook; presumably they are expected from a star
        import - confirm.
        """
        super(Transfer_Cnn6, self).__init__()
        # the base is built with the AudioSet head size so that the pretrained
        # checkpoint can be loaded into it
        audioset_classes_num = 527
        
        self.base = Cnn6(sample_rate, window_size, hop_size, mel_bins, fmin, 
            fmax, audioset_classes_num)

        # Transfer to another task layer
        # assumes the base embedding has 512 features - TODO confirm against Cnn6
        self.fc_transfer = nn.Linear(512, classes_num, bias=True)

        if freeze_base:
            # Freeze AudioSet pretrained layers
            for param in self.base.parameters():
                param.requires_grad = False

        self.init_weights()

    def init_weights(self):
        """Initializes the transfer layer only; the base is left untouched."""
        init_layer(self.fc_transfer)

    def load_from_pretrain(self, pretrained_checkpoint_path):
        """Loads the base weights from the "model" entry of a checkpoint.

        Args:
            pretrained_checkpoint_path (str): path to the checkpoint file.
        """
        # map_location="cpu" lets a checkpoint saved on GPU be loaded on a
        # CPU-only machine; load_state_dict then copies the values into the
        # parameters on whatever device the base currently lives.
        # NOTE: torch.load unpickles the file - only load trusted checkpoints.
        checkpoint = torch.load(pretrained_checkpoint_path, map_location="cpu")
        self.base.load_state_dict(checkpoint['model'])

    def forward(self, input, mixup_lambda=None):
        """Runs the base model and the transfer layer.

        Args:
            input: forwarded unchanged to the base model.
            mixup_lambda: forwarded unchanged to the base model.

        Returns:
            dict: the base output dict, with 'clipwise_output' replaced by the
            log-softmax of the transfer layer applied to 'embedding'.
        """
        output_dict = self.base(input, mixup_lambda)
        embedding = output_dict['embedding']
        clipwise_output = torch.log_softmax(self.fc_transfer(embedding), dim=-1)
        output_dict['clipwise_output'] = clipwise_output
 
        return output_dict

In [6]:
features_path = "../features/propor2022/"

# training split, read from fold "0"
X_train, y_train = (
    read_feature(path=features_path, fold="0", name=tensor_file)
    for tensor_file in ("X_train.pth", "y_train.pth")
)
print(f"Train: {X_train.shape}, {y_train.shape}")

# validation split, read from the same fold
X_valid, y_valid = (
    read_feature(path=features_path, fold="0", name=tensor_file)
    for tensor_file in ("X_valid.pth", "y_valid.pth")
)
print(f"Valid: {X_valid.shape}, {y_valid.shape}")

# testing split, read without a fold
X_test, y_test = (
    read_feature(path=features_path, fold=None, name=tensor_file)
    for tensor_file in ("X_test.pth", "y_test.pth")
)
print(f"Test: {X_test.shape}, {y_test.shape}")

Train: torch.Size([500, 1, 128000]), torch.Size([500, 3])
Valid: torch.Size([125, 1, 128000]), torch.Size([125, 3])
Test: torch.Size([308, 1, 128000]), torch.Size([308, 3])


In [7]:
# reading the parameters configuration file
# (context manager so the file handle is closed right after parsing)
with open("../config/mode_2.json", "r") as config_file:
    params = json.load(config_file)

if params["mode"] != "mode_3":
    feature_config = params["feature"]
else:
    feature_config = {}

# model hyper-parameters handed to the network constructor below
sample_rate = 32000
window_size = 1024
hop_size = 320
mel_bins = 64
fmin = 0
# NOTE(review): fmax equals the sample rate, i.e. twice the Nyquist
# frequency (16000 Hz) - confirm this is intended.
fmax = 32000
freeze_base = False
# NOTE(review): machine-specific absolute path; only read when `pretrain` is True.
pretrained_checkpoint_path = "/home/greca/Downloads/Cnn6_mAP=0.343.pth"
pretrain = False
classes_num = 3

data_augmentation_config = None
dataset = params["dataset"]
wavelet_config = params["wavelet"]
model_config = params["model"]
mode = params["mode"]
# use the GPU only when it is both requested and actually available
# (is_available must be called; the bare function object is always truthy)
use_cuda = torch.cuda.is_available() and model_config["use_gpu"]
device = torch.device("cuda" if use_cuda else "cpu")
feat_path = os.path.join(params["output_path"], params["dataset"])

model = CNN3_Mode1(sample_rate, window_size, hop_size, mel_bins, fmin, fmax, classes_num)

# Load pretrained model
# NOTE(review): in this notebook `load_from_pretrain` is defined on
# Transfer_Cnn6; confirm CNN3_Mode1 provides it before enabling `pretrain`.
if pretrain:
    print('Load pretrained model from {}'.format(pretrained_checkpoint_path))
    model.load_from_pretrain(pretrained_checkpoint_path)
    # only report success when a checkpoint was actually loaded
    print('Load pretrained model successfully!')

model.to(device)

NameError: name 'Cnn6' is not defined

In [None]:
# creating the optimizer and the loss function
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=model_config["learning_rate"],
    betas=(0.9, 0.98),
    eps=1e-9
)
loss = torch.nn.CrossEntropyLoss()

# creating the training dataloader
# NOTE(review): shuffle is disabled for the training set, so the seeded
# generator `g` has no sample order to control - confirm this is intended.
training_dataloader = create_dataloader(
    X=X_train,
    y=y_train,
    wavelet_config=wavelet_config,
    num_workers=0,
    shuffle=False,
    training=True,
    batch_size=model_config["batch_size"],
    worker_init_fn=seed_worker,
    generator=g
)

# creating the validation dataloader
validation_dataloader = create_dataloader(
    X=X_valid,
    y=y_valid,
    wavelet_config=wavelet_config,
    num_workers=0,
    shuffle=False,
    training=False,
    batch_size=model_config["batch_size"],
    worker_init_fn=seed_worker,
    generator=g
)

# training loop
# (no train/evaluate pass is run before the loop: an extra pass would update
# the weights for one unreported epoch and shift every reported metric)
for epoch in range(1, model_config["epochs"] + 1):
    print(f"Epoch: {epoch}/{model_config['epochs']}")

    train_f1, train_loss = train(
        device=device,
        dataloader=training_dataloader,
        optimizer=optimizer,
        model=model,
        loss=loss
    )

    valid_f1, valid_loss = evaluate(
        device=device,
        dataloader=validation_dataloader,
        model=model,
        loss=loss
    )

    print(f"\nEpoch: {epoch}")
    print(f"Train F1-Score: {train_f1:1.6f}")
    print(f"Train Loss: {train_loss:1.6f}")
    print(f"Validation F1-Score: {valid_f1:1.6f}")
    print(f"Validation Loss: {valid_loss:1.6f}\n")