In [1]:
from __future__ import division
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
import pandas as pd
from timeit import default_timer
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, Dataset, Subset
import torch
import torch.nn.functional as F
import torch.nn as nn
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import os
import random
import argparse
from tqdm import tqdm
import librosa
from sklearn.metrics import balanced_accuracy_score, accuracy_score
from torchsummary import summary
from sklearn.model_selection import KFold
from torchvision.models import densenet121



os.environ['CUDA_VISIBLE_DEVICES'] ='0'

In [2]:
EPOCHS = 20
SEED = 2024
BATCH_SIZE = 32
TEST_SPLIT_RATIO = 0.25
N_FFT = 256
HOP_LEN = 256 // 6
AUGM = True
# Creating the results directory
if not os.path.exists('results'):
    os.makedirs('results')
RESULTS_FILENAME = "./results/inrun_results" # _x.csv
VALID_RESULTS_FILENAME = "./results/valid_results" # _x.csv
TRAIN_RESULTS_FILENAME = "./results/train_results" # _x.csv
BEST_MODEL_FILENAME = "./results/best-model" # _x.pt
DIV_FACTOR = 5.
FINAL_DIV_FACTOR = 10.
WEIGHT_DECAY = 0.005
LEARNING_RATE = 0.0005
EVAL_FREQ=4

In [3]:
TRAIN_DATASET = "../data/train_whales.csv"
TEST_DATASET = "../data/test_whales.csv"

In [4]:
# Fixing the seeds
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
random.seed(SEED)
np.random.seed(SEED)

print(f"Cuda is available: {torch.cuda.is_available()}")
dev_names = [torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())]
print(f"Device: {dev_names}")
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

Cuda is available: True
Device: ['NVIDIA GeForce RTX 3090']


# Loading the data

In [5]:
target_names = ["no-whale","whale"]
target_names_dict = {target_names[i]: i for i in range(len(target_names))}

In [6]:
t_s = default_timer()
data_train = pd.read_csv(TRAIN_DATASET,sep=",")
columns = data_train.columns
data_train[columns[-1]]=data_train[columns[-1]].replace(target_names_dict)
data_train = data_train.values
data_train_labels = data_train[:,-1].reshape(-1)
data_train_labels = data_train_labels.astype(int)
data_train = data_train[:,:-1]
t_e = default_timer()

print(f"Data loading - Elapsed time: {t_e-t_s:.2f}s")

Data loading - Elapsed time: 5.56s


In [7]:
data_train.shape

(10316, 4000)

In [8]:
kf = KFold(n_splits=5, shuffle=True, random_state=SEED)

In [9]:
def random_data_shift(data, u=1.0):
    if np.random.random() < u:
        shift = int(round(np.random.uniform(-len(data)*0.25, len(data)*0.25)))
        data = np.roll(data, shift)
    return data

class AugmentedSTFTDataset(Dataset):
    def __init__(self, inputs, targets, n_fft, hop_length, augment=False):
        self.inputs = inputs
        self.targets = targets
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.augment = augment

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        
        sample = self.inputs[idx]
        
        if self.augment:
            sample = random_data_shift(sample)
        
        data = librosa.stft(sample, n_fft=self.n_fft, hop_length=self.hop_length)
        data = librosa.amplitude_to_db(np.abs(data), ref=np.max)
        data = np.flipud(data)  # Flip vertically
        data = data.copy()  
        data = np.expand_dims(data, axis=-1)  # Add channel dimension
        data = np.transpose(data, (2, 0, 1))  # Reorder dimensions to match PyTorch expectations
        return torch.FloatTensor(data), torch.LongTensor([self.targets[idx]])

# Data loader
def create_dataloader(inputs, targets, batch_size, n_fft, hop_length, shuffle=True, augment=False):
    dataset = AugmentedSTFTDataset(inputs, targets, n_fft, hop_length, augment=augment)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
    return loader

In [10]:
test = np.arange(20)
print(test.shape, test)
test_out = random_data_shift(test)
print(test_out.shape,test_out)

(20,) [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
(20,) [18 19  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17]


# Model

In [11]:
class Densenet121(nn.Module):
    def __init__(self):
        super().__init__()
        self.densenet121 = densenet121().features
        self.densenet121.conv0 = torch.nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.classifier = torch.nn.Linear(1024, 1)
        
    def forward(self, x):
        out = self.densenet121.forward(x)
        out = torch.nn.functional.avg_pool2d(out, kernel_size = out.shape[2:], stride= out.shape[2:], padding=0, count_include_pad = False)
        out = self.classifier(out.view(out.shape[0], -1))
        return out

In [12]:
model = Densenet121()
model.to(device)

Densenet121(
  (densenet121): Sequential(
    (conv0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
       

# Utilities

In [13]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [14]:
def evaluate(model, iterator, criterion, device):

    epoch_loss = 0.0
    epoch_acc = 0.0

    model.eval()
    number_of_elements = 0
    
    correct_pred = torch.zeros(2)
    total_pred = torch.zeros(2)


    with torch.no_grad():
        
        y_true = []
        y_pred = []
        for x, y in iterator:

            x = x.to(device)
            y = y.float().to(device).view(-1,1)
            
            batch_size = x.shape[0]
            number_of_elements += batch_size
            
            pred = model(x).view(-1,1)
            loss = criterion(pred, y)
            
            top_pred = (torch.sigmoid(pred) > 0.5).int()
            acc = top_pred.eq(y.int().view_as(top_pred)).sum()

            epoch_loss += loss.item()
            epoch_acc += acc.item()
            
            y_true.append(y.int().cpu().numpy())
            y_pred.append(top_pred.cpu().numpy())
            
        y_true_a = np.concatenate(y_true, axis=0)
        y_pred_a = np.concatenate(y_pred, axis=0)
                        
        #balanced_acc = balanced_accuracy_score(y_true_a, y_pred_a)
        acc = accuracy_score(y_true_a, y_pred_a)

    return epoch_loss / number_of_elements, acc

# Training

In [15]:

for split_num, (train_index, valid_index) in enumerate(kf.split(data_train)):
    for model_num in range(5): 
        X_train, X_valid = data_train[train_index], data_train[valid_index]
        y_train, y_valid = data_train_labels[train_index], data_train_labels[valid_index]
        
        
        
        train_loader = create_dataloader(X_train, y_train, batch_size=BATCH_SIZE, n_fft=N_FFT, hop_length=HOP_LEN, shuffle=True, augment=AUGM)
        valid_loader = create_dataloader(X_valid, y_valid, batch_size=BATCH_SIZE, n_fft=N_FFT, hop_length=HOP_LEN, shuffle=False, augment=False)
        
        EVAL_FREQ_ = len(train_loader)//EVAL_FREQ + 1
        
        
        model = Densenet121()
        model.to(device)
        
        if len(dev_names)>1:
            model = torch.nn.DataParallel(model)
        model.to(device)
        print(f"Number of the parameters: {count_parameters(model)}\n")
        
        optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
        
        criterion = torch.nn.BCEWithLogitsLoss(reduction="sum").to(device)
        scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=LEARNING_RATE, div_factor=DIV_FACTOR, final_div_factor=FINAL_DIV_FACTOR, steps_per_epoch=len(train_loader), epochs = EPOCHS, verbose=0)
        
    
        train_accs = []
        train_losses = []
        valid_accs = []
        valid_losses = []
    
    
    
        f = open(f"{RESULTS_FILENAME}_{split_num}_{model_num}.csv", "w")
        f.write(160*"-"+"\n")
        f.write(f"Device: {dev_names[0]} | Number: {len(dev_names)}\n")
        f.write(f"Epochs: {EPOCHS}\n")
        f.write(f"Optimizer: {type (optimizer).__name__}\n") 
        f.write(f"Scheduler: {type (scheduler).__name__}\n") 
        f.write(f"Div factor: {DIV_FACTOR}\n") 
        f.write(f"Final div factor: {FINAL_DIV_FACTOR}\n") 
        f.write(f"Weight decay: {WEIGHT_DECAY}\n") 
        f.write(f"Learning rate: {LEARNING_RATE}\n") 
        f.write(f"Number of the parameters: {count_parameters(model)}\n")
        f.write(f"Model: {model}\n")
        f.write(160*"-"+"\n")
        f.close()
        print("Training")
        print(5 * "-" + f"{split_num:5}"+f"{model_num:5}" + 4*" "+ 160 * "-")
    
        best_valid_loss = float('inf')
        best_valid_acc = -1.0
        valid_acc = 0.0
    
        all_time_s = 0.0
        lr = 0.0
    
        train_accs = []
        train_losses = []
        valid_accs = []
        valid_losses = []
        valid_indices = []
    
        # Training the `split_num`-th model
        for epoch in range(EPOCHS):
    
            start_time = default_timer()
    
            epoch_loss = 0.0
            epoch_acc = 0.0
    
            model.train()
    
            batch_id = 0
            number_of_training_elements = 0
    
            valid_accs_temp = []
            valid_losses_temp = []
            valid_indices_temp = []
    
            for x, y in train_loader:
                x, y = x.to(device), y.float().to(device).view(-1,1)
    
                optimizer.zero_grad()
    
                y_pred = model(x)
                
                loss = criterion(y_pred, y)
                
                batch_size = x.shape[0]
                number_of_training_elements += batch_size
    
                loss.backward()
                optimizer.step()
                
    
                end_time = default_timer()
    
                # Evaluating the model
                if (batch_id+1)%EVAL_FREQ_==0:
    
                    valid_indices_temp.append(batch_id+1)
                    valid_loss, valid_acc = evaluate(model, valid_loader, criterion, device)
    
                    valid_losses_temp.append(valid_loss)
                    valid_accs_temp.append(valid_acc)
    
                    if valid_acc > best_valid_acc:
                        best_valid_acc = valid_acc
                        torch.save(model.state_dict(), f"{BEST_MODEL_FILENAME}_{split_num}_{model_num}.pt")
    
                    if valid_loss < best_valid_loss:
                        best_valid_loss = valid_loss
    
                    lr = scheduler.get_last_lr()[0]
    
                    line = f'\t | Epoch: {epoch+1:03} | Batch Id: {batch_id+1:05} | ET: {end_time-start_time:.2f}s | lr: {lr:.2e} | Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}% | B. Val. Loss: {best_valid_loss:.3f} |  B. Val. Acc: {best_valid_acc*100:.2f}%'
                    print(line)
                    f = open(f"{RESULTS_FILENAME}_{split_num}_{model_num}.csv", "a")
                    f.write(line+"\n")
                    f.close()
    
    
    
                batch_id+=1
                scheduler.step()
    
            valid_indices_temp.append(batch_id)
            valid_loss, valid_acc = evaluate(model, valid_loader, criterion, device)
    
            valid_losses_temp.append(valid_loss)
            valid_accs_temp.append(valid_acc)
    
            valid_losses.append(valid_losses_temp)
            valid_accs.append(valid_accs_temp)
    
            valid_indices.append(valid_indices_temp)
    
            if valid_acc > best_valid_acc:
                best_valid_acc = valid_acc
                torch.save(model.state_dict(), f"{BEST_MODEL_FILENAME}_{split_num}_{model_num}.pt")
    
            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
    
            train_loss, train_acc = evaluate(model, train_loader, criterion, device)
    
            end_time = default_timer()
    
            all_time_s += end_time - start_time
    
            train_losses.append(train_loss)
            train_accs.append(train_acc)
    
            line = f'Epoch: {epoch+1:03} | ET: {end_time-start_time:.2f}s | \t Train Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}% \t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}% \t | B. Val. Loss: {best_valid_loss:.3f} |  B. Val. Acc: {best_valid_acc*100:.2f}%'
            print(line)
            print(160*"-")
    
            f = open(f"{RESULTS_FILENAME}_{split_num}_{model_num}.csv", "a")
            f.write(line+"\n")
            f.write(160*"-"+"\n")
            f.close()
    
        line = f"\nDuration: {all_time_s:.2f}s\n"
        f = open(f"{RESULTS_FILENAME}_{split_num}_{model_num}.csv", "a")
        f.write(line+"\n")
        f.write(80*"-"+"\n")
        f.close()
    
        # Saving the results for analyzing them later in the evaluation part
        valid_losses_plot = []
        valid_accs_plot = []
        epoch_plot = []
        for epoch in range(len(valid_accs)):
            valid_accs_temp = valid_accs[epoch]
            valid_losses_temp = valid_losses[epoch]
            valid_indices_temp = valid_indices[epoch]
            ind = 0
            for mini_batch_id in valid_indices_temp:
                epoch_plot.append(epoch + mini_batch_id/len(train_loader))
                valid_accs_plot.append(valid_accs_temp[ind]*100)
                valid_losses_plot.append(valid_losses_temp[ind])
                ind += 1
    
        valid_results = pd.DataFrame({"epoch":epoch_plot,
                      "valid_loss":valid_losses_plot,
                      "valid_acc":valid_accs_plot
                      })
    
        valid_results.to_csv(f"{VALID_RESULTS_FILENAME}_{split_num}_{model_num}.csv",sep=";",index=False)
        train_accs = [acc*100 for acc in train_accs]
        train_results = pd.DataFrame({"epoch":list(np.arange(1,EPOCHS+1,1)),
                      "train_loss":train_losses,
                      "train_acc":train_accs
                      })
        train_results.to_csv(f"{TRAIN_RESULTS_FILENAME}_{split_num}_{model_num}.csv",sep=";",index=False)

Number of the parameters: 6948609

Training
-----    0    0    ----------------------------------------------------------------------------------------------------------------------------------------------------------------
	 | Epoch: 001 | Batch Id: 00065 | ET: 7.94s | lr: 1.02e-04 | Val. Loss: 0.437 |  Val. Acc: 81.20% | B. Val. Loss: 0.437 |  B. Val. Acc: 81.20%
	 | Epoch: 001 | Batch Id: 00130 | ET: 16.46s | lr: 1.07e-04 | Val. Loss: 0.418 |  Val. Acc: 80.91% | B. Val. Loss: 0.418 |  B. Val. Acc: 81.20%
	 | Epoch: 001 | Batch Id: 00195 | ET: 24.78s | lr: 1.15e-04 | Val. Loss: 0.422 |  Val. Acc: 81.25% | B. Val. Loss: 0.418 |  B. Val. Acc: 81.25%
Epoch: 001 | ET: 49.70s | 	 Train Loss: 0.348 | Train Acc: 84.17% 	 Val. Loss: 0.335 |  Val. Acc: 84.64% 	 | B. Val. Loss: 0.335 |  B. Val. Acc: 84.64%
----------------------------------------------------------------------------------------------------------------------------------------------------------------
	 | Epoch: 002 | Batch Id: 00

In [16]:
pd.DataFrame(y_train).value_counts()

1    4163
0    4090
Name: count, dtype: int64

In [17]:
pd.DataFrame(y_valid).value_counts()

0    1068
1     995
Name: count, dtype: int64