### Split the data into train set and test set AND VALIDATION SET !


In [6]:
import os
import random
import torch

from torch.utils.data import DataLoader, Subset
from src import DAPSAudioDataset_with_multiple_cropping

# Create class_mapping
audio_dir = "precomputed_spectrograms_aug"
class_1_speakers = ["f1", "f7", "f8", "m3", "m6", "m8"]

class_mapping = {}
for root, dirs, files in os.walk(audio_dir):
    for file in files:
        audio_path = os.path.join(root, file)
        try:
            speaker_prefix = file.split("_")[0]
            if speaker_prefix in class_1_speakers:
                class_mapping[audio_path] = 1
            else:
                class_mapping[audio_path] = 0
        except Exception as e:
            print(f"Error processing {audio_path}: {e}")

print(f"Class mapping created with {len(class_mapping)} items.")

RANDOM_SEED = 42
random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

full_dataset = DAPSAudioDataset_with_multiple_cropping(
    class_mapping=class_mapping, num_crops=3
)

validation_ids = []
train_test_ids = []

for idx in range(len(full_dataset)):
    path, label = full_dataset.extended_mapping[idx]
    filename = os.path.basename(path)
    filename_without_ext = os.path.splitext(filename)[0]
    if "script1" in filename_without_ext and "aug" not in filename_without_ext:
        validation_ids.append(idx)
    else:
        train_test_ids.append(idx)


validation_dataset = Subset(full_dataset, validation_ids)


def get_train_test_loaders(dataset, test_split=0.2, batch_size=64):
    indices = list(range(len(dataset)))
    random.shuffle(indices)

    split_idx = int(len(indices) * (1 - test_split))
    train_ids = indices[:split_idx]
    test_ids = indices[split_idx:]

    train_loader = DataLoader(
        Subset(dataset, train_ids),
        batch_size=batch_size,
        shuffle=True,
    )
    test_loader = DataLoader(
        Subset(dataset, test_ids),
        batch_size=batch_size,
        shuffle=True,
    )
    return train_loader, test_loader


train_loader, test_loader = get_train_test_loaders(Subset(full_dataset, train_test_ids))

validation_loader = DataLoader(validation_dataset, batch_size=16, shuffle=False)

for batch in train_loader:
    inputs, labels = batch
    print(f"Train Inputs shape: {inputs.shape}")
    print(f"Train Labels: {labels}")
    break

for batch in test_loader:
    inputs, labels = batch
    print(f"Test Inputs shape: {inputs.shape}")
    print(f"Test Labels: {labels}")
    break

for batch in validation_loader:
    inputs, labels = batch
    print(f"Validation Inputs shape: {inputs.shape}")
    print(f"Validation Labels: {labels}")
    break

Class mapping created with 2400 items.
Train Inputs shape: torch.Size([64, 1, 64, 64])
Train Labels: tensor([1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0,
        1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0,
        1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1])
Test Inputs shape: torch.Size([64, 1, 64, 64])
Test Labels: tensor([1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0,
        1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1])
Validation Inputs shape: torch.Size([16, 1, 64, 64])
Validation Labels: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])


In [7]:
print(len(train_loader))

79


In [8]:
print(len(train_loader))

79


In [9]:
len(test_loader)

20

### The model


In [10]:
from src import Net 

In [11]:
import torch



freq_bins = 64
time_steps = 64

model = Net(freq_bins, time_steps)
print(model)


# Test forward pass

test_input = torch.randn(1, 1, 64, 64)  # Simulated random input
output = model(test_input)

print(output.shape)

Net(
  (conv1): Conv2d(1, 60, kernel_size=(5, 5), stride=(1, 1))
  (bn1): BatchNorm2d(60, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(60, 160, kernel_size=(5, 5), stride=(1, 1))
  (bn2): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(160, 320, kernel_size=(3, 3), stride=(1, 1))
  (bn3): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=8000, out_features=120, bias=True)
  (dropout1): Dropout(p=0.2, inplace=False)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (dropout2): Dropout(p=0.3, inplace=False)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
torch.Size([1, 10])


### Train the model

In [14]:
# AVOID YOUR COMPUTER TO CRASH
import torch

torch.set_num_threads(4) 

In [15]:
import multiprocessing


print("Nombre de cœurs physiques:", multiprocessing.cpu_count())


logical_cores = torch.get_num_threads()
print("Nombre de cœurs logiques (threads disponibles):", logical_cores)


Nombre de cœurs physiques: 8
Nombre de cœurs logiques (threads disponibles): 4


In [None]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from src import Net

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
freq_bins = 64
time_steps = 64


model = Net(freq_bins, time_steps).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)  


scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)


num_epochs = 30

for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}, start training...")
    model.train()
    running_loss = 0.0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)

        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}")
    current_lr = optimizer.param_groups[0]['lr']
    print(f"Current learning rate: {current_lr:.6f}")

    
    scheduler.step() 
    

torch.save(
    model.state_dict(),
    f"simple_cnn_model_reduced_first_layer_decaying_lr{freq_bins}x{time_steps}.pth",
)
print("Model saved successfully.")

Epoch 1/30, start training...


  spectrogram_tensor = torch.load(spectrogram_path)


Epoch [1/30], Loss: 0.7781
Current learning rate: 0.001000
Epoch 2/30, start training...
Epoch [2/30], Loss: 0.6426
Current learning rate: 0.001000
Epoch 3/30, start training...
Epoch [3/30], Loss: 0.5010
Current learning rate: 0.001000
Epoch 4/30, start training...
Epoch [4/30], Loss: 0.3969
Current learning rate: 0.001000
Epoch 5/30, start training...
Epoch [5/30], Loss: 0.3590
Current learning rate: 0.001000
Epoch 6/30, start training...
Epoch [6/30], Loss: 0.3312
Current learning rate: 0.001000
Epoch 7/30, start training...
Epoch [7/30], Loss: 0.2988
Current learning rate: 0.001000
Epoch 8/30, start training...
Epoch [8/30], Loss: 0.2930
Current learning rate: 0.001000
Epoch 9/30, start training...
Epoch [9/30], Loss: 0.2836
Current learning rate: 0.001000
Epoch 10/30, start training...
Epoch [10/30], Loss: 0.2681
Current learning rate: 0.001000
Epoch 11/30, start training...
Epoch [11/30], Loss: 0.2434
Current learning rate: 0.000100
Epoch 12/30, start training...
Epoch [12/30], L

### Evaluate Model

In [17]:
import torch
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

from src import Net
freq_bins = 64
time_steps = 64

model = Net(freq_bins, time_steps)
model.load_state_dict(
    torch.load(
        f"simple_cnn_model_reduced_first_layer_decaying_lr{freq_bins}x{time_steps}.pth"
    )
)
print("the model is loaded")
model.eval()

all_labels = []
all_predictions = []

occurence = 0
with torch.no_grad():
    length = len(test_loader)
    for inputs, labels in test_loader:
        print(f"start testing occurence {occurence + 1}/{length}")
        occurence += 1
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)

        all_labels.extend(labels.cpu().numpy())
        all_predictions.extend(predicted.cpu().numpy())

f1 = f1_score(all_labels, all_predictions, average="binary")
precision = precision_score(all_labels, all_predictions, average="binary")
recall = recall_score(all_labels, all_predictions, average="binary")
accuracy = accuracy_score(all_labels, all_predictions)

# Print metrics
print(f"F1-Score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"Accuracy: {accuracy:.4f}")

  torch.load(
  spectrogram_tensor = torch.load(spectrogram_path)


the model is loaded
start testing occurence 1/20
start testing occurence 2/20
start testing occurence 3/20
start testing occurence 4/20
start testing occurence 5/20
start testing occurence 6/20
start testing occurence 7/20
start testing occurence 8/20
start testing occurence 9/20
start testing occurence 10/20
start testing occurence 11/20
start testing occurence 12/20
start testing occurence 13/20
start testing occurence 14/20
start testing occurence 15/20
start testing occurence 16/20
start testing occurence 17/20
start testing occurence 18/20
start testing occurence 19/20
start testing occurence 20/20
F1-Score: 0.9027
Precision: 0.9707
Recall: 0.8435
Accuracy: 0.8865


In [18]:
import torch
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

from src import Net

freq_bins = 64
time_steps = 64

model = Net(freq_bins, time_steps)
model.load_state_dict(
    torch.load(
        f"simple_cnn_model_reduced_first_layer_decaying_lr{freq_bins}x{time_steps}.pth"
    )
)
print("The model is loaded.")
model.eval()

all_labels = []
all_predictions = []

with torch.no_grad():
    length = len(validation_loader)
    for occurence, (inputs, labels) in enumerate(validation_loader, 1):
        print(f"Start testing occurrence {occurence}/{length}")
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)

        all_labels.extend(labels.cpu().numpy())
        all_predictions.extend(predicted.cpu().numpy())

# Calculate metrics
f1 = f1_score(all_labels, all_predictions, average="binary")
precision = precision_score(all_labels, all_predictions, average="binary")
recall = recall_score(all_labels, all_predictions, average="binary")
accuracy = accuracy_score(all_labels, all_predictions)

# Print metrics
print(f"F1-Score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"Accuracy: {accuracy:.4f}")

  torch.load(
  spectrogram_tensor = torch.load(spectrogram_path)


The model is loaded.
Start testing occurrence 1/57
Start testing occurrence 2/57
Start testing occurrence 3/57
Start testing occurrence 4/57
Start testing occurrence 5/57
Start testing occurrence 6/57
Start testing occurrence 7/57
Start testing occurrence 8/57
Start testing occurrence 9/57
Start testing occurrence 10/57
Start testing occurrence 11/57
Start testing occurrence 12/57
Start testing occurrence 13/57
Start testing occurrence 14/57
Start testing occurrence 15/57
Start testing occurrence 16/57
Start testing occurrence 17/57
Start testing occurrence 18/57
Start testing occurrence 19/57
Start testing occurrence 20/57
Start testing occurrence 21/57
Start testing occurrence 22/57
Start testing occurrence 23/57
Start testing occurrence 24/57
Start testing occurrence 25/57
Start testing occurrence 26/57
Start testing occurrence 27/57
Start testing occurrence 28/57
Start testing occurrence 29/57
Start testing occurrence 30/57
Start testing occurrence 31/57
Start testing occurrence 32