### Split the data into train set and test set

In [10]:
from src import DAPSAudioDataset_with_cropping
import os
from torch.utils.data import DataLoader, Subset

import torch

audio_dir = "daps"

class_1_speakers = ["f1", "f7", "f8", "m3", "m6", "m8"]

class_mapping = {}

for root, dirs, files in os.walk('precomputed_spectrograms_aug'):
    for file in files:
        audio_path = os.path.join(root, file)
        try:
            speaker_prefix = file.split("_")[0]
            if speaker_prefix in class_1_speakers:
                class_mapping[audio_path] = 1
            else:
                class_mapping[audio_path] = 0
        except Exception as e:
            print(f"Error processing {audio_path}: {e}")

print("Class mapping created:")

class_0_count = sum(1 for label in class_mapping.values() if label == 0)
class_1_count = sum(1 for label in class_mapping.values() if label == 1)
print(f"Total Class 0 samples: {class_0_count}")
print(f"Total Class 1 samples: {class_1_count}")
print("Preparing the dataset...")

full_dataset = DAPSAudioDataset_with_cropping(class_mapping=class_mapping)

test_ids =[]
train_ids =[]
filenames = set()
for idx in range(len(full_dataset)):
    path, label = full_dataset.class_mapping[idx]
    if path is not None:
        filename = os.path.basename(path)
            
        filename_without_ext = os.path.splitext(filename)[0]
        parts = filename_without_ext.split('_')
            
        group = parts[0]
        place = ""
        if 'script1' in filename_without_ext:
            if 'aug' not in filename_without_ext:
                filenames.add(place)
                test_ids.append(idx)
            else: 
                continue
        else:
            train_ids.append(idx) 
print(len(test_ids))
print(len(train_ids))

print("Dataset created")

train_dataset = Subset(full_dataset, train_ids)
test_dataset = Subset(full_dataset, test_ids)

def collate_fn(batch):
    batch = [item for item in batch if item is not None]
    if len(batch) == 0:
        return None
    return torch.utils.data.dataloader.default_collate(batch)


train_loader = DataLoader(
    train_dataset, batch_size=64, shuffle=True, collate_fn=collate_fn
)

test_loader = DataLoader(
    test_dataset, batch_size=16, shuffle=True, collate_fn=collate_fn
)

for batch in train_loader:
    inputs, labels = batch
    print(f"Train Inputs shape: {inputs.shape}")
    print(f"Train Labels: {labels}")
    break

for batch in test_loader:
    inputs, labels = batch
    print(f"Test Inputs shape: {inputs.shape}")
    print(f"Test Labels: {labels}")
    break

Class mapping created:
Total Class 0 samples: 1050
Total Class 1 samples: 1350
Preparing the dataset...
300
1920
Dataset created
Train Inputs shape: torch.Size([64, 1, 64, 64])
Train Labels: tensor([0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1,
        0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0,
        1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0])
Test Inputs shape: torch.Size([16, 1, 64, 64])
Test Labels: tensor([1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0])


In [2]:
print(len(train_loader))

30


In [3]:
len(test_loader)

19

### The model


In [4]:
from src import Net 

In [5]:
import torch



freq_bins = 64
time_steps = 64

model = Net(freq_bins, time_steps)
print(model)


# Test forward pass

test_input = torch.randn(1, 1, 64, 64)  # Simulated random input
output = model(test_input)

print(output.shape)

Net(
  (conv1): Conv2d(1, 60, kernel_size=(5, 5), stride=(1, 1))
  (bn1): BatchNorm2d(60, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(60, 160, kernel_size=(5, 5), stride=(1, 1))
  (bn2): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(160, 320, kernel_size=(3, 3), stride=(1, 1))
  (bn3): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=8000, out_features=120, bias=True)
  (dropout1): Dropout(p=0.2, inplace=False)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (dropout2): Dropout(p=0.3, inplace=False)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
torch.Size([1, 10])


### Train the model

In [6]:
### reduce the train set to few samples for faster training
### only for tests/ reduces the efficiency of the model
from torch.utils.data import DataLoader, Subset


subset_dataset = Subset(train_dataset, list(range(20)))

train_loader = DataLoader(subset_dataset, batch_size=1, shuffle=False)

In [6]:
len(train_loader)

30

In [7]:
# AVOID YOUR COMPUTER TO CRASH
import torch

torch.set_num_threads(4) 

In [8]:
import multiprocessing


print("Nombre de cœurs physiques:", multiprocessing.cpu_count())


logical_cores = torch.get_num_threads()
print("Nombre de cœurs logiques (threads disponibles):", logical_cores)


Nombre de cœurs physiques: 8
Nombre de cœurs logiques (threads disponibles): 4


In [9]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from src import Net

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
freq_bins = 64
time_steps = 64


model = Net(freq_bins, time_steps).to(device)  
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)  

num_epochs = 50

for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}, start training...")
    model.train()  
    running_loss = 0.0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)  

        optimizer.zero_grad()

        outputs = model(inputs)

        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()  

    avg_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}")
freq_bins = 64
time_steps = 64

torch.save(model.state_dict(), f"simple_cnn_model_reduced_first_layer{freq_bins}x{time_steps}.pth")
print("Model saved successfully.")

Epoch 1/50, start training...
Epoch [1/50], Loss: 0.9231
Epoch 2/50, start training...


KeyboardInterrupt: 

### Evaluate Model

In [13]:
import torch
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

from src import Net
freq_bins = 64
time_steps = 64

model = Net(freq_bins, time_steps)
model.load_state_dict(torch.load(f"simple_cnn_model_reduced_first_layer{freq_bins}x{time_steps}.pth"))
print("the model is loaded")
model.eval()

all_labels = []
all_predictions = []

occurence = 0
with torch.no_grad():
    length = len(test_loader)
    for inputs, labels in test_loader:
        print(f"start testing occurence {occurence + 1}/{length}")
        occurence += 1
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)

        all_labels.extend(labels.cpu().numpy())
        all_predictions.extend(predicted.cpu().numpy())

f1 = f1_score(all_labels, all_predictions, average="binary")
precision = precision_score(all_labels, all_predictions, average="binary")
recall = recall_score(all_labels, all_predictions, average="binary")
accuracy = accuracy_score(all_labels, all_predictions)

# Print metrics
print(f"F1-Score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"Accuracy: {accuracy:.4f}")

  model.load_state_dict(torch.load(f"simple_cnn_model_reduced_first_layer{freq_bins}x{time_steps}.pth"))


RuntimeError: Error(s) in loading state_dict for Net:
	Missing key(s) in state_dict: "bn1.weight", "bn1.bias", "bn1.running_mean", "bn1.running_var", "bn2.weight", "bn2.bias", "bn2.running_mean", "bn2.running_var", "conv3.weight", "conv3.bias", "bn3.weight", "bn3.bias", "bn3.running_mean", "bn3.running_var". 
	size mismatch for conv1.weight: copying a param with shape torch.Size([6, 1, 5, 5]) from checkpoint, the shape in current model is torch.Size([60, 1, 5, 5]).
	size mismatch for conv1.bias: copying a param with shape torch.Size([6]) from checkpoint, the shape in current model is torch.Size([60]).
	size mismatch for conv2.weight: copying a param with shape torch.Size([16, 6, 5, 5]) from checkpoint, the shape in current model is torch.Size([160, 60, 5, 5]).
	size mismatch for conv2.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([160]).
	size mismatch for fc1.weight: copying a param with shape torch.Size([120, 2704]) from checkpoint, the shape in current model is torch.Size([120, 8000]).