Installing packages

In [1]:
!pip install datasets --quiet
!pip install torch --quiet
!pip install keras --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m388.9/388.9 kB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[?25h

Importing relevant packages

In [2]:
import os
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import pandas as pd
from tqdm import tqdm

Load data for training

In [3]:
# Seed the random number generators used by this notebook.
# Seeding NumPy alone is not enough: weight initialisation, dropout and
# DataLoader shuffling all draw from PyTorch's RNG, so seed that as well.
np.random.seed(42)
torch.manual_seed(42)

def load_data(base_dir, layer):
    """Load every ``.npy`` embedding stored for one layer across all speakers.

    The expected directory layout is ``<base_dir>/<speaker>/<layer>/*.npy``.
    Each speaker directory that holds at least one ``.npy`` file for ``layer``
    receives an integer label. Speakers and files are visited in sorted name
    order, so the speaker -> label mapping and the sample order are the same
    on every run and every filesystem (``os.listdir`` alone gives no ordering
    guarantee).

    Args:
        base_dir: Directory containing one sub-directory per speaker.
        layer: Name of the per-speaker sub-directory to read.

    Returns:
        A tuple ``(embeddings, labels)`` of NumPy arrays. ``embeddings`` stacks
        the loaded arrays along a new first axis; ``labels`` is an ``int64``
        array with one speaker id per embedding. Both are empty when no
        matching file exists.
    """
    embeddings = []
    labels = []
    label_map = {}

    for speaker_dir in sorted(os.listdir(base_dir)):
        layer_dir = os.path.join(base_dir, speaker_dir, layer)

        # Skip stray files and speakers that have no folder for this layer.
        if not os.path.isdir(layer_dir):
            continue

        for file_name in sorted(os.listdir(layer_dir)):
            if not file_name.endswith('.npy'):
                continue

            embeddings.append(np.load(os.path.join(layer_dir, file_name)))
            # First time a speaker is seen it gets the next free id
            # (len(label_map) is evaluated before the key is inserted).
            labels.append(label_map.setdefault(speaker_dir, len(label_map)))

    # int64 is the integer dtype nn.CrossEntropyLoss expects for class-index
    # targets; be explicit instead of relying on the platform default.
    return np.array(embeddings), np.array(labels, dtype=np.int64)


Creating model

In [4]:
class CNN(nn.Module):
    """Three-stage 2-D convolutional classifier.

    Every convolution keeps the spatial size (its padding matches its kernel)
    and is followed by ReLU and a 2x2 max-pool with stride 2, so height and
    width are each floor-divided by two, three times, before the result is
    flattened and passed through two fully connected layers.

    Args:
        num_classes: Number of output logits.
        flat_features: Number of features per sample after the last pooling
            step, i.e. ``128 * (H // 8) * (W // 8)`` for an ``H x W`` input.
            The default, ``128 * 12 * 128``, is the value the network was
            originally hard-coded for.
    """

    def __init__(self, num_classes, flat_features=128 * 12 * 128):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=(5, 5), padding=(2, 2))
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv2 = nn.Conv2d(32, 64, kernel_size=(3, 3), padding=(1, 1))
        self.conv3 = nn.Conv2d(64, 128, kernel_size=(3, 3), padding=(1, 1))
        self.dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(flat_features, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``.

        Args:
            x: Tensor of shape ``(batch, 1, H, W)``. A 3-D tensor is treated
                as ``(batch, H, W)`` and given a singleton channel axis.

        Raises:
            RuntimeError: If the flattened feature size does not match
                ``flat_features``.
        """
        if x.dim() == 3:
            x = x.unsqueeze(1)

        # torch.relu is used because the notebook never imports
        # torch.nn.functional as F.
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.pool(torch.relu(self.conv3(x)))

        # Flatten per sample. Reshaping with a fixed column count and a free
        # batch axis would silently change the batch size on a shape mismatch;
        # this way the mismatch surfaces as an error in fc1.
        x = torch.flatten(x, start_dim=1)
        x = self.dropout(torch.relu(self.fc1(x)))
        return self.fc2(x)


Instantiating the model, optimizer and loss function

In [5]:
# Notebook-scope loss, model and optimiser instances.
# NOTE(review): train_and_evaluate() builds its own model, criterion and
# optimiser for every layer, so the objects created here are not used by it.
loss_fn = nn.CrossEntropyLoss()
model = CNN(num_classes=10)  # Change num_classes as per your dataset
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

Function to evaluate model

In [6]:
def evaluate_model(model, loader, device):
    """Score ``model`` on every batch yielded by ``loader``.

    The model is switched to evaluation mode and left in that mode; gradients
    are disabled while predicting. The predicted class of a sample is the
    index of its largest output.

    Args:
        model: Network to evaluate.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``; precision, recall and F1
        are support-weighted averages over the classes.
    """
    model.eval()

    references = []
    predictions = []
    with torch.no_grad():
        for batch_inputs, batch_targets in loader:
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)

            logits = model(batch_inputs)
            # torch.max over dim 1 returns (values, indices); keep the indices.
            predicted = torch.max(logits.data, 1)[1]

            predictions.extend(predicted.cpu().numpy())
            references.extend(batch_targets.cpu().numpy())

    accuracy = accuracy_score(references, predictions)
    precision, recall, f1, _support = precision_recall_fscore_support(
        references, predictions, average='weighted'
    )
    return accuracy, precision, recall, f1


Function to train and evaluate all layers

In [10]:
def train_and_evaluate(base_dir, layers, num_classes, device, epochs=30,
                       val_fraction=0.2, seed=42):
    """Train one CNN per layer and report metrics on a held-out split.

    For every entry of ``layers`` the embeddings are loaded with
    ``load_data``, split into a training and a validation part, and a fresh
    ``CNN`` is trained with Adam (lr 0.001) and cross-entropy loss. After each
    epoch the model is scored on the validation part with ``evaluate_model``.
    Per-epoch metrics are written to ``<layer>_training_progress.csv`` in the
    current working directory.

    Args:
        base_dir: Root directory passed to ``load_data``.
        layers: Iterable of layer directory names to process.
        num_classes: Number of output classes of the CNN.
        device: Device the model and the batches are moved to.
        epochs: Number of training epochs per layer.
        val_fraction: Share of the samples held out for validation, in
            ``[0, 1)``. When the share rounds down to zero samples, the
            metrics are computed on the training data instead.
        seed: Seed of the generator used for the train/validation split, so
            the split is the same on every run.

    Returns:
        List with one ``(layer, accuracy, precision, recall, f1)`` tuple per
        layer, holding the metrics of the last epoch (NaN when ``epochs`` is
        zero).

    Raises:
        ValueError: If ``val_fraction`` is outside ``[0, 1)`` or a layer has
            no embeddings.
    """
    if not 0.0 <= val_fraction < 1.0:
        raise ValueError(f"val_fraction must be in [0, 1), got {val_fraction}")

    results = []
    for layer in layers:
        embeddings, labels = load_data(base_dir, layer)
        if len(labels) == 0:
            # Fail early with a clear message instead of a ZeroDivisionError
            # when averaging the loss over zero batches.
            raise ValueError(f"No .npy embeddings found for layer '{layer}' under '{base_dir}'")

        # Conv layers hold float32 weights and CrossEntropyLoss needs int64
        # class indices; both casts are no-ops when the dtype already matches.
        dataset = TensorDataset(
            torch.from_numpy(embeddings).float(),
            torch.from_numpy(labels).long(),
        )

        # Hold out part of the data so that the 'val_*' metrics are measured
        # on samples the model was not trained on.
        n_val = int(len(dataset) * val_fraction)
        if n_val > 0:
            train_set, val_set = torch.utils.data.random_split(
                dataset,
                [len(dataset) - n_val, n_val],
                generator=torch.Generator().manual_seed(seed),
            )
        else:
            train_set = val_set = dataset

        train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_set, batch_size=32, shuffle=False)

        model = CNN(num_classes=num_classes).to(device)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.001)

        epoch_data = {'epoch': [], 'train_loss': [], 'val_accuracy': [], 'val_precision': [], 'val_recall': [], 'val_f1': []}

        # Defined up front so the final-results row exists even for epochs=0.
        accuracy = precision = recall = f1 = float('nan')

        for epoch in range(epochs):
            # evaluate_model leaves the model in eval mode; switch back.
            model.train()
            total_loss = 0
            with tqdm(train_loader, desc=f"Layer: {layer}, Epoch: {epoch+1}", unit="batch") as t:
                for inputs, targets in t:
                    inputs, targets = inputs.to(device), targets.to(device)
                    optimizer.zero_grad()
                    outputs = model(inputs)
                    loss = criterion(outputs, targets)
                    loss.backward()
                    optimizer.step()
                    total_loss += loss.item()
                    t.set_postfix(loss=loss.item())

            # Evaluation on the validation set
            accuracy, precision, recall, f1 = evaluate_model(model, val_loader, device)

            # Store metrics for each epoch
            epoch_data['epoch'].append(epoch)
            epoch_data['train_loss'].append(total_loss / len(train_loader))
            epoch_data['val_accuracy'].append(accuracy)
            epoch_data['val_precision'].append(precision)
            epoch_data['val_recall'].append(recall)
            epoch_data['val_f1'].append(f1)

        # Store final results (metrics of the last epoch)
        results.append((layer, accuracy, precision, recall, f1))
        # Convert epoch data to DataFrame and save to CSV
        df = pd.DataFrame(epoch_data)
        df.to_csv(f"{layer}_training_progress.csv", index=False)

    return results

Save results as CSV

In [11]:
def save_final_results(results, output_path="final_results.csv"):
    """Write the per-layer summary metrics to a CSV file.

    Args:
        results: Iterable of ``(layer, accuracy, precision, recall, f1)``
            rows. An empty iterable produces a header-only file.
        output_path: Destination of the CSV file. Defaults to
            ``final_results.csv`` in the current working directory.
    """
    df_results = pd.DataFrame(results, columns=['Layer', 'Accuracy', 'Precision', 'Recall', 'F1-Score'])
    df_results.to_csv(output_path, index=False)

Conducting training and evaluation

In [None]:
# Mount Google Drive so the embedding files stored there can be read from
# this Colab runtime. The mount must happen before base_dir is accessed.
from google.colab import drive
drive.mount('/content/drive')

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Root folder handed to load_data(), which reads <base_dir>/<speaker>/<layer>/*.npy.
base_dir = '/content/drive/My Drive/new_speaker_identification/clips__test/'
# Longer layer list kept for reference; only the two layers below are run.
# layers = ['layer_0_processed', 'layer_5_processed', 'layer_10_processed', 'layer_20_processed', 'layer_24_processed']
layers = ['layer_0_processed', 'layer_5_processed']
# Size of the CNN output layer.
# NOTE(review): presumably the number of speaker folders under base_dir;
# load_data() numbers speakers 0..k-1, so this should be at least k - confirm.
num_classes = 25
# Train one model per layer (this writes <layer>_training_progress.csv for
# each layer), then write the per-layer summary to final_results.csv.
results = train_and_evaluate(base_dir, layers, num_classes, device)
save_final_results(results)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
