In [None]:
import random
from random import randint, uniform, choice

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from IPython.display import Audio
from sklearn.decomposition import PCA
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import TensorDataset, DataLoader

In [None]:
# Dataset constants: audio sampling rate and the language codes, whose list
# position doubles as the integer class id.
sampling_rate = 8_000
languages = ["de", "en", "es", "fr", "nl", "pt"]
language_dict = {code: class_id for class_id, code in enumerate(languages)}

# Read the four arrays from disk (inputs first, then targets, train before test).
X_train = np.load("inputs_train_fp16.npy")
y_train = np.load("targets_train_int8.npy")
X_test = np.load("inputs_test_fp16.npy")
y_test = np.load("targets_test_int8.npy")

# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Inputs are cast to float32 before being wrapped as tensors; targets keep the
# dtype they were stored with. Everything is moved to `device` up front.
X_train = torch.from_numpy(X_train.astype(np.float32)).to(device)
X_test = torch.from_numpy(X_test.astype(np.float32)).to(device)

y_train = torch.from_numpy(y_train).to(device)
y_test = torch.from_numpy(y_test).to(device)

In [None]:
# Define normalization function
class Normalization(nn.Module):
    """Standardise inputs with fixed, precomputed per-feature statistics.

    Args:
        mean: Per-feature mean (tensor or array-like). Stored as shape ``(1, -1)``.
        std: Per-feature standard deviation, same layout as ``mean``.
        eps: Lower bound applied to ``std`` so that a constant feature
            (``std == 0``) does not cause a division by zero.

    The statistics are registered as buffers, so they follow the module through
    ``.to(device)`` and are included in ``state_dict``.
    """

    def __init__(self, mean, std, eps=1e-8):
        super().__init__()

        # detach().clone() gives the module its own copy without the
        # copy-construct warning that torch.tensor(tensor) raises.
        mean = torch.as_tensor(mean).detach().clone()
        std = torch.as_tensor(std).detach().clone()

        # Integer statistics are promoted so that clamping/division are float ops.
        if not mean.is_floating_point():
            mean = mean.to(torch.get_default_dtype())
        if not std.is_floating_point():
            std = std.to(torch.get_default_dtype())

        self.register_buffer("mean", mean.view(1, -1))
        self.register_buffer("std", std.view(1, -1).clamp_min(eps))

    def forward(self, x):
        """Return ``(x - mean) / std``, broadcasting the statistics over rows."""
        return (x - self.mean) / self.std
    
# Calculate mean and standard deviation of the training data.
# X_train is already a tensor, so it is reduced directly: wrapping the result in
# torch.tensor(...) would copy-construct from a tensor and emit a UserWarning.
mean = X_train.mean(dim=0).to(torch.float32)
std = X_train.std(dim=0).to(torch.float32)

# Define the normalization layer using the calculated mean and standard deviation
normalization = Normalization(mean, std)

In [None]:
# Define the LanguageClassifier model
class LanguageClassifier(nn.Module):
    """Normalisation -> LSTM -> dropout -> two-layer classification head.

    Args:
        input_size: Number of features per input row.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output logits.
        dropout_prob: Dropout probability, used both after the LSTM and inside
            the head.
        normalization: Module applied to the raw input first. The default is
            the module-level ``normalization`` object as it exists when this
            class is defined.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout_prob, normalization=normalization):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.normalization = normalization
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        head_layers = [
            nn.Linear(hidden_size, 64),
            nn.ReLU(),
            nn.Dropout(dropout_prob),
            nn.Linear(64, num_classes),
        ]
        self.fc = nn.Sequential(*head_layers)
        self.dropout = nn.Dropout(dropout_prob)

    def forward(self, x):
        """Return the class logits for ``x``.

        NOTE(review): assumes ``x`` is ``(batch, input_size)`` - confirm against callers.
        """
        # Normalise, then insert an axis at position 1; with batch_first=True
        # that axis is the sequence axis, so the LSTM sees a length-one sequence.
        sequence = self.normalization(x).unsqueeze(1)
        batch = sequence.size(0)

        # Explicit all-zero initial hidden and cell states on the input's device.
        hidden_init = torch.zeros(self.num_layers, batch, self.hidden_size).to(sequence.device)
        cell_init = torch.zeros(self.num_layers, batch, self.hidden_size).to(sequence.device)

        lstm_out, _ = self.lstm(sequence, (hidden_init, cell_init))
        lstm_out = self.dropout(lstm_out)

        # Classify from the output at the last sequence position.
        logits = self.fc(lstm_out[:, -1, :])
        return logits

### Hyperparameter tuning

In [None]:
# Set the random seeds for reproducibility.
# The random search draws its hyperparameters with the stdlib `random` module
# (choice / randint / uniform), so that generator has to be seeded as well as
# torch and numpy; otherwise every run explores different configurations.
random.seed(42)
torch.manual_seed(42)
np.random.seed(42)

# Set the number of output classes and batch size
num_classes = 6
batch_size = 32

# Set the input size based on the actual size of the input features
input_size = X_train.shape[1]

# Define the random search parameters and ranges
num_trials = 10
hidden_size_range = (16, 32, 64, 128)  # candidate values, one is picked per trial
num_layers_range = (1, 4)              # inclusive integer bounds
learning_rate_range = (1e-4, 1e-2)     # bounds for uniform sampling
weight_decay_range = (1e-4, 1e-2)      # bounds for uniform sampling

# Initialize variables to store the best hyperparameters and accuracy
best_hyperparameters = None
best_accuracy = None
best_loss = None

# Define the number of folds for k-fold cross-validation
num_folds = 3
num_epochs = 15
dropout_prob = 0.5

In [None]:
# Perform random search
def _run_fold(train_index, val_index, hidden_size, num_layers, learning_rate, weight_decay):
    """Train a fresh model on one cross-validation fold and score the held-out rows.

    Args:
        train_index: Integer row indices of ``X_train`` used for training.
        val_index: Integer row indices of ``X_train`` held out for validation.
        hidden_size: LSTM hidden width for ``LanguageClassifier``.
        num_layers: Number of LSTM layers for ``LanguageClassifier``.
        learning_rate: Adam learning rate.
        weight_decay: Adam weight decay.

    Returns:
        ``(accuracy, loss)`` on the validation rows; ``loss`` is the mean
        cross-entropy weighted by the number of samples in each batch.
    """
    model = LanguageClassifier(input_size, hidden_size, num_layers, num_classes, dropout_prob)
    model.to(device)

    # X_train / y_train are already tensors: slice them with index tensors and
    # convert dtype/device directly instead of copy-constructing via torch.tensor.
    train_rows = torch.as_tensor(train_index, dtype=torch.long)
    val_rows = torch.as_tensor(val_index, dtype=torch.long)
    train_dataset = TensorDataset(
        X_train[train_rows].to(device=device, dtype=torch.float32),
        y_train[train_rows].to(device=device, dtype=torch.long),
    )
    val_dataset = TensorDataset(
        X_train[val_rows].to(device=device, dtype=torch.float32),
        y_train[val_rows].to(device=device, dtype=torch.long),
    )
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    criterion = nn.CrossEntropyLoss()
    # weight_decay is part of the sampled configuration, so it must reach the
    # optimizer; otherwise the search never evaluates the value it records.
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Train the model
    for epoch in range(num_epochs):
        model.train()
        for inputs, labels in train_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Evaluate on the validation rows after the last epoch
    model.eval()
    correct = 0
    total = 0
    loss_sum = 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            n_batch = labels.size(0)
            loss_sum += criterion(outputs, labels).item() * n_batch
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += n_batch

    return correct / total, loss_sum / total


# StratifiedKFold works on NumPy data and only needs the labels to build the
# splits, so give it a host-side copy of the labels and a placeholder for X
# (passing device tensors fails when they live on the GPU).
cv_labels = y_train.detach().cpu().numpy()
cv_placeholder = np.zeros(len(cv_labels))

for trial in range(num_trials):
    # Generate random parameter values within the specified ranges
    hidden_size = choice(hidden_size_range)
    num_layers = randint(*num_layers_range)
    learning_rate = uniform(*learning_rate_range)
    weight_decay = uniform(*weight_decay_range)

    # Print the current trial and corresponding hyperparameters
    print(f"Trial: {trial + 1}/{num_trials}")
    print(f"Hyperparameters: input_size={input_size}, hidden_size={hidden_size}, num_layers={num_layers}, learning_rate={learning_rate}, weight_decay={weight_decay}")

    # Initialize lists to store fold accuracies and losses
    fold_accuracies = []
    fold_losses = []

    # Perform stratified k-fold cross-validation. The fixed random_state gives
    # every trial the same folds, so trials are compared on identical splits.
    skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42)
    for train_index, val_index in skf.split(cv_placeholder, cv_labels):
        accuracy, loss = _run_fold(
            train_index, val_index, hidden_size, num_layers, learning_rate, weight_decay
        )
        fold_accuracies.append(accuracy)
        fold_losses.append(loss)

    # Calculate the average validation accuracy and loss across folds for the current trial
    avg_accuracy = np.mean(fold_accuracies)
    avg_loss = np.mean(fold_losses)

    # Print average accuracy and loss for the current trial
    print(f"Average accuracy: {avg_accuracy:.4f}")
    print(f"Average loss: {avg_loss:.4f}\n")

    # Update the best hyperparameters, accuracy, and loss if necessary
    if best_accuracy is None or avg_accuracy > best_accuracy:
        best_hyperparameters = {
            "hidden_size": hidden_size,
            "num_layers": num_layers,
            "learning_rate": learning_rate,
            "weight_decay": weight_decay,
        }
        best_accuracy = avg_accuracy
        best_loss = avg_loss

In [None]:
# Report the winning configuration and its cross-validated accuracy,
# each value on the line below its heading.
print("Best Hyperparameters:", best_hyperparameters, sep="\n")
print("Best Accuracy:", best_accuracy, sep="\n")

In [None]:
# Build the final model with the best hyperparameters. It is trained on the full
# training set and evaluated on the held-out test set (X_test / y_test).
best_model = LanguageClassifier(
    input_size,
    best_hyperparameters["hidden_size"],
    best_hyperparameters["num_layers"],
    num_classes,
    dropout_prob=dropout_prob,  # same dropout the search was run with
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
best_model.to(device)

# The arrays are already tensors, so convert dtype/device directly;
# torch.tensor(tensor) would copy-construct and emit a UserWarning.
train_data = X_train.to(device=device, dtype=torch.float32)
train_labels = y_train.to(device=device, dtype=torch.long)
# NOTE: the `val_*` names below hold the *test* split, not a validation split.
val_data = X_test.to(device=device, dtype=torch.float32)
val_labels = y_test.to(device=device, dtype=torch.long)

train_dataset = TensorDataset(train_data, train_labels)
val_dataset = TensorDataset(val_data, val_labels)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    best_model.parameters(),
    lr=best_hyperparameters["learning_rate"],
    weight_decay=best_hyperparameters["weight_decay"],
)


In [None]:
# Final training run. NOTE: this rebinds `num_epochs`, which the search cell also
# reads - re-running the search after this cell would use 40 epochs per fold.
num_epochs = 40
for epoch in range(num_epochs):
    best_model.train()
    train_loss = 0.0
    train_correct = 0
    train_total = 0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = best_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight by the actual number of samples in this batch: the last batch
        # can be smaller than `batch_size`, and counting it as a full batch
        # would skew both the mean loss and the accuracy.
        n_batch = labels.size(0)
        train_loss += loss.item() * n_batch
        train_total += n_batch

        # Accumulate correct predictions for the train accuracy
        predicted_labels = outputs.argmax(dim=1)
        train_correct += (predicted_labels == labels).sum().item()

    # Calculate train accuracy (percent); measured in train mode, i.e. with dropout active
    train_accuracy = train_correct / train_total * 100

    # Evaluate on the test set in a single forward pass
    best_model.eval()
    with torch.no_grad():
        test_output = best_model(X_test)
        predicted_labels = test_output.argmax(dim=1)
        test_correct = (predicted_labels == y_test).sum().item()
        test_total = y_test.size(0)

    # Calculate test accuracy (percent)
    test_accuracy = test_correct / test_total * 100

    # Print train and test accuracy, and train loss
    print(f"Epoch {epoch}: Train Loss: {train_loss/train_total:.4f} | Train Accuracy: {train_accuracy:.4f}")
    print(f"          Test Accuracy: {test_accuracy:.4f}")


In [None]:
# Compile the trained model to TorchScript, then write it to "model.pt".
scripted_best_model = torch.jit.script(best_model)
torch.jit.save(scripted_best_model, "model.pt")

In [None]:
# Host-side copies of the training data: `.numpy()` is only valid for CPU
# tensors, and X_train / y_train may live on the GPU.
X_train_tensor = X_train.detach().cpu()
y_train_tensor = y_train.detach().cpu()

# Load the exported model onto the CPU as well, so the weights and the inputs
# are on the same device regardless of where the model was saved from.
model = torch.jit.load("model.pt", map_location="cpu")
model.eval()
with torch.no_grad():  # inference only; no autograd graph needed
    model_outputs = model(X_train_tensor)
targets = y_train_tensor

# Apply PCA to the model outputs
outputs_PCA = PCA(n_components=2).fit_transform(model_outputs.numpy())

# Create the scatter plot, coloured by target class
fig, ax = plt.subplots()
scatter = ax.scatter(*outputs_PCA.T, c=targets.numpy(), cmap="tab10", alpha=0.3)
ax.set(
    title="PCA of model outputs (training set)",
    xlabel="Principal component 1",
    ylabel="Principal component 2",
)

# Build the legend from the classes that actually occur in `targets`, so each
# handle gets the language code of its own class id even if a class is absent.
# `languages` is the list defined in the configuration cell.
handles, _ = scatter.legend_elements()
present_classes = np.unique(targets.numpy())
legend1 = ax.legend(
    handles,
    [languages[int(class_id)] for class_id in present_classes],
    loc="lower left",
    title="Classes",
)

plt.show()