In [None]:
#Mojo of reproducibility
import torch
import random
import numpy as np

def set_seed(seed):
  """Seed every random number generator this notebook relies on.

  Seeds Python's ``random`` module, NumPy's global generator and PyTorch
  (the default generator plus the CUDA generators). When CUDA is available,
  cuDNN is additionally put into deterministic mode with benchmarking off.

  Args:
    seed: Integer seed handed to every generator.
  """
  # Python and NumPy global generators
  random.seed(seed)
  np.random.seed(seed)

  # PyTorch generators
  torch.manual_seed(seed)
  torch.cuda.manual_seed_all(seed)

  # The cuDNN flags are only touched when a CUDA device is present
  if not torch.cuda.is_available():
    return
  torch.backends.cudnn.deterministic = True
  torch.backends.cudnn.benchmark = False


# Seed once up front so the cells below start from a known RNG state.
set_seed(42)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from fan import FANLayer

class FAN_Classifier(nn.Module):
    """Two-headed classifier built from a stack of ``FANLayer`` modules.

    A shared trunk of ``FANLayer`` modules (each followed by dropout) feeds
    two separate ``FANLayer`` heads: one emits emotion scores, the other
    emits strength scores.

    Args:
        hidden_size: Output width of every trunk layer.
        num_layers: Controls trunk depth. The trunk always holds one input
            layer, plus ``num_layers - 2`` hidden layers (none when
            ``num_layers <= 2``).
        dropout_rate: Dropout probability applied after every trunk layer.
        input_size: Number of input features per sample. Defaults to 173.
        num_emotions: Output width of the emotion head. Defaults to 6.
        num_strengths: Output width of the strength head. Defaults to 3.
    """

    def __init__(self, hidden_size, num_layers, dropout_rate,
                 input_size=173, num_emotions=6, num_strengths=3):
        super().__init__()

        # NOTE: modules are created in a fixed order (trunk, emotion head,
        # strength head) so that seeded initialisation stays reproducible.

        # Shared trunk: input layer followed by the hidden layers
        self.layers = nn.ModuleList()
        self.layers.append(FANLayer(input_size, hidden_size))
        for _ in range(num_layers - 2):
            self.layers.append(FANLayer(hidden_size, hidden_size))

        # Task-specific output heads
        self.emo_output_layer = FANLayer(hidden_size, num_emotions)
        self.strength_output_layer = FANLayer(hidden_size, num_strengths)

        # Dropout shared by all trunk layers
        self.dropout = nn.Dropout(dropout_rate)

        # Initialize weights
        self.init_weights()

    def init_weights(self):
        """Xavier-initialise the weight and zero the bias of every ``nn.Linear`` submodule.

        Only modules that are ``nn.Linear`` instances are touched.
        NOTE(review): presumably FANLayer is composed of nn.Linear layers, so
        this reaches its parameters - confirm against the ``fan`` module.
        """
        for layer in self.modules():
            if isinstance(layer, nn.Linear):
                nn.init.xavier_uniform_(layer.weight)
                if layer.bias is not None:
                    nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Run the trunk, then both heads.

        Args:
            x: Input batch whose last dimension matches ``input_size``.

        Returns:
            Tuple ``(emo_output, strength_output)`` with the raw outputs of
            the emotion head and the strength head.
        """
        # Pass through the trunk; dropout follows every layer
        for layer in self.layers:
            x = layer(x)
            x = self.dropout(x)

        # Both heads read the same trunk representation
        emo_output = self.emo_output_layer(x)
        strength_output = self.strength_output_layer(x)

        return emo_output, strength_output

In [None]:
# Create dataset
from torch.utils.data import Dataset

class voiceDataset(Dataset):
    """Map-style dataset that bundles features with two groups of labels.

    Args:
        features: Indexable collection holding one feature entry per sample.
        emotionLabels: Indexable collection read with the same index as ``features``.
        strengthLabels: Indexable collection read with the same index as ``features``.
    """

    def __init__(self, features, emotionLabels, strengthLabels):
        self.features, self.emotionLabels, self.strengthLabels = (
            features,
            emotionLabels,
            strengthLabels,
        )

    def __len__(self):
        """Return the sample count, taken from the feature collection."""
        n_samples = len(self.features)
        return n_samples

    def __getitem__(self, idx):
        """Return the entries at ``idx`` as a dict keyed by field name."""
        sample = {
            'features': self.features[idx],
            'emotionLabel': self.emotionLabels[idx],
            'strengthLabel': self.strengthLabels[idx],
        }
        return sample


In [None]:
import pandas as pd
# Load the CSV whose columns are later sliced into features and label groups.
df = pd.read_csv(filepath_or_buffer='featuresAndLabels.csv')

In [None]:
# Slice df by column position and convert each slice straight to a float32 tensor:
#   columns 1..173   -> input features
#   columns 174..179 -> emotion labels
#   columns 180..182 -> strength labels
features = torch.tensor(df.iloc[:, 1:174].values, dtype=torch.float32)
emotionLabels = torch.tensor(df.iloc[:, 174:180].values, dtype=torch.float32)
strengthLabels = torch.tensor(df.iloc[:, 180:183].values, dtype=torch.float32)

In [None]:
# Create datasets
# train : validation : test = 8 : 1 : 1. The validation share is split into two
# equal halves: validate_dataset_1 (hyperparameter search) and
# validate_dataset_2 (checkpoint selection while training the final model).
train_size = int(0.8 * len(df))
val_size = int(0.05 * len(df))  # size of EACH validation half
# The test set is what remains after the training rows and BOTH validation halves
test_size = len(df) - train_size - 2 * val_size

# Split boundaries (row positions)
val_1_end = train_size + val_size
val_2_end = train_size + 2 * val_size

print(train_size, val_size, test_size)

train_dataset = voiceDataset(features[:train_size],
                             emotionLabels[:train_size],
                             strengthLabels[:train_size])

validate_dataset_1 = voiceDataset(features[train_size:val_1_end],
                                  emotionLabels[train_size:val_1_end],
                                  strengthLabels[train_size:val_1_end])

validate_dataset_2 = voiceDataset(features[val_1_end:val_2_end],
                                  emotionLabels[val_1_end:val_2_end],
                                  strengthLabels[val_1_end:val_2_end])

# Start AFTER validate_dataset_2: that split picks the saved checkpoint, so
# any overlap would leak model-selection data into the test metrics.
test_dataset = voiceDataset(features[val_2_end:],
                            emotionLabels[val_2_end:],
                            strengthLabels[val_2_end:])


from torch.utils.data import DataLoader

# Every row must land in exactly one split
assert (len(train_dataset) + len(validate_dataset_1)
        + len(validate_dataset_2) + len(test_dataset)) == len(df)
assert len(test_dataset) == test_size

print(len(train_dataset),len(validate_dataset_1), len(validate_dataset_2), len(test_dataset))

# Create dataLoader
# Only the training loader shuffles; evaluation loaders keep row order
trainDataLoader = DataLoader(train_dataset, batch_size=128, shuffle=True)
validationDataLoader_1 = DataLoader(validate_dataset_1,batch_size=128)
validationDataLoader_2 = DataLoader(validate_dataset_2,batch_size=128)
testDataLoader = DataLoader(test_dataset,batch_size=1)


5953 372 1117
5953 372 372 1117


In [None]:
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Define the trainer & validator function
def train_validate_model(hidden_size = 0,
                         num_layers = 0,
                         dropout_rate = 0,
                         trainDataLoader = None,
                         validationDataLoader = None,
                         num_epochs = 0,
                         learning_rate = 0,
                         save = False):
    """Train a ``FAN_Classifier`` and validate it after every epoch.

    The RNGs are re-seeded with 42 at the start. The model is trained with
    Adam on a weighted sum of two cross-entropy losses (0.7 * emotion +
    0.3 * strength). After each epoch the same weighted loss is summed over
    all validation batches and passed to a ``ReduceLROnPlateau`` scheduler.

    Args:
        hidden_size: Forwarded to ``FAN_Classifier``.
        num_layers: Forwarded to ``FAN_Classifier``.
        dropout_rate: Forwarded to ``FAN_Classifier``.
        trainDataLoader: Iterable of batches; each batch is a mapping with the
            keys ``'features'``, ``'emotionLabel'`` and ``'strengthLabel'``.
        validationDataLoader: Iterable of batches with the same keys.
        num_epochs: Number of training epochs.
        learning_rate: Learning rate handed to Adam.
        save: When truthy, the model's ``state_dict`` is written to
            ``'bestFAN.pth'`` after the first epoch and again whenever the
            validation loss reaches a new minimum.

    Returns:
        float: The lowest per-epoch validation loss (a sum of per-batch
        losses, not an average). ``float('inf')`` when ``num_epochs`` is 0.
    """
    set_seed(42)

    # Use the GPU when one is available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Instantiate the model and move it to the chosen device
    model = FAN_Classifier(hidden_size = hidden_size,
                           num_layers = num_layers,
                           dropout_rate = dropout_rate)
    model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    # Multiply the learning rate by 0.1 once the validation loss has not
    # improved for more than 3 epochs.
    # NOTE(review): `verbose` is deprecated in recent PyTorch releases - confirm
    # against the installed version before upgrading.
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3, verbose=True)

    # Lowest validation loss seen so far
    min_val_loss = float('inf')

    for epoch in range(num_epochs):
        # ---- Training ----
        model.train()

        total_loss = 0  # training loss summed over the batches of this epoch
        for batch in trainDataLoader:
            feature = batch['features'].to(device)
            emotionLabel = batch['emotionLabel'].to(device)
            strengthLabel = batch['strengthLabel'].to(device)

            optimizer.zero_grad()
            emotionOutput, strengthOutput = model(feature)
            emo_loss = criterion(emotionOutput, emotionLabel)
            strength_loss = criterion(strengthOutput, strengthLabel)
            # Weight the emotion head more heavily (7:3); detecting the
            # emotion is considered more critical than its strength.
            loss = 0.7*emo_loss + 0.3*strength_loss
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # ---- Validation ----
        model.eval()
        total_samples = 0    # number of validation samples seen
        emoCorrect = 0       # correct emotion predictions
        strengthCorrect = 0  # correct strength predictions
        total_val_loss = 0.0 # validation loss summed over batches (Python float)

        with torch.no_grad():
            for batch in validationDataLoader:
                feature = batch['features'].to(device)
                emotionLabel = batch['emotionLabel'].to(device)
                strengthLabel = batch['strengthLabel'].to(device)

                emotionOutput, strengthOutput = model(feature)
                emo_loss = criterion(emotionOutput, emotionLabel)
                strength_loss = criterion(strengthOutput, strengthLabel)
                # .item() keeps the accumulator a plain float, matching the
                # training loss, so the function does not return a device tensor.
                total_val_loss += (0.7*emo_loss + 0.3*strength_loss).item()

                # Predicted vs. target class index; targets are taken as the
                # argmax of each label row.
                emo_predicted = torch.argmax(emotionOutput, dim=1)
                emo_target = torch.argmax(emotionLabel, dim=1)
                strength_predicted = torch.argmax(strengthOutput, dim=1)
                strength_target = torch.argmax(strengthLabel, dim=1)

                emoCorrect += (emo_predicted == emo_target).sum().item()
                strengthCorrect += (strength_predicted == strength_target).sum().item()
                total_samples += emotionLabel.size(0)

        # Guard against an empty validation loader
        emo_accuracy = emoCorrect / total_samples if total_samples else 0.0
        strength_accuracy = strengthCorrect / total_samples if total_samples else 0.0

        # Uncomment to log the per-epoch metrics:
        # print(f"Epoch {epoch+1}/{num_epochs} | Training Loss: {total_loss:.4f}, Validation Loss: {total_val_loss:.4f}, Accuracy (Emotion): {emo_accuracy:.4f}, Accuracy (Strength): {strength_accuracy}")

        # Let the scheduler react to the validation loss
        scheduler.step(total_val_loss)

        # Track the best epoch. The first epoch is always recorded (and saved
        # when requested) so that a checkpoint exists even if the loss never
        # compares as lower afterwards.
        first_epoch = epoch == 0
        if first_epoch or total_val_loss < min_val_loss:
            min_val_loss = total_val_loss
            if save:
                torch.save(model.state_dict(), 'bestFAN.pth')
                if not first_epoch:
                    print(f"Model saved after Epoch: {epoch+1}")

    # Return the minimum validation loss for hyperparameter tuning
    return min_val_loss

In [None]:
import optuna

def objective(trial):
    """Optuna objective: train with sampled hyperparameters and return the validation loss.

    Samples the model width, depth control, dropout rate, epoch count and
    learning rate from ``trial``, runs ``train_validate_model`` on
    ``trainDataLoader`` / ``validationDataLoader_1`` without saving weights,
    and returns its result.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        The value returned by ``train_validate_model`` (the minimum validation
        loss of the run), which the study minimises.
    """
    set_seed(42)

    # Width of every hidden layer, in [50, 100]
    hidden_size = trial.suggest_int('hidden_size', 50, 100)
    # Depth control passed to the model, in [5, 15]
    num_layers = trial.suggest_int('num_layers', 5, 15)
    # Dropout probability, in [0.1, 0.5]
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
    # Number of training epochs, in [10, 30]
    num_epochs = trial.suggest_int('num_epochs', 10, 30)
    # Learning rate in [1e-5, 1e-3]. Sampled on a log scale so that each
    # decade is explored equally; uniform sampling would place about 90% of
    # the draws in [1e-4, 1e-3].
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-3, log=True)

    return train_validate_model(hidden_size = hidden_size,
                                num_layers = num_layers,
                                dropout_rate = dropout_rate,
                                trainDataLoader = trainDataLoader,
                                validationDataLoader = validationDataLoader_1,
                                num_epochs = num_epochs,
                                learning_rate = learning_rate,
                                save = False)


In [None]:
# Seed the sampler explicitly: seeding torch/numpy/random does not make the
# hyperparameter proposals reproducible. TPESampler is the sampler Optuna
# uses by default, so only the seed changes.
study = optuna.create_study(direction='minimize',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

# Print the best hyperparameters
print("Best trial:")
trial = study.best_trial  # later cells read the tuned values from `trial`
print(f"  Value (Validation Loss): {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")


[I 2025-04-07 00:36:48,863] A new study created in memory with name: no-name-52781dc2-e955-42ba-b504-2e5c049dee09
[I 2025-04-07 00:36:50,571] Trial 0 finished with value: 16.203439712524414 and parameters: {'hidden_size': 81, 'num_layers': 13, 'dropout_rate': 0.3186025795635002, 'num_epochs': 10, 'learning_rate': 0.0009372543121883424}. Best is trial 0 with value: 16.203439712524414.
[I 2025-04-07 00:36:51,774] Trial 1 finished with value: 16.690824508666992 and parameters: {'hidden_size': 59, 'num_layers': 6, 'dropout_rate': 0.4103210523909471, 'num_epochs': 16, 'learning_rate': 0.000448334064670082}. Best is trial 0 with value: 16.203439712524414.
[I 2025-04-07 00:36:54,255] Trial 2 finished with value: 15.99310302734375 and parameters: {'hidden_size': 53, 'num_layers': 9, 'dropout_rate': 0.16027067541665907, 'num_epochs': 24, 'learning_rate': 0.0006852418339067508}. Best is trial 2 with value: 15.99310302734375.
[W 2025-04-07 00:36:55,402] Trial 3 failed with parameters: {'hidden_si

KeyboardInterrupt: 

In [None]:
# Show the hyperparameter dict of `trial` (assigned from study.best_trial
# in the study cell).
print(trial.params)

{'hidden_size': 63, 'num_layers': 9, 'dropout_rate': 0.10326922793453225, 'num_epochs': 20, 'learning_rate': 0.0009911930030429713}


In [None]:
# Unpack the selected trial's hyperparameters into individual names for the
# final training run and the test evaluation.
(best_hidden_size,
 best_num_layers,
 best_dropout_rate,
 best_num_epochs,
 best_learning_rate) = (
    trial.params[param_name]
    for param_name in ('hidden_size',
                       'num_layers',
                       'dropout_rate',
                       'num_epochs',
                       'learning_rate')
)

In [None]:
# Final training set = the original training rows plus validation set 1
final_train_dataset = voiceDataset(
    *(tensor[:train_size + val_size]
      for tensor in (features, emotionLabels, strengthLabels))
)

final_trainDataLoader = DataLoader(final_train_dataset, shuffle=True, batch_size=128)

# Train with the tuned hyperparameters. validationDataLoader_2 decides which
# epoch's weights are written to disk (save=True keeps the lowest-loss epoch).
result = train_validate_model(
    trainDataLoader = final_trainDataLoader,
    validationDataLoader = validationDataLoader_2,
    hidden_size = best_hidden_size,
    num_layers = best_num_layers,
    dropout_rate = best_dropout_rate,
    num_epochs = best_num_epochs,
    learning_rate = best_learning_rate,
    save = True,
)

In [None]:
set_seed(42)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Rebuild the architecture with the tuned hyperparameters, then load the
# weights saved during the final training run.
bestModel = FAN_Classifier(
    hidden_size = best_hidden_size,
    num_layers = best_num_layers,
    dropout_rate = best_dropout_rate,
)
bestModel.load_state_dict(torch.load('bestFAN.pth', map_location=device))

bestModel.eval()  # evaluation mode disables dropout
bestModel.to(device)

total = 0            # number of test samples seen
emoCorrect = 0       # correct emotion predictions
strengthCorrect = 0  # correct strength predictions

with torch.no_grad():
    for batch in testDataLoader:
        # voiceDataset emits the inputs under the key 'features'
        feature = batch['features'].to(device)
        emoLabel = batch['emotionLabel'].to(device)
        strengthLabel = batch['strengthLabel'].to(device)

        emoOutput, strengthOutput = bestModel(feature)

        # Compare predicted and target class indices for both heads
        emoPredicted = torch.argmax(emoOutput, dim=1)
        emoTarget = torch.argmax(emoLabel, dim=1)

        strengthPredicted = torch.argmax(strengthOutput, dim=1)
        strengthTarget = torch.argmax(strengthLabel, dim=1)

        total += emoLabel.size(0)
        emoCorrect += (emoPredicted == emoTarget).sum().item()
        strengthCorrect += (strengthPredicted == strengthTarget).sum().item()

# Avoid a ZeroDivisionError when the test loader yields nothing
if total:
    print(f"Emotion Accuracy: {emoCorrect/total:.4f}")
    print(f"Strength Accuracy: {strengthCorrect/total:.4f}")
else:
    print("Test set is empty; accuracy is undefined.")

NameError: name 'best_hidden_size' is not defined