In [1]:
#Mojo of reproducibility
import torch
import random
import numpy as np

def set_seed(seed):
  #PyTorch
  torch.manual_seed(seed)
  torch.cuda.manual_seed_all(seed)
  #Numpy
  np.random.seed(seed)
  #Python_random
  random.seed(seed)
  #CuDNN (when using CUDA)
  if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.1.3 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/Users/takehararyoutarou/anaconda3/envs/mlVenv/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/Users/takehararyoutarou/anaconda3/envs/mlVenv/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/Users/takehararyoutarou/anaconda3/envs/mlVenv/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/Users/takehararyoutarou/anaconda3/envs/mlVenv/lib/python3.10/site-pa

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from fan import FANLayer

class FAN_Classifier(nn.Module):
    def __init__(self, hidden_size, num_layers, dropout_rate):
        super().__init__()
        
        # Define a list of layers
        self.layers = nn.ModuleList()
        
        # Define the first layer
        self.layers.append(nn.Linear(173, hidden_size))
        
        # Define the intermediate hidden layers
        for _ in range(num_layers - 2):
            self.layers.append(FANLayer(hidden_size, hidden_size))
        
        # Final layer to output
        self.emo_output_layer = nn.Linear(hidden_size, 6)
        self.strength_output_layer = nn.Linear(hidden_size, 3)

        # Dropout layer
        self.dropout = nn.Dropout(dropout_rate)

        # Initialize weights
        self.init_weights()

    def init_weights(self):
        for layer in self.modules():
            if isinstance(layer, nn.Linear):
                nn.init.xavier_uniform_(layer.weight)
                if layer.bias is not None:
                    nn.init.zeros_(layer.bias)

    def forward(self,x):
        # Pass through hidden layers
        for layer in self.layers:
            x = layer(x)
            x = self.dropout(x)
        # Output layers
        emo_output = self.emo_output_layer(x)
        strength_output = self.strength_output_layer(x)
                
        return emo_output, strength_output

In [3]:
# Create dataset
from torch.utils.data import Dataset

class voiceDataset(Dataset):
    def __init__(self, features, emotionLabels, strengthLabels):
        self.features = features
        self.emotionLabels = emotionLabels
        self.strengthLabels = strengthLabels

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return {'features':self.features[idx], 
                'emotionLabel':self.emotionLabels[idx], 
                'strengthLabel':self.strengthLabels[idx]
        }   


In [4]:
import pandas as pd
df = pd.read_csv('featuresAndLabels.csv')

In [5]:
# Cut corresponding columns of df into features and labels
# Turn them into tensors
features = df.iloc[:, 1:174].values
features = torch.tensor(features, dtype=torch.float32)

emotionLabels = df.iloc[:,174:180].values
emotionLabels = torch.tensor(emotionLabels, dtype=torch.float32)

strengthLabels = df.iloc[:,180:183].values
strengthLabels = torch.tensor(strengthLabels, dtype=torch.float32)

In [6]:
# Create datasets
# train, validate, test = 8:1:1
train_size = int(0.8 * len(df))
val_size = int(0.05 * len(df))
test_size = len(df) - train_size - val_size

print(train_size, val_size, test_size)

train_dataset = voiceDataset(features[:train_size], 
                             emotionLabels[:train_size], 
                             strengthLabels[:train_size])

validate_dataset_1 = voiceDataset(features[train_size:train_size + val_size],
                                emotionLabels[train_size:train_size + val_size], 
                                strengthLabels[train_size:train_size + val_size])

validate_dataset_2 = voiceDataset(features[train_size + val_size:train_size + 2 * val_size],
                                emotionLabels[train_size + val_size:train_size + 2 * val_size], 
                                strengthLabels[train_size + val_size:train_size + 2 * val_size])

test_dataset = voiceDataset(features[train_size + val_size:],
                            emotionLabels[train_size + val_size:], 
                            strengthLabels[train_size + val_size:])


from torch.utils.data import DataLoader

print(len(train_dataset),len(validate_dataset_1), len(validate_dataset_2), len(test_dataset))

# Create dataLoader
trainDataLoader = DataLoader(train_dataset, batch_size=128, shuffle=True)
validationDataLoader_1 = DataLoader(validate_dataset_1,batch_size=128)
validationDataLoader_2 = DataLoader(validate_dataset_2,batch_size=128)
testDataLoader = DataLoader(test_dataset,batch_size=1)


5953 372 1117
5953 372 372 1117


In [7]:
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Define the trainer & validator function
def train_validate_model(hidden_size = 0,
                         num_layers = 0,
                         dropout_rate = 0, 
                         trainDataLoader = None, 
                         validationDataLoader = None,
                         num_epochs = 0, 
                         learning_rate = 0,
                        save = False):

    set_seed(42)

    #Get the GPU as a device if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    #Instantiate the model
    model = FAN_Classifier(hidden_size = hidden_size,
                           num_layers = num_layers,
                           dropout_rate = dropout_rate)

    # Moving the model to GPU if available
    model.to(device)

    #Prepare the optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    #Prepare the error function
    criterion = nn.CrossEntropyLoss()

    #Prepare the scheduler
    #Reduce the learning rate by 0.1 if the validation loss does not decrease for 3 epochs
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3, verbose=True)

    #Prepare the DataLoader
    train_data_loader = trainDataLoader
    validation_data_loader = validationDataLoader

    #Placeholder for minimum validation loss
    min_val_loss = float('inf')

    for epoch in range(num_epochs):
        #set the model to training mode
        model.train()

        total_loss = 0 #Placeholder for training loss per epoch
        for batch in train_data_loader:
            feature = batch['features'].to(device)
            emotionLabel = batch['emotionLabel'].to(device)
            strengthLabel = batch['strengthLabel'].to(device)

            optimizer.zero_grad()
            emotionOutput, strengthOutput = model(feature)
            #Calculate the loss for emotion head
            emo_loss = criterion(emotionOutput, emotionLabel)
            #Calculate the loss for strength head
            strength_loss = criterion(strengthOutput, strengthLabel)
            #Combine two losses to make a total loss. 
            #Put more weight on the emotion loss (7:3). Detecting emotion is more critical
            loss = 0.7*emo_loss + 0.3*strength_loss
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        #Validate the model
        model.eval() #Set the model to evaluation mode
        with torch.no_grad():
            total_samples = 0 #Total output for accuracy calculation
            emoCorrect = 0 #Amount of correct prediction for accuracy calculation
            strengthCorrect = 0
            total_val_loss = 0 #Placeholder for validation loss per epoch
            
            for batch in validation_data_loader:
                feature = batch['features'].to(device)
                emotionLabel = batch['emotionLabel'].to(device)
                strengthLabel = batch['strengthLabel'].to(device)
                #Forward pass
                emotionOutput, strengthOutput = model(feature)
                emo_loss = criterion(emotionOutput, emotionLabel)
                strength_loss = criterion(strengthOutput, strengthLabel )
                total_val_loss += 0.7*emo_loss + 0.3*strength_loss
                
                # Get predicted emotion class & target emotion class
                emo_predicted = torch.argmax(emotionOutput, dim=1)
                emo_target = torch.argmax(emotionLabel, dim=1)
                
                # Get predicted strength class & target emotion class
                strength_predicted = torch.argmax(strengthOutput,dim=1)
                strength_target = torch.argmax(strengthLabel, dim=1 )
                
                emoCorrect += (emo_predicted == emo_target).sum().item()
                strengthCorrect += (strength_predicted ==strength_target).sum().item()
                
                #Get total number of samples per epoch
                total_samples += emotionLabel.size(0)

            emo_accuracy = emoCorrect / total_samples
            strength_accuracy = strengthCorrect / total_samples


            #Print out the validation loss and accuracy per epoch
            # print(f"Epoch {epoch+1}/{num_epochs} | Training Loss: {total_loss:.4f}, Validation Loss: {total_val_loss:.4f}, Accuracy (Emotion): {emo_accuracy:.4f}, Accuracy (Strength): {strength_accuracy}")

            #pass the validation loss to the scheduler
            scheduler.step(total_val_loss)

        #If fineTuning = False, save the model with the lowest validation loss
        #Save the first epoch model just in case

            if epoch == 0:
                min_val_loss = total_val_loss #Instantiate the min_val_loss at the first epoch
                if save == True:
                    torch.save(model.state_dict(), 'bestFAN.pth')
            #Save the model if the validation loss is the lowest
            elif total_val_loss < min_val_loss:
                min_val_loss = total_val_loss
                if save == True:
                    torch.save(model.state_dict(), 'bestFAN.pth')
                    print(f"Model saved after Epoch: {epoch+1}")
                        
    #Return the minimum validation loss for hyperparameter tuning
    return min_val_loss

In [8]:
import optuna

def objective(trial):

    set_seed(42)

    #Define the hyperparameters to be tuned
    #Dimension of the hidden layer (Make it divisible by 4 to avoid shape mismatch)
    hidden_size = trial.suggest_int('hidden_size', 52, 100, step=4)
    #Number of layers
    num_layers = trial.suggest_int('num_layers', 5, 15)
    #Dropout rate for the final feedforward network [0.1, 0.5]
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
    #Number of epochs [10, 30]
    num_epochs = trial.suggest_int('num_epochs', 10, 30)
    #Learning rate for the optimizer [1e-5, 1e-3]
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-3)

    result = train_validate_model(hidden_size = hidden_size,
                                  num_layers = num_layers,
                                  dropout_rate = dropout_rate,
                                  trainDataLoader = trainDataLoader,
                                  validationDataLoader = validationDataLoader_1,
                                  num_epochs = num_epochs,
                                  learning_rate = learning_rate,
                                  save = False
                                  )

    return result


  from .autonotebook import tqdm as notebook_tqdm


In [9]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

# Print the best hyperparameters
print("Best trial:")
trial = study.best_trial
print(f"  Value (Validation Loss): {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")


[I 2025-04-07 18:31:40,217] A new study created in memory with name: no-name-8f923894-36b6-4c9c-b82f-d3090916feb3
[I 2025-04-07 18:31:43,730] Trial 0 finished with value: 4.056650161743164 and parameters: {'hidden_size': 56, 'num_layers': 6, 'dropout_rate': 0.26965837296449824, 'num_epochs': 26, 'learning_rate': 0.0007554774751746104}. Best is trial 0 with value: 4.056650161743164.
[I 2025-04-07 18:31:47,447] Trial 1 finished with value: 4.060760498046875 and parameters: {'hidden_size': 84, 'num_layers': 8, 'dropout_rate': 0.237863290934139, 'num_epochs': 20, 'learning_rate': 0.0006137881479485964}. Best is trial 0 with value: 4.056650161743164.
[I 2025-04-07 18:31:50,421] Trial 2 finished with value: 4.077528476715088 and parameters: {'hidden_size': 68, 'num_layers': 9, 'dropout_rate': 0.40853717145636403, 'num_epochs': 15, 'learning_rate': 0.0006983279205018003}. Best is trial 0 with value: 4.056650161743164.
[I 2025-04-07 18:31:55,186] Trial 3 finished with value: 4.0658111572265625

Best trial:
  Value (Validation Loss): 3.8056087493896484
  Params: 
    hidden_size: 88
    num_layers: 8
    dropout_rate: 0.1152572863386475
    num_epochs: 29
    learning_rate: 0.0009371413766206002


In [10]:
print(trial.params)

{'hidden_size': 88, 'num_layers': 8, 'dropout_rate': 0.1152572863386475, 'num_epochs': 29, 'learning_rate': 0.0009371413766206002}


In [11]:
best_hidden_size = trial.params['hidden_size']
best_num_layers = trial.params['num_layers']
best_dropout_rate = trial.params['dropout_rate']
best_num_epochs = trial.params['num_epochs']
best_learning_rate = trial.params['learning_rate']

In [12]:
# Merge the training and validation dataset 1 to train the final model
final_train_dataset = voiceDataset(features[:train_size + val_size],
                                    emotionLabels[:train_size + val_size], 
                                    strengthLabels[:train_size + val_size])

final_trainDataLoader = DataLoader(final_train_dataset, batch_size=128, shuffle=True)

result = train_validate_model(hidden_size = best_hidden_size,
                                  num_layers = best_num_layers,
                                  dropout_rate = best_dropout_rate,
                                  trainDataLoader = final_trainDataLoader,
                                  #Use validationDataLoader_2 for the early stopping of the final model
                                  validationDataLoader = validationDataLoader_2,
                                  num_epochs = best_num_epochs,
                                  learning_rate = best_learning_rate,
                                  save = True
                                  )

Model saved after Epoch: 2
Model saved after Epoch: 3
Model saved after Epoch: 7
Model saved after Epoch: 8
Model saved after Epoch: 10
Model saved after Epoch: 11
Model saved after Epoch: 16
Model saved after Epoch: 17


In [13]:
set_seed(42)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

bestModel = FAN_Classifier(
                       hidden_size = best_hidden_size,
                       num_layers = best_num_layers,
                       dropout_rate = best_dropout_rate,
                       )

bestModel.load_state_dict(torch.load('bestFAN.pth',map_location=device))

bestModel.eval() #Set the bestModel to evaluation mode
bestModel.to(device)

with torch.no_grad():
  total = 0 #Total output for accuracy calculation
  emoCorrect = 0 #Amount of correct prediction for accuracy calculation
  strengthCorrect = 0
  for batch in testDataLoader:
      feature = batch['features'].to(device)
      emoLabel = batch['emotionLabel'].to(device)
      strengthLabel = batch['strengthLabel'].to(device)
      
      emoOutput, strengthOutput = bestModel(feature)
      
      emoPredicted = torch.argmax(emoOutput, dim = 1)
      emoTarget = torch.argmax(emoLabel, dim=1)
      
      strengthPredicted = torch.argmax(strengthOutput, dim=1)
      strengthTarget = torch.argmax(strengthLabel, dim=1)
      
            
      total += emoLabel.size(0)
      emoCorrect += (emoPredicted == emoTarget).sum().item()
      strengthCorrect += (strengthPredicted == strengthTarget).sum().item()
      
  print(f"Emotion Accuracy: {emoCorrect/total:.4f}")
  print(f"Strength Accuracy: {strengthCorrect/total:.4f}")

Emotion Accuracy: 0.5900
Strength Accuracy: 0.8183
