In [15]:
import pickle
import pandas as pd
import numpy as np
from Speech import Speech
import time 

import sys
sys.path.append("../")
from classify import classifier

## Flags

In [16]:
# Select which precomputed feature file is loaded. Each flag is one key level
# of the train/test path dictionaries defined in the config cell.
feature_type = "max"  # either "mean" or "max"
feature_layer = "middle"  # either "output" or "middle"

# Fail fast with a readable message on a typo, instead of a bare KeyError
# when the path dictionaries are indexed later on.
if feature_type not in ("mean", "max"):
    raise ValueError(
        "feature_type must be 'mean' or 'max', got {!r}".format(feature_type)
    )
if feature_layer not in ("output", "middle"):
    raise ValueError(
        "feature_layer must be 'output' or 'middle', got {!r}".format(feature_layer)
    )

## Config

In [17]:
# Pickled feature files, addressed as <split>_path_dict[feature_type][feature_layer].
train_path_dict = dict(
    mean=dict(
        output="train_speech_features_layer_out_mean.pkl",
        middle="train_speech_features_layer_12_mean.pkl",
    ),
    max=dict(
        output="train_speech_features_layer_out_max.pkl",
        middle="train_speech_features_layer_12_max.pkl",
    ),
)

test_path_dict = dict(
    mean=dict(
        output="test_speech_features_layer_out_mean.pkl",
        middle="test_speech_features_layer_12_mean.pkl",
    ),
    max=dict(
        output="test_speech_features_layer_out_max.pkl",
        middle="test_speech_features_layer_12_max.pkl",
    ),
)

# Resolve the two files selected by the flags (train first, then test).
train_feature_path, test_feature_path = (
    path_table[feature_type][feature_layer]
    for path_table in (train_path_dict, test_path_dict)
)

# Metadata tables for both splits, read relative to the parent directory.
csv_train = "../train.csv"
csv_test = "../test.csv"

# Load each table and keep only the rows whose alc_mapped is not "Control".
df_train = pd.read_csv(csv_train).loc[lambda frame: frame.alc_mapped != "Control"]
df_test = pd.read_csv(csv_test).loc[lambda frame: frame.alc_mapped != "Control"]

# Project helper built on the training metadata; later cells call its
# get_label() to turn an alc_mapped string into a numeric label.
speech = Speech(df_train, path_prefix="../")

## Helper function 

In [18]:
def compute_metrics(y_true, y_pred):
    """Compute UAR, positive-class F1, macro F1 and accuracy for label vectors.

    The original body called ``recall_score``, ``f1_score`` and
    ``accuracy_score`` without importing them anywhere in this notebook, so
    any call raised ``NameError``. The metrics are computed here with numpy
    only, following the scikit-learn definitions of those functions:

    * ``uar``      - unweighted average recall (macro-averaged recall over
                     every label that occurs in ``y_true`` or ``y_pred``).
    * ``f1``       - F1 of the positive class, i.e. label ``1``.
    * ``macro_f1`` - unweighted mean of the per-label F1 scores.
    * ``accuracy`` - fraction of positions where prediction equals truth.

    A recall or F1 whose denominator is zero counts as 0.0.

    Parameters
    ----------
    y_true : array-like of shape (n,)
        Ground-truth labels.
    y_pred : array-like of shape (n,)
        Predicted labels.

    Returns
    -------
    dict
        ``{"uar": float, "f1": float, "macro_f1": float, "accuracy": float}``

    Raises
    ------
    ValueError
        If the inputs differ in length or are empty.
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()

    if y_true.shape != y_pred.shape:
        raise ValueError(
            "y_true and y_pred must have the same length, got {} and {}".format(
                y_true.shape[0], y_pred.shape[0]
            )
        )
    if y_true.size == 0:
        raise ValueError("y_true and y_pred must not be empty")

    def _recall_and_f1(label):
        # One-vs-rest recall and F1 for a single label.
        is_true = y_true == label
        is_pred = y_pred == label
        hits = np.count_nonzero(is_true & is_pred)
        n_true = np.count_nonzero(is_true)
        n_pred = np.count_nonzero(is_pred)
        recall = hits / n_true if n_true else 0.0
        # F1 = 2TP / (2TP + FP + FN) = 2TP / (n_true + n_pred)
        f1_value = 2.0 * hits / (n_true + n_pred) if (n_true + n_pred) else 0.0
        return recall, f1_value

    per_label = [_recall_and_f1(label) for label in np.union1d(y_true, y_pred)]

    uar = float(np.mean([recall for recall, _ in per_label]))
    macro_f1 = float(np.mean([f1_value for _, f1_value in per_label]))
    f1 = float(_recall_and_f1(1)[1])
    accuracy = float(np.mean(y_true == y_pred))

    return {"uar": uar, "f1": f1, "macro_f1": macro_f1, "accuracy": accuracy}

## Load wav2vec2 features

In [19]:
# NOTE: pickle.load executes arbitrary code from the file it reads; only point
# these paths at feature files you produced yourself.

# Training-split features.
with open(train_feature_path, "rb") as train_handle:
    train_feature_dict = pickle.load(train_handle)

# Test-split features.
with open(test_feature_path, "rb") as test_handle:
    test_feature_dict = pickle.load(test_handle)

## Get X and y, X_test and y_test

In [20]:
# Train set
# Build the feature matrix X (N, m) and the label vector y (N,); the original
# note gives m = 1024.

# Size the arrays from the feature dict itself. Using len(df_train) silently
# left all-zero rows with label 0 (neither class) whenever the dict held fewer
# entries than the dataframe.
N = len(train_feature_dict)
if N == 0:
    raise ValueError("train_feature_dict is empty; there is nothing to train on.")

# Read the feature dimension from the first entry rather than from key 0:
# the keys are df_train index labels, and 0 need not be one of them.
m = next(iter(train_feature_dict.values())).shape[0]

X = np.zeros((N, m))
y = np.zeros((N,))

# Row i of X and entry i of y both come from the i-th dict item, so they stay aligned.
for i, (index, features) in enumerate(train_feature_dict.items()):
    X[i, :] = features
    text_label = df_train.loc[index].alc_mapped  # Intoxicated or Sober
    y[i] = speech.get_label(text_label)  # 1 for Intoxicated, -1 for Sober

In [21]:
# Test set
# Build X_test (N_test, m) and y_test (N_test,), with m taken from the
# train-set cell.

# Size the arrays from the feature dict itself. Using len(df_test) silently
# left all-zero rows with label 0 (neither class) whenever the dict held fewer
# entries than the dataframe.
N_test = len(test_feature_dict)

X_test = np.zeros((N_test, m))
y_test = np.zeros((N_test,))

# Row i of X_test and entry i of y_test both come from the i-th dict item.
for i, (index, features) in enumerate(test_feature_dict.items()):
    X_test[i, :] = features
    y_test[i] = speech.get_label(df_test.loc[index].alc_mapped)

## Train SVM and evaluate

In [22]:
# Train the SVM on (X, y) and evaluate it on (X_test, y_test) via the project's
# classify.classifier helper. The recorded run below reports about 40 minutes
# of training, so expect this cell to be slow.
classifier(X, y, X_test, y_test)

Starting training SVM.
Training done. Time taken: 40.18 min.
Evaluation
{'uar': 0.6954545454545454, 'f1': 0.5924781040700671, 'macro_f1': 0.6973393271038006, 'accuracy': 0.7336700336700337}


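Results on the test set for each feature configuration (chosen with `feature_type` and `feature_layer` in the Flags cell). Configuration 4 is the run recorded above.

1. Output layer mean features 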
```
{'uar': 0.720959595959596,
 'f1': 0.628428927680798,
 'macro_f1': 0.7195511855684803,
 'accuracy': 0.7491582491582491}
```

2. Output layer max features 
```
{'uar': 0.6515151515151515, 
'f1': 0.5255157437567861, 
'macro_f1': 0.6561204877885931, 
'accuracy': 0.7057239057239058}
```

3. Middle layer mean features 
```
{'uar': 0.7631313131313131,
 'f1': 0.6835689907362262,
 'macro_f1': 0.758344022238774,
 'accuracy': 0.7814814814814814}
```

4. Middle layer max features 
```
{'uar': 0.6954545454545454, 
'f1': 0.5924781040700671, 
'macro_f1': 0.6973393271038006, 
'accuracy': 0.7336700336700337}
```