In [1]:
from pathlib import Path
import csv
import itertools
from itertools import groupby
import h5py
import numpy as np
import sklearn
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import random
import matplotlib
from matplotlib import pyplot as plt

# Configuration & Utilities

In [2]:
random.seed(42)
COLORS = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"]
ANNOTATIONS_PATH = "/media/xtrem/data/experiments/nicolingua-0001-language-id/language-id-annotations/metadata.csv"
FEATURE_DIRS = [
    '/media/xtrem/data/experiments/nicolingua-0001-language-id/wav2vec_features-c',
    '/media/xtrem/data/experiments/nicolingua-0001-language-id/wav2vec_features-z',
    '/media/xtrem/data/experiments/nicolingua-0001-language-id/retrained-wav2vec_features-c',
    '/media/xtrem/data/experiments/nicolingua-0001-language-id/retrained-wav2vec_features-z'
]

TRAIN_PERCENT = .5
#TRAIN_PERCENT = .6

RESULTS_DIR = f'svm_results__split_{TRAIN_PERCENT}'

In [3]:
annotation_specification = {
    0: {
        'id': 0,
        'label': "maninka",
        'required_tags': set(['ct-speech', 'lng-maninka']),
        'forbidden_tags':  set(['lng-susu', 'lng-pular'])
    },
    1: {
        'id': 1,
        'label': "susu",
        'required_tags': set(['ct-speech', 'lng-susu']),
        'forbidden_tags':  set(['lng-maninka', 'lng-pular'])
    },
    2: {
        'id': 2,
        'label': "pular",
        'required_tags': set(['ct-speech', 'lng-pular']),
        'forbidden_tags':  set(['lng-susu', 'lng-maninka'])
    }
}

In [4]:
def to_user_friendly_feature_name(fv_name):
    name = fv_name \
        .replace("features-", "") \
        .replace("wav2vec_", "") \
        .replace("average", "avg") \
        .replace("timestep", "T") \
        .replace("c.", "Context") \
        .replace("z.", "Latent")
    return name

# Load annotations

In [5]:
def load_annotations(a_file_path, a_specification):
    with open(ANNOTATIONS_PATH) as f:
        reader = csv.DictReader(f)
        for row in reader:
            tag_set = set([t.strip() for t in row['tags'].split(";")])
            for label in annotation_specification.keys():
                spec = annotation_specification[label]
                if spec['required_tags'].issubset(tag_set):
                    if spec['forbidden_tags'].isdisjoint(tag_set):
                        yield row['file'], label
                        break

data = list(load_annotations(ANNOTATIONS_PATH, annotation_specification))
audio_files, audio_labels = zip(*data)

## Inspect label counts

In [6]:
def inspect_label_counts():
    for label in annotation_specification:
        count = len([l for l in audio_labels if l == label])
        print("{:10} ({}): {}".format(
            annotation_specification[label]['label'],
            label, 
            count
        ))
inspect_label_counts()

maninka    (0): 114
susu       (1): 32
pular      (2): 28


## Balance data

In [7]:
count_per_class = 28
data = list(load_annotations(ANNOTATIONS_PATH, annotation_specification))
balanced_data = []
for label in annotation_specification:
    balanced_data.extend([d for d in data if d[1] == label][:count_per_class])
audio_files, audio_labels = zip(*balanced_data)

In [8]:
inspect_label_counts()

maninka    (0): 28
susu       (1): 28
pular      (2): 28


# Prepare 10 cross validation folds

In [9]:

FOLD_COUNT = 10

n = len(audio_files)
n_train = int(np.ceil(n * .6))
n_test = n - n_train
all_indices = range(n)

cv_folds = {}
train_count_by_index = {i:0 for i in all_indices}
test_count_by_index = {i:0 for i in all_indices}

for fold_index in range(FOLD_COUNT):
    fold_rsampler = np.random.RandomState(seed=fold_index)
    train_index_set = set(fold_rsampler.choice(all_indices, n_train, replace=False))
    test_index_set = set(all_indices).difference(train_index_set)
        
    cv_folds[fold_index] = {
        'train_indices': sorted(list(train_index_set)),
        'test_indices': sorted(list(test_index_set)),
    }


# Load features

In [10]:
def load_features(audio_files, features_input_dir):
    id_list = []
    features_list = []

    for audio_file_name in audio_files:
        feature_file_name = audio_file_name.replace(".wav", ".h5context")
        feature_path = Path(features_input_dir) / feature_file_name
        with h5py.File(feature_path, 'r') as f:
            features_shape = f['info'][1:].astype(int)
            features = np.array(f['features'][:]).reshape(features_shape)
            # features = pool_feature_last_seq(features)
            features_list.append(features)
    return features_list

In [11]:
raw_features = {}
for feature_dir in FEATURE_DIRS:
    feature_name = Path(feature_dir).stem
    raw_features[feature_name] = load_features(audio_files, feature_dir)

## Inspect feature shapes

In [12]:
for feature_name in raw_features.keys():
    print("feature_name: {}. feature shape: {}".format(
        to_user_friendly_feature_name(feature_name),
        raw_features[feature_name][0].shape
    ))

feature_name: c. feature shape: (2998, 512)
feature_name: z. feature shape: (2998, 512)
feature_name: retrained-c. feature shape: (2998, 512)
feature_name: retrained-z. feature shape: (2998, 512)


## Extract feature vectors

In [13]:
def extract_last_timestep_features(raw_features):
    return raw_features[-1, :]

def extract_neuron_average_features(raw_features):
    return np.mean(raw_features, axis=0)

def identity(x):
    return x

feature_extractors = {
    'last_timestep': extract_last_timestep_features,
    'neuron_average': extract_neuron_average_features,
    'raw_features': identity
}

In [14]:
feature_vectors = {}
for feature_name in raw_features.keys():
    for feature_extractor_name in feature_extractors.keys():
        fv_name = f"{feature_name}__{feature_extractor_name}"
        feature_vectors[fv_name] = []
        for f in raw_features[feature_name]:
            feature_vectors[fv_name].append(
                feature_extractors[feature_extractor_name](f)
            )
            
        feature_vectors[fv_name] = np.array(feature_vectors[fv_name])

## Inspect feature vectors

In [15]:
for fvname in feature_vectors.keys():
    print(fvname, feature_vectors[fvname][0].shape)

wav2vec_features-c__last_timestep (512,)
wav2vec_features-c__neuron_average (512,)
wav2vec_features-c__raw_features (2998, 512)
wav2vec_features-z__last_timestep (512,)
wav2vec_features-z__neuron_average (512,)
wav2vec_features-z__raw_features (2998, 512)
retrained-wav2vec_features-c__last_timestep (512,)
retrained-wav2vec_features-c__neuron_average (512,)
retrained-wav2vec_features-c__raw_features (2998, 512)
retrained-wav2vec_features-z__last_timestep (512,)
retrained-wav2vec_features-z__neuron_average (512,)
retrained-wav2vec_features-z__raw_features (2998, 512)


# Classification Models

In [16]:
import torch
#import torch.nn as nn
#import torch.nn.functional as F
#from pytorch_model_summary import summary
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim

# Train Classification Models

In [17]:
def get_data_for_fold(fold_id, feature_name, batch_size):
    train_indices = cv_folds[fold_id]['train_indices']
    test_indices = cv_folds[fold_id]['test_indices']

    train_x = np.take(raw_features[feature_name], train_indices, axis=0)
    train_y = np.take(audio_labels, train_indices, axis=0)

    test_x = np.take(raw_features[feature_name], test_indices, axis=0)
    test_y = np.take(audio_labels, test_indices, axis=0)
    
    return train_x, train_y, test_x, test_y

    
def get_loaders_for_fold(fold_id, feature_name, batch_size):
    
    train_x, train_y, test_x, test_y = get_data_for_fold(fold_id, feature_name, batch_size)
    
    train_dataset = TensorDataset(
        torch.tensor(train_x), 
        torch.tensor(train_y)
    )

    train_loader = DataLoader(train_dataset, batch_size=batch_size)

    test_dataset = TensorDataset(
        torch.tensor(test_x), 
        torch.tensor(test_y)
    )

    test_loader = DataLoader(test_dataset, batch_size=batch_size)
    
    return train_loader, test_loader


def get_predictions_for_logits(logits):
    probs = F.softmax(logits, dim=1)
    return torch.argmax(probs, dim=1)

In [18]:
def pool_features(x, pooling_mechanism):
    if pooling_mechanism == "neuron_mean":
        return np.mean(x, axis=1)
    elif pooling_mechanism == "neuron_std":
        return np.std(x, axis=1)
    raise ValueError(f"Unknown pooling mechanism: {pooling_mechanism}")
    
    
def make_classifier(algorithm, C, random_state):
    if algorithm == "svm":
        classifier = SVC(kernel="poly", C=C, random_state=random_state)
        return classifier
    elif algorithm == "log_reg":
        classifier = LogisticRegression(C=C, random_state=random_state)
        return classifier
    raise ValueError(f"Unknown algorithm: {algorithm}")

    
def run_trial(fold_index, feature_name, C, pooling_mechanism, algorithm, batch_size=100):
    device = torch.device("cpu:0")
    torch.manual_seed(0)
    

    train_loader, test_loader = get_loaders_for_fold(fold_index, feature_name, batch_size)
    
    classifier = make_classifier(algorithm, C, random_state=0)


    iteration_count = 0 # should be 1
    for batch_idx, (x, y) in enumerate(train_loader):
        x = x.numpy()
        y = y.numpy()

        x_input = pool_features(x, pooling_mechanism)

        classifier.fit(x_input, y)
        y_prime = classifier.predict(x_input)

        train_loss = 0.0
        train_acc = sklearn.metrics.accuracy_score(y, y_prime)
        iteration_count += 1
        assert(iteration_count == 1)

    iteration_count = 0 # should be 1
    for batch_idx, (x, y) in enumerate(test_loader):
        x = x.numpy()
        y = y.numpy()

        x_input = pool_features(x, pooling_mechanism)

        y_prime = classifier.predict(x_input)

        test_loss = 0.0
        test_acc = sklearn.metrics.accuracy_score(y, y_prime)
        iteration_count += 1
        assert(iteration_count == 1)

    return train_acc, test_acc

In [19]:
Path(RESULTS_DIR).mkdir(exist_ok=True)
fname = f"{RESULTS_DIR}/trials.csv"

all_results = []

algorithms = ["log_reg", "svm"]
pooling_mechanisms = ["neuron_mean", "neuron_std"]
regularization_c_values = [1, 0.9, 0.8, 0.7]

trial_params = list(itertools.product(cv_folds, raw_features, regularization_c_values, pooling_mechanisms, algorithms))


with open(fname, 'w') as f:
    result_writer = csv.DictWriter(f, fieldnames = ["fold_index", "feature_name", "C", "pooling_mechanism", "algorithm", "train_acc", "test_acc"])
    result_writer.writeheader()
    
    for e in trial_params:
        fold_index, feature_name, C, pooling_mechanism, algorithm = e
        train_acc, test_acc = run_trial(fold_index, feature_name, C, pooling_mechanism, algorithm, batch_size=100)
        
        record = {
            "fold_index": fold_index, 
            "feature_name": feature_name, 
            "C": C, 
            "pooling_mechanism": pooling_mechanism, 
            "algorithm": algorithm, 
            "train_acc": train_acc, 
            "test_acc": test_acc
        }
        
        result_writer.writerow(record)
        f.flush()
        
        print(record)
    


{'fold_index': 0, 'feature_name': 'wav2vec_features-c', 'C': 1, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'log_reg', 'train_acc': 0.7647058823529411, 'test_acc': 0.5151515151515151}
{'fold_index': 0, 'feature_name': 'wav2vec_features-c', 'C': 1, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'svm', 'train_acc': 0.8627450980392157, 'test_acc': 0.45454545454545453}
{'fold_index': 0, 'feature_name': 'wav2vec_features-c', 'C': 1, 'pooling_mechanism': 'neuron_std', 'algorithm': 'log_reg', 'train_acc': 0.7843137254901961, 'test_acc': 0.5151515151515151}
{'fold_index': 0, 'feature_name': 'wav2vec_features-c', 'C': 1, 'pooling_mechanism': 'neuron_std', 'algorithm': 'svm', 'train_acc': 0.9411764705882353, 'test_acc': 0.48484848484848486}
{'fold_index': 0, 'feature_name': 'wav2vec_features-c', 'C': 0.9, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'log_reg', 'train_acc': 0.7647058823529411, 'test_acc': 0.5151515151515151}
{'fold_index': 0, 'feature_name': 'wav2vec_features-c', 'C': 

{'fold_index': 0, 'feature_name': 'retrained-wav2vec_features-c', 'C': 0.7, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'log_reg', 'train_acc': 0.6078431372549019, 'test_acc': 0.42424242424242425}
{'fold_index': 0, 'feature_name': 'retrained-wav2vec_features-c', 'C': 0.7, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'svm', 'train_acc': 0.9607843137254902, 'test_acc': 0.5151515151515151}
{'fold_index': 0, 'feature_name': 'retrained-wav2vec_features-c', 'C': 0.7, 'pooling_mechanism': 'neuron_std', 'algorithm': 'log_reg', 'train_acc': 0.7058823529411765, 'test_acc': 0.45454545454545453}
{'fold_index': 0, 'feature_name': 'retrained-wav2vec_features-c', 'C': 0.7, 'pooling_mechanism': 'neuron_std', 'algorithm': 'svm', 'train_acc': 0.9607843137254902, 'test_acc': 0.6363636363636364}
{'fold_index': 0, 'feature_name': 'retrained-wav2vec_features-z', 'C': 1, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'log_reg', 'train_acc': 0.6666666666666666, 'test_acc': 0.48484848484848486}
{'fo

{'fold_index': 1, 'feature_name': 'wav2vec_features-z', 'C': 0.9, 'pooling_mechanism': 'neuron_std', 'algorithm': 'svm', 'train_acc': 1.0, 'test_acc': 0.6363636363636364}
{'fold_index': 1, 'feature_name': 'wav2vec_features-z', 'C': 0.8, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'log_reg', 'train_acc': 0.45098039215686275, 'test_acc': 0.36363636363636365}
{'fold_index': 1, 'feature_name': 'wav2vec_features-z', 'C': 0.8, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'svm', 'train_acc': 0.5686274509803921, 'test_acc': 0.48484848484848486}
{'fold_index': 1, 'feature_name': 'wav2vec_features-z', 'C': 0.8, 'pooling_mechanism': 'neuron_std', 'algorithm': 'log_reg', 'train_acc': 0.5098039215686274, 'test_acc': 0.3333333333333333}
{'fold_index': 1, 'feature_name': 'wav2vec_features-z', 'C': 0.8, 'pooling_mechanism': 'neuron_std', 'algorithm': 'svm', 'train_acc': 1.0, 'test_acc': 0.6363636363636364}
{'fold_index': 1, 'feature_name': 'wav2vec_features-z', 'C': 0.7, 'pooling_mechanism': 

{'fold_index': 2, 'feature_name': 'wav2vec_features-c', 'C': 1, 'pooling_mechanism': 'neuron_std', 'algorithm': 'log_reg', 'train_acc': 0.7647058823529411, 'test_acc': 0.5454545454545454}
{'fold_index': 2, 'feature_name': 'wav2vec_features-c', 'C': 1, 'pooling_mechanism': 'neuron_std', 'algorithm': 'svm', 'train_acc': 0.9607843137254902, 'test_acc': 0.6060606060606061}
{'fold_index': 2, 'feature_name': 'wav2vec_features-c', 'C': 0.9, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'log_reg', 'train_acc': 0.7058823529411765, 'test_acc': 0.5151515151515151}
{'fold_index': 2, 'feature_name': 'wav2vec_features-c', 'C': 0.9, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'svm', 'train_acc': 0.8627450980392157, 'test_acc': 0.5757575757575758}
{'fold_index': 2, 'feature_name': 'wav2vec_features-c', 'C': 0.9, 'pooling_mechanism': 'neuron_std', 'algorithm': 'log_reg', 'train_acc': 0.7058823529411765, 'test_acc': 0.5454545454545454}
{'fold_index': 2, 'feature_name': 'wav2vec_features-c', 'C':

{'fold_index': 2, 'feature_name': 'retrained-wav2vec_features-c', 'C': 0.7, 'pooling_mechanism': 'neuron_std', 'algorithm': 'log_reg', 'train_acc': 0.5882352941176471, 'test_acc': 0.45454545454545453}
{'fold_index': 2, 'feature_name': 'retrained-wav2vec_features-c', 'C': 0.7, 'pooling_mechanism': 'neuron_std', 'algorithm': 'svm', 'train_acc': 0.9607843137254902, 'test_acc': 0.7878787878787878}
{'fold_index': 2, 'feature_name': 'retrained-wav2vec_features-z', 'C': 1, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'log_reg', 'train_acc': 0.6666666666666666, 'test_acc': 0.45454545454545453}
{'fold_index': 2, 'feature_name': 'retrained-wav2vec_features-z', 'C': 1, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'svm', 'train_acc': 0.9019607843137255, 'test_acc': 0.6060606060606061}
{'fold_index': 2, 'feature_name': 'retrained-wav2vec_features-z', 'C': 1, 'pooling_mechanism': 'neuron_std', 'algorithm': 'log_reg', 'train_acc': 0.7254901960784313, 'test_acc': 0.5454545454545454}
{'fold_ind

{'fold_index': 3, 'feature_name': 'wav2vec_features-z', 'C': 0.8, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'svm', 'train_acc': 0.7254901960784313, 'test_acc': 0.36363636363636365}
{'fold_index': 3, 'feature_name': 'wav2vec_features-z', 'C': 0.8, 'pooling_mechanism': 'neuron_std', 'algorithm': 'log_reg', 'train_acc': 0.45098039215686275, 'test_acc': 0.2727272727272727}
{'fold_index': 3, 'feature_name': 'wav2vec_features-z', 'C': 0.8, 'pooling_mechanism': 'neuron_std', 'algorithm': 'svm', 'train_acc': 0.9803921568627451, 'test_acc': 0.6060606060606061}
{'fold_index': 3, 'feature_name': 'wav2vec_features-z', 'C': 0.7, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'log_reg', 'train_acc': 0.4117647058823529, 'test_acc': 0.24242424242424243}
{'fold_index': 3, 'feature_name': 'wav2vec_features-z', 'C': 0.7, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'svm', 'train_acc': 0.6470588235294118, 'test_acc': 0.3333333333333333}
{'fold_index': 3, 'feature_name': 'wav2vec_features-z', 

{'fold_index': 4, 'feature_name': 'wav2vec_features-c', 'C': 1, 'pooling_mechanism': 'neuron_std', 'algorithm': 'svm', 'train_acc': 1.0, 'test_acc': 0.5454545454545454}
{'fold_index': 4, 'feature_name': 'wav2vec_features-c', 'C': 0.9, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'log_reg', 'train_acc': 0.7647058823529411, 'test_acc': 0.5151515151515151}
{'fold_index': 4, 'feature_name': 'wav2vec_features-c', 'C': 0.9, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'svm', 'train_acc': 0.8823529411764706, 'test_acc': 0.5454545454545454}
{'fold_index': 4, 'feature_name': 'wav2vec_features-c', 'C': 0.9, 'pooling_mechanism': 'neuron_std', 'algorithm': 'log_reg', 'train_acc': 0.7647058823529411, 'test_acc': 0.45454545454545453}
{'fold_index': 4, 'feature_name': 'wav2vec_features-c', 'C': 0.9, 'pooling_mechanism': 'neuron_std', 'algorithm': 'svm', 'train_acc': 1.0, 'test_acc': 0.5151515151515151}
{'fold_index': 4, 'feature_name': 'wav2vec_features-c', 'C': 0.8, 'pooling_mechanism': 'neu

{'fold_index': 4, 'feature_name': 'retrained-wav2vec_features-c', 'C': 0.7, 'pooling_mechanism': 'neuron_std', 'algorithm': 'svm', 'train_acc': 1.0, 'test_acc': 0.6363636363636364}
{'fold_index': 4, 'feature_name': 'retrained-wav2vec_features-z', 'C': 1, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'log_reg', 'train_acc': 0.5686274509803921, 'test_acc': 0.3333333333333333}
{'fold_index': 4, 'feature_name': 'retrained-wav2vec_features-z', 'C': 1, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'svm', 'train_acc': 0.8627450980392157, 'test_acc': 0.45454545454545453}
{'fold_index': 4, 'feature_name': 'retrained-wav2vec_features-z', 'C': 1, 'pooling_mechanism': 'neuron_std', 'algorithm': 'log_reg', 'train_acc': 0.5294117647058824, 'test_acc': 0.2727272727272727}
{'fold_index': 4, 'feature_name': 'retrained-wav2vec_features-z', 'C': 1, 'pooling_mechanism': 'neuron_std', 'algorithm': 'svm', 'train_acc': 0.9803921568627451, 'test_acc': 0.6666666666666666}
{'fold_index': 4, 'feature_name'

{'fold_index': 5, 'feature_name': 'wav2vec_features-z', 'C': 0.8, 'pooling_mechanism': 'neuron_std', 'algorithm': 'log_reg', 'train_acc': 0.49019607843137253, 'test_acc': 0.3333333333333333}
{'fold_index': 5, 'feature_name': 'wav2vec_features-z', 'C': 0.8, 'pooling_mechanism': 'neuron_std', 'algorithm': 'svm', 'train_acc': 0.9803921568627451, 'test_acc': 0.5151515151515151}
{'fold_index': 5, 'feature_name': 'wav2vec_features-z', 'C': 0.7, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'log_reg', 'train_acc': 0.47058823529411764, 'test_acc': 0.30303030303030304}
{'fold_index': 5, 'feature_name': 'wav2vec_features-z', 'C': 0.7, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'svm', 'train_acc': 0.7843137254901961, 'test_acc': 0.48484848484848486}
{'fold_index': 5, 'feature_name': 'wav2vec_features-z', 'C': 0.7, 'pooling_mechanism': 'neuron_std', 'algorithm': 'log_reg', 'train_acc': 0.5098039215686274, 'test_acc': 0.30303030303030304}
{'fold_index': 5, 'feature_name': 'wav2vec_features

{'fold_index': 6, 'feature_name': 'wav2vec_features-c', 'C': 0.9, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'log_reg', 'train_acc': 0.7450980392156863, 'test_acc': 0.36363636363636365}
{'fold_index': 6, 'feature_name': 'wav2vec_features-c', 'C': 0.9, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'svm', 'train_acc': 0.8823529411764706, 'test_acc': 0.48484848484848486}
{'fold_index': 6, 'feature_name': 'wav2vec_features-c', 'C': 0.9, 'pooling_mechanism': 'neuron_std', 'algorithm': 'log_reg', 'train_acc': 0.7647058823529411, 'test_acc': 0.3333333333333333}
{'fold_index': 6, 'feature_name': 'wav2vec_features-c', 'C': 0.9, 'pooling_mechanism': 'neuron_std', 'algorithm': 'svm', 'train_acc': 0.9215686274509803, 'test_acc': 0.5454545454545454}
{'fold_index': 6, 'feature_name': 'wav2vec_features-c', 'C': 0.8, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'log_reg', 'train_acc': 0.7450980392156863, 'test_acc': 0.3333333333333333}
{'fold_index': 6, 'feature_name': 'wav2vec_features-c

{'fold_index': 6, 'feature_name': 'retrained-wav2vec_features-z', 'C': 1, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'log_reg', 'train_acc': 0.5294117647058824, 'test_acc': 0.3333333333333333}
{'fold_index': 6, 'feature_name': 'retrained-wav2vec_features-z', 'C': 1, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'svm', 'train_acc': 0.8431372549019608, 'test_acc': 0.5454545454545454}
{'fold_index': 6, 'feature_name': 'retrained-wav2vec_features-z', 'C': 1, 'pooling_mechanism': 'neuron_std', 'algorithm': 'log_reg', 'train_acc': 0.5490196078431373, 'test_acc': 0.3333333333333333}
{'fold_index': 6, 'feature_name': 'retrained-wav2vec_features-z', 'C': 1, 'pooling_mechanism': 'neuron_std', 'algorithm': 'svm', 'train_acc': 1.0, 'test_acc': 0.6363636363636364}
{'fold_index': 6, 'feature_name': 'retrained-wav2vec_features-z', 'C': 0.9, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'log_reg', 'train_acc': 0.5294117647058824, 'test_acc': 0.30303030303030304}
{'fold_index': 6, 'feature_

{'fold_index': 7, 'feature_name': 'wav2vec_features-z', 'C': 0.8, 'pooling_mechanism': 'neuron_std', 'algorithm': 'svm', 'train_acc': 0.9803921568627451, 'test_acc': 0.7272727272727273}
{'fold_index': 7, 'feature_name': 'wav2vec_features-z', 'C': 0.7, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'log_reg', 'train_acc': 0.37254901960784315, 'test_acc': 0.2727272727272727}
{'fold_index': 7, 'feature_name': 'wav2vec_features-z', 'C': 0.7, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'svm', 'train_acc': 0.6470588235294118, 'test_acc': 0.5454545454545454}
{'fold_index': 7, 'feature_name': 'wav2vec_features-z', 'C': 0.7, 'pooling_mechanism': 'neuron_std', 'algorithm': 'log_reg', 'train_acc': 0.37254901960784315, 'test_acc': 0.2727272727272727}
{'fold_index': 7, 'feature_name': 'wav2vec_features-z', 'C': 0.7, 'pooling_mechanism': 'neuron_std', 'algorithm': 'svm', 'train_acc': 0.9803921568627451, 'test_acc': 0.7272727272727273}
{'fold_index': 7, 'feature_name': 'retrained-wav2vec_featu

{'fold_index': 8, 'feature_name': 'wav2vec_features-c', 'C': 0.9, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'svm', 'train_acc': 0.8431372549019608, 'test_acc': 0.48484848484848486}
{'fold_index': 8, 'feature_name': 'wav2vec_features-c', 'C': 0.9, 'pooling_mechanism': 'neuron_std', 'algorithm': 'log_reg', 'train_acc': 0.7843137254901961, 'test_acc': 0.48484848484848486}
{'fold_index': 8, 'feature_name': 'wav2vec_features-c', 'C': 0.9, 'pooling_mechanism': 'neuron_std', 'algorithm': 'svm', 'train_acc': 0.9803921568627451, 'test_acc': 0.5151515151515151}
{'fold_index': 8, 'feature_name': 'wav2vec_features-c', 'C': 0.8, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'log_reg', 'train_acc': 0.7647058823529411, 'test_acc': 0.45454545454545453}
{'fold_index': 8, 'feature_name': 'wav2vec_features-c', 'C': 0.8, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'svm', 'train_acc': 0.8235294117647058, 'test_acc': 0.48484848484848486}
{'fold_index': 8, 'feature_name': 'wav2vec_features-c',

{'fold_index': 8, 'feature_name': 'retrained-wav2vec_features-z', 'C': 1, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'log_reg', 'train_acc': 0.7254901960784313, 'test_acc': 0.5454545454545454}
{'fold_index': 8, 'feature_name': 'retrained-wav2vec_features-z', 'C': 1, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'svm', 'train_acc': 0.9019607843137255, 'test_acc': 0.696969696969697}
{'fold_index': 8, 'feature_name': 'retrained-wav2vec_features-z', 'C': 1, 'pooling_mechanism': 'neuron_std', 'algorithm': 'log_reg', 'train_acc': 0.7450980392156863, 'test_acc': 0.696969696969697}
{'fold_index': 8, 'feature_name': 'retrained-wav2vec_features-z', 'C': 1, 'pooling_mechanism': 'neuron_std', 'algorithm': 'svm', 'train_acc': 0.9803921568627451, 'test_acc': 0.6363636363636364}
{'fold_index': 8, 'feature_name': 'retrained-wav2vec_features-z', 'C': 0.9, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'log_reg', 'train_acc': 0.7254901960784313, 'test_acc': 0.5454545454545454}
{'fold_index': 

{'fold_index': 9, 'feature_name': 'wav2vec_features-z', 'C': 0.8, 'pooling_mechanism': 'neuron_std', 'algorithm': 'svm', 'train_acc': 1.0, 'test_acc': 0.6666666666666666}
{'fold_index': 9, 'feature_name': 'wav2vec_features-z', 'C': 0.7, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'log_reg', 'train_acc': 0.4117647058823529, 'test_acc': 0.24242424242424243}
{'fold_index': 9, 'feature_name': 'wav2vec_features-z', 'C': 0.7, 'pooling_mechanism': 'neuron_mean', 'algorithm': 'svm', 'train_acc': 0.6470588235294118, 'test_acc': 0.48484848484848486}
{'fold_index': 9, 'feature_name': 'wav2vec_features-z', 'C': 0.7, 'pooling_mechanism': 'neuron_std', 'algorithm': 'log_reg', 'train_acc': 0.37254901960784315, 'test_acc': 0.2727272727272727}
{'fold_index': 9, 'feature_name': 'wav2vec_features-z', 'C': 0.7, 'pooling_mechanism': 'neuron_std', 'algorithm': 'svm', 'train_acc': 0.9803921568627451, 'test_acc': 0.6666666666666666}
{'fold_index': 9, 'feature_name': 'retrained-wav2vec_features-c', 'C': 1