In [1]:
from pathlib import Path
import csv
from itertools import groupby
import h5py
import numpy as np
import sklearn
from sklearn.cluster import KMeans
from sklearn.svm import SVC
# from sklearn.linear_model import LogisticRegression
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import random
import matplotlib
from matplotlib import pyplot as plt

# Configuration & Utilities

In [2]:
# Seed Python's built-in `random` module only; the NumPy and torch RNGs are not
# affected by this call.
random.seed(42)
# Ten hex colour codes (not referenced by the cells visible in this section).
COLORS = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"]
# CSV with one row per annotated audio file; `load_annotations` reads its
# 'file' and 'tags' columns.
ANNOTATIONS_PATH = "/media/xtrem/data/experiments/nicolingua-0001-language-id/language-id-annotations/metadata.csv"
# One directory of pre-extracted feature files per feature variant.
# NOTE(review): the "-c" / "-z" suffixes presumably mean wav2vec context vs.
# latent outputs (see `to_user_friendly_feature_name`) — confirm.
FEATURE_DIRS = [
    '/media/xtrem/data/experiments/nicolingua-0001-language-id/wav2vec_features-c',
    '/media/xtrem/data/experiments/nicolingua-0001-language-id/wav2vec_features-z',
    '/media/xtrem/data/experiments/nicolingua-0001-language-id/retrained-wav2vec_features-c',
    '/media/xtrem/data/experiments/nicolingua-0001-language-id/retrained-wav2vec_features-z'
]


# Dropout probability used after each convolution stage of the CNN models.
dropout_p = 0.6
# Dropout probability applied to the pooled vector right before the final linear layer.
fc_dropout_p = 0.5

# Directory that receives the result CSVs; its name encodes both dropout settings.
RESULTS_DIR = f'results_008_gn_lang_classification__dropout_{dropout_p}__fc_dropout_{fc_dropout_p}'

# Index of the CUDA device used by `train_on_fold` (as "cuda:{GPU_ID}").
GPU_ID = 1

In [3]:
# Target languages; the position in this tuple is the integer class id.
_LANGUAGES = ("maninka", "susu", "pular")

# class id -> specification of which annotation tags select that class:
#   required_tags  - all of these must be present on an audio file
#   forbidden_tags - none of these may be present (the other languages' tags)
annotation_specification = {
    class_id: {
        'id': class_id,
        'label': language,
        'required_tags': {'ct-speech', f'lng-{language}'},
        'forbidden_tags': {f'lng-{other}' for other in _LANGUAGES if other != language},
    }
    for class_id, language in enumerate(_LANGUAGES)
}

In [4]:
# Groups of annotation tags used to break accuracy down by potential bias source.
# Each category lists sub-categories; an audio file belongs to a sub-category
# when it carries at least one of that sub-category's tags.
# NOTE(review): "utt-multi-lingual-named-endity" and "spkr-mult" look like
# misspellings, but they are matched against the annotation file verbatim —
# verify against the data before changing them.
bias_category_specification = [
    {
        "category": "utterance",
        "subcategories": [
            {"subcategory": "verbal_nod", "tags": {"utt-verbal-nod"}},
            {"subcategory": "multilingual", "tags": {"utt-multi-lingual", "utt-multi-lingual-named-endity"}},
        ],
    },
    {
        "category": "speaker_count",
        "subcategories": [
            {"subcategory": "single", "tags": {"spkr-single"}},
            {"subcategory": "multiple", "tags": {"spkr-mult", "spkr-multi"}},
        ],
    },
    {
        "category": "gender",
        "subcategories": [
            {"subcategory": "male", "tags": {"spkr-male"}},
            {"subcategory": "female", "tags": {"spkr-female"}},
        ],
    },
    {
        "category": "language",
        "subcategories": [
            {"subcategory": "susu", "tags": {"lng-susu"}},
            {"subcategory": "maninka", "tags": {"lng-maninka"}},
            {"subcategory": "pular", "tags": {"lng-pular"}},
        ],
    },
    {
        "category": "channel",
        "subcategories": [
            {"subcategory": "telephone", "tags": {"ct-telephone"}},
            {"subcategory": "noise", "tags": {"ct-noise"}},
            {"subcategory": "music", "tags": {"ct-fg-music", "ct-tr-music", "ct-bg-music"}},
        ],
    },
]

# Flattened view: "<category>_<subcategory>" -> tag set (the same set objects as above).
flat_bias_category_specification = {
    f"{category['category']}_{subcategory['subcategory']}": subcategory['tags']
    for category in bias_category_specification
    for subcategory in category['subcategories']
}

for flat_name, flat_tags in flat_bias_category_specification.items():
    print(f"{flat_name}: {flat_tags}")

utterance_verbal_nod: {'utt-verbal-nod'}
utterance_multilingual: {'utt-multi-lingual', 'utt-multi-lingual-named-endity'}
speaker_count_single: {'spkr-single'}
speaker_count_multiple: {'spkr-mult', 'spkr-multi'}
gender_male: {'spkr-male'}
gender_female: {'spkr-female'}
language_susu: {'lng-susu'}
language_maninka: {'lng-maninka'}
language_pular: {'lng-pular'}
channel_telephone: {'ct-telephone'}
channel_noise: {'ct-noise'}
channel_music: {'ct-fg-music', 'ct-bg-music', 'ct-tr-music'}


In [5]:
def to_user_friendly_feature_name(fv_name):
    """Shorten an internal feature(-vector) name for display.

    Applies a fixed sequence of literal substring replacements, in order;
    later replacements see the result of earlier ones.
    """
    substitutions = (
        ("features-", ""),
        ("wav2vec_", ""),
        ("average", "avg"),
        ("timestep", "T"),
        ("c.", "Context"),
        ("z.", "Latent"),
    )
    name = fv_name
    for old, new in substitutions:
        name = name.replace(old, new)
    return name

# Load annotations

In [6]:
def load_annotations(a_file_path, a_specification):
    """Yield `(file, label, tag_set)` for every annotated row that matches a class.

    Args:
        a_file_path: path to a CSV file with (at least) the columns 'file' and
            'tags'; 'tags' is a ';'-separated list of tag names.
        a_specification: mapping `label -> spec`, where each spec has the set
            valued keys 'required_tags' and 'forbidden_tags'.

    Yields:
        `(row['file'], label, tag_set)` for the first label (in the mapping's
        iteration order) whose required tags are all present and whose
        forbidden tags are all absent. Rows matching no label are skipped.

    This is a generator: the file stays open until it is exhausted or closed.
    """
    # Use the arguments rather than the notebook-level globals, so the function
    # can be applied to other annotation files / specifications.
    # newline="" is what the csv module expects for files it parses itself.
    with open(a_file_path, newline="") as f:
        reader = csv.DictReader(f)
        for row in reader:
            tag_set = {t.strip() for t in row['tags'].split(";")}
            for label, spec in a_specification.items():
                required_present = spec['required_tags'].issubset(tag_set)
                if required_present and spec['forbidden_tags'].isdisjoint(tag_set):
                    yield row['file'], label, tag_set
                    break

# Materialise every (file name, class id, tag set) triple produced by the
# generator, then transpose them into three parallel tuples.
# Note: `zip(*data)` cannot be unpacked into three names if no row matched.
data = list(load_annotations(ANNOTATIONS_PATH, annotation_specification))
audio_files, audio_labels, audio_tags = zip(*data)

## Inspect label counts

In [7]:
def inspect_label_counts():
    """Print one line per class: label name, class id and number of items.

    Reads the notebook-level `annotation_specification` and the current
    `audio_labels`, so the output reflects whatever `audio_labels` holds at
    call time.
    """
    for class_id, spec in annotation_specification.items():
        n_items = sum(1 for l in audio_labels if l == class_id)
        print("{:10} ({}): {}".format(spec['label'], class_id, n_items))
inspect_label_counts()

maninka    (0): 114
susu       (1): 32
pular      (2): 28


## Balance data

In [8]:
# Number of items kept per class (28 is the size of the smallest class in the
# counts printed above).
count_per_class = 28
data = list(load_annotations(ANNOTATIONS_PATH, annotation_specification))

# Keep the first `count_per_class` items of each class, in file order,
# classes concatenated in the order of `annotation_specification`.
balanced_data = []
for label in annotation_specification:
    items_for_label = [datum for datum in data if datum[1] == label]
    balanced_data += items_for_label[:count_per_class]

# From here on the three parallel tuples describe the balanced subset only.
audio_files, audio_labels, audio_tags = zip(*balanced_data)

In [9]:
# Re-print the per-class counts now that `audio_labels` holds the balanced subset.
inspect_label_counts()

maninka    (0): 28
susu       (1): 28
pular      (2): 28


## Inspect bias category counts in balanced data

In [10]:
def inspect_bias_category_counts():
    """Print, per flattened bias category, how many items carry at least one of its tags.

    Reads the notebook-level `flat_bias_category_specification` and
    `audio_tags`; each line is "<name> <count> / <total>".
    """
    total = len(audio_tags)
    for name, tags in flat_bias_category_specification.items():
        count = sum(1 for ts in audio_tags if not tags.isdisjoint(ts))
        print(name, count, "/", total)

inspect_bias_category_counts()

utterance_verbal_nod 48 / 84
utterance_multilingual 21 / 84
speaker_count_single 25 / 84
speaker_count_multiple 58 / 84
gender_male 81 / 84
gender_female 15 / 84
language_susu 28 / 84
language_maninka 28 / 84
language_pular 28 / 84
channel_telephone 27 / 84
channel_noise 21 / 84
channel_music 24 / 84


# Prepare 10 cross validation folds

In [11]:
# Fraction of the items that goes into the training part of every split.
TRAIN_PERCENT = .6
# Number of independent random train/test splits to draw.
FOLD_COUNT = 10

n = len(audio_files)
# Use the named constant (previously the literal .6 was repeated here).
n_train = int(np.ceil(n * TRAIN_PERCENT))
n_test = n - n_train
all_indices = range(n)

# fold index -> {'train_indices': [...], 'test_indices': [...]} (sorted item indices).
# Each "fold" is an independent random split of all items, seeded by its own
# fold index; the test sets of different folds may overlap.
cv_folds = {}
# How many folds use each item for training / testing.
train_count_by_index = {i:0 for i in all_indices}
test_count_by_index = {i:0 for i in all_indices}

for fold_index in range(FOLD_COUNT):
    fold_rsampler = np.random.RandomState(seed=fold_index)
    train_index_set = set(fold_rsampler.choice(all_indices, n_train, replace=False))
    test_index_set = set(all_indices).difference(train_index_set)

    cv_folds[fold_index] = {
        'train_indices': sorted(train_index_set),
        'test_indices': sorted(test_index_set),
    }

    # These counters were initialised but never updated before.
    for i in train_index_set:
        train_count_by_index[i] += 1
    for i in test_index_set:
        test_count_by_index[i] += 1


# Load features

In [12]:
def load_features(audio_files, features_input_dir):
    """Load one feature array per audio file from `features_input_dir`.

    For each name in `audio_files`, ".wav" is replaced by ".h5context" and the
    resulting HDF5 file is read: the 'features' dataset is reshaped to the
    shape stored in `info[1:]` (what `info[0]` holds is not visible here).

    Returns:
        A list of arrays, in the same order as `audio_files`.
    """
    input_dir = Path(features_input_dir)
    loaded = []

    for wav_name in audio_files:
        h5_path = input_dir / wav_name.replace(".wav", ".h5context")
        with h5py.File(h5_path, 'r') as h5_file:
            shape = h5_file['info'][1:].astype(int)
            flat = np.array(h5_file['features'][:])
            loaded.append(flat.reshape(shape))
    return loaded

In [13]:
# feature name -> list of per-file feature arrays (aligned with `audio_files`).
# The feature name is the last path component of the directory; `.stem` only
# strips a trailing ".suffix", and these directory names contain no dot.
raw_features = {
    Path(feature_dir).stem: load_features(audio_files, feature_dir)
    for feature_dir in FEATURE_DIRS
}

## Inspect feature shapes

In [14]:
# Show the shape of the first file's array for every feature variant.
for feature_name, per_file_features in raw_features.items():
    friendly_name = to_user_friendly_feature_name(feature_name)
    first_shape = per_file_features[0].shape
    print(f"feature_name: {friendly_name}. feature shape: {first_shape}")

feature_name: c. feature shape: (2998, 512)
feature_name: z. feature shape: (2998, 512)
feature_name: retrained-c. feature shape: (2998, 512)
feature_name: retrained-z. feature shape: (2998, 512)


## Extract feature vectors

In [15]:
def extract_last_timestep_features(raw_features):
    """Return the last row along the first axis of a 2-D feature array."""
    return raw_features[-1, :]


def extract_neuron_average_features(raw_features):
    """Return the mean over the first axis (one value per column)."""
    return np.mean(raw_features, 0)


def identity(x):
    """Return the argument unchanged (keeps the full feature array)."""
    return x


# Extractor name -> function mapping one file's raw feature array to a feature vector.
feature_extractors = dict(
    last_timestep=extract_last_timestep_features,
    neuron_average=extract_neuron_average_features,
    raw_features=identity,
)

In [16]:
# "<feature name>__<extractor name>" -> array stacking the extracted vector of
# every audio file (first axis = file, aligned with `audio_files`).
feature_vectors = {}
for feature_name, per_file_features in raw_features.items():
    for feature_extractor_name, extract in feature_extractors.items():
        fv_name = f"{feature_name}__{feature_extractor_name}"
        feature_vectors[fv_name] = np.array(
            [extract(file_features) for file_features in per_file_features]
        )

## Inspect feature vectors

In [17]:
# Print the per-file shape of every extracted feature-vector variant.
for fvname, vectors in feature_vectors.items():
    print(fvname, vectors[0].shape)

wav2vec_features-c__last_timestep (512,)
wav2vec_features-c__neuron_average (512,)
wav2vec_features-c__raw_features (2998, 512)
wav2vec_features-z__last_timestep (512,)
wav2vec_features-z__neuron_average (512,)
wav2vec_features-z__raw_features (2998, 512)
retrained-wav2vec_features-c__last_timestep (512,)
retrained-wav2vec_features-c__neuron_average (512,)
retrained-wav2vec_features-c__raw_features (2998, 512)
retrained-wav2vec_features-z__last_timestep (512,)
retrained-wav2vec_features-z__neuron_average (512,)
retrained-wav2vec_features-z__raw_features (2998, 512)


# Classification Models

In [18]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorch_model_summary import summary
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim

In [19]:
class LangIdCNN_Mean2_FCD(nn.Module):
    """1-D CNN language classifier pooling per-channel means of three stages.

    Input: `(batch, time, 512)`. A 1x1 convolution reduces the 512 channels to
    3; four stages of `conv(k=3) -> ELU -> dropout -> avg-pool(2)` follow. The
    per-channel time averages after stages 2, 3 and 4 (3 values each) are
    concatenated into a 9-dim vector, passed through dropout and a linear
    layer with 3 outputs.

    Dropout probabilities come from the notebook-level `dropout_p` and
    `fc_dropout_p` at construction time.
    """

    def __init__(self):
        super().__init__()

        # Submodules are created in the same order as they are applied.
        self.conv0 = nn.Conv1d(512, 3, kernel_size=1)

        self.conv1 = nn.Conv1d(3, 1, kernel_size=3)
        self.drop1 = nn.Dropout(p=dropout_p)
        self.pool1 = nn.AvgPool1d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv1d(1, 3, kernel_size=3)
        self.drop2 = nn.Dropout(p=dropout_p)
        self.pool2 = nn.AvgPool1d(kernel_size=2, stride=2)

        self.conv3 = nn.Conv1d(3, 3, kernel_size=3)
        self.drop3 = nn.Dropout(p=dropout_p)
        self.pool3 = nn.AvgPool1d(kernel_size=2, stride=2)

        self.conv4 = nn.Conv1d(3, 3, kernel_size=3)
        self.drop4 = nn.Dropout(p=dropout_p)
        self.pool4 = nn.AvgPool1d(kernel_size=2, stride=2)

        self.drop5 = nn.Dropout(p=fc_dropout_p)

        self.lin6 = nn.Linear(9, 3)

    def _stage(self, index, x):
        """Apply stage `index`: conv -> ELU -> dropout -> average pooling."""
        conv = getattr(self, f"conv{index}")
        drop = getattr(self, f"drop{index}")
        pool = getattr(self, f"pool{index}")
        return pool(drop(F.elu(conv(x))))

    def forward(self, x):
        """Return `(v, logits)`: the pooled 9-dim vector (after dropout) and the 3 class logits."""
        # Conv1d expects (batch, channels, time).
        x = self.conv0(x.permute(0, 2, 1))

        x = self._stage(1, x)
        x = self._stage(2, x)
        stage_means = [x.mean(dim=2)]

        for index in (3, 4):
            x = self._stage(index, x)
            stage_means.append(x.mean(dim=2))

        v = self.drop5(torch.cat(stage_means, dim=1))
        return v, self.lin6(v)
    

class LangIdCNN_MeanStd_FCD(nn.Module):
    """1-D CNN language classifier pooling per-channel mean and std of three stages.

    Input: `(batch, time, 512)`. A 1x1 convolution reduces the 512 channels to
    3; four stages of `conv(k=3) -> ELU -> dropout -> avg-pool(2)` follow. After
    stages 2, 3 and 4 the per-channel mean and standard deviation over time
    are taken (3 + 3 values per stage) and concatenated as
    `[mean2, std2, mean3, std3, mean4, std4]` (18 values), then passed through
    dropout and a linear layer with 3 outputs.

    Dropout probabilities come from the notebook-level `dropout_p` and
    `fc_dropout_p` at construction time.
    """

    def __init__(self):
        super().__init__()

        # Submodules are created in the same order as they are applied.
        self.conv0 = nn.Conv1d(512, 3, kernel_size=1)

        self.conv1 = nn.Conv1d(3, 1, kernel_size=3)
        self.drop1 = nn.Dropout(p=dropout_p)
        self.pool1 = nn.AvgPool1d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv1d(1, 3, kernel_size=3)
        self.drop2 = nn.Dropout(p=dropout_p)
        self.pool2 = nn.AvgPool1d(kernel_size=2, stride=2)

        self.conv3 = nn.Conv1d(3, 3, kernel_size=3)
        self.drop3 = nn.Dropout(p=dropout_p)
        self.pool3 = nn.AvgPool1d(kernel_size=2, stride=2)

        self.conv4 = nn.Conv1d(3, 3, kernel_size=3)
        self.drop4 = nn.Dropout(p=dropout_p)
        self.pool4 = nn.AvgPool1d(kernel_size=2, stride=2)

        self.drop5 = nn.Dropout(p=fc_dropout_p)

        self.lin6 = nn.Linear(18, 3)

    def _stage(self, index, x):
        """Apply stage `index`: conv -> ELU -> dropout -> average pooling."""
        conv = getattr(self, f"conv{index}")
        drop = getattr(self, f"drop{index}")
        pool = getattr(self, f"pool{index}")
        return pool(drop(F.elu(conv(x))))

    def forward(self, x):
        """Return `(v, logits)`: the pooled 18-dim vector (after dropout) and the 3 class logits."""
        # Conv1d expects (batch, channels, time).
        x = self.conv0(x.permute(0, 2, 1))

        x = self._stage(1, x)
        x = self._stage(2, x)
        stage_stats = [x.mean(dim=2), x.std(dim=2)]

        for index in (3, 4):
            x = self._stage(index, x)
            stage_stats.extend((x.mean(dim=2), x.std(dim=2)))

        v = self.drop5(torch.cat(stage_stats, dim=1))
        return v, self.lin6(v)

# Train Classification Models

In [20]:
def get_data_for_fold(fold_id, feature_name, batch_size):
    """Collect the train/test arrays and bias-category indicators for one fold.

    Reads the notebook-level `cv_folds`, `raw_features`, `audio_labels`,
    `audio_tags` and `flat_bias_category_specification`.

    Args:
        fold_id: key into `cv_folds`.
        feature_name: key into `raw_features`.
        batch_size: accepted for signature compatibility; not used here.

    Returns:
        `(train_x, train_y, test_x, test_y, train_bias_category_labels,
        test_bias_category_labels)`. The last two map each flattened bias
        category to a list of 0/1 flags (1 = the item carries at least one of
        the category's tags), aligned with the corresponding split.
    """
    def select(indices):
        # Gather features, labels and tag sets of the given items, then derive
        # the per-category 0/1 membership flags from the tag sets.
        x = np.take(raw_features[feature_name], indices, axis=0)
        y = np.take(audio_labels, indices, axis=0)
        tag_sets = np.take(audio_tags, indices, axis=0)
        membership = {
            category: [int(not tags.isdisjoint(ts)) for ts in tag_sets]
            for category, tags in flat_bias_category_specification.items()
        }
        return x, y, membership

    fold = cv_folds[fold_id]
    train_x, train_y, train_bias_category_labels = select(fold['train_indices'])
    test_x, test_y, test_bias_category_labels = select(fold['test_indices'])

    return train_x, train_y, test_x, test_y, train_bias_category_labels, test_bias_category_labels

def get_audio_files_for_fold(fold_id):
    """Return `(train_files, test_files)`: the audio file names of each split of a fold.

    Reads the notebook-level `cv_folds` and `audio_files`; both results are
    NumPy arrays ordered like the fold's index lists.
    """
    fold = cv_folds[fold_id]
    train_files = np.take(audio_files, fold['train_indices'], axis=0)
    test_files = np.take(audio_files, fold['test_indices'], axis=0)
    return train_files, test_files
    

    
def get_loaders_for_fold(fold_id, feature_name, batch_size):
    """Build the train and test `DataLoader`s for one fold.

    Returns:
        `(train_loader, test_loader, train_bias_category_labels,
        test_bias_category_labels)`. Both loaders iterate `(x, y)` batches of
        at most `batch_size` items in the stored order (no shuffling is
        requested); the bias-category dicts are passed through unchanged from
        `get_data_for_fold`.
    """
    (train_x, train_y, test_x, test_y,
     train_bias_category_labels, test_bias_category_labels) = \
        get_data_for_fold(fold_id, feature_name, batch_size)

    def as_loader(x, y):
        # torch.tensor copies the NumPy data into new tensors.
        dataset = TensorDataset(torch.tensor(x), torch.tensor(y))
        return DataLoader(dataset, batch_size=batch_size)

    train_loader = as_loader(train_x, train_y)
    test_loader = as_loader(test_x, test_y)

    return train_loader, test_loader, train_bias_category_labels, test_bias_category_labels


def get_predictions_for_logits(logits):
    """Return, per row, the index of the class with the highest softmax probability."""
    class_probabilities = logits.softmax(dim=1)
    return class_probabilities.argmax(dim=1)

In [21]:
l2 = nn.PairwiseDistance(p=2)
margin = 1

# See:
# http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1ContrastiveLossLayer.html#details
# http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf

def contrastive_loss(representations, labels):
    """Contrastive loss over all ordered pairs of a batch (including i == j).

    Representations are L2-normalised; for every pair (i, j) with distance d:
      * same label:      d ** 2
      * different label: max(margin - d, 0) ** 2
    The sum over all n*n pairs is divided by 2*n*n.

    Args:
        representations: float tensor of shape (n, dim).
        labels: n class labels (tensor or sequence).

    Returns:
        A 0-dim tensor. For an empty batch a zero is returned instead of
        dividing by zero.
    """
    batch_size = representations.shape[0]
    if batch_size == 0:
        return representations.new_zeros(())

    normalized_reps = F.normalize(representations, dim=1)
    labels = torch.as_tensor(labels, device=representations.device)

    # All pairwise distances at once. Adding `l2.eps` to the difference mirrors
    # what nn.PairwiseDistance does and keeps the norm differentiable on the
    # diagonal, where the difference itself is zero.
    diffs = normalized_reps.unsqueeze(1) - normalized_reps.unsqueeze(0) + l2.eps
    distances = torch.norm(diffs, p=l2.norm, dim=2)

    same_label = labels.unsqueeze(1) == labels.unsqueeze(0)
    pull = torch.square(distances)
    # clamp keeps the hinge inside the autograd graph. Building it with
    # torch.tensor([margin - d, 0]) creates a fresh leaf tensor, so the
    # different-label term contributed no gradient at all.
    push = torch.square(torch.clamp(margin - distances, min=0))

    pair_losses = torch.where(same_label, pull, push)
    return pair_losses.sum() / (2 * batch_size * batch_size)
    

In [22]:
def _accuracy_by_bias_category(true_classes, pred_classes, labels_by_category):
    """Accuracy restricted to the members of each bias category.

    `labels_by_category` maps a category name to 0/1 flags aligned with the
    predictions; the flags are used as sample weights. A category without any
    member in the split gets NaN instead of calling the metric with all-zero
    weights (which cannot be normalised).
    """
    accuracies = {}
    for category, membership in labels_by_category.items():
        if np.sum(membership) == 0:
            accuracies[category] = float('nan')
        else:
            accuracies[category] = sklearn.metrics.accuracy_score(
                true_classes, pred_classes, sample_weight=membership
            )
    return accuracies


def train_on_fold(model_class, fold_id, feature_name, batch_size, epochs, use_contrastive_term):
    """Train a fresh `model_class()` on one fold and evaluate it after every epoch.

    Args:
        model_class: zero-argument callable building an `nn.Module` whose
            forward returns `(representations, logits)`.
        fold_id, feature_name, batch_size: forwarded to `get_loaders_for_fold`.
        epochs: number of passes over the training loader.
        use_contrastive_term: if true, `contrastive_loss` on the
            representations is added to the cross-entropy loss.

    Returns:
        dict `epoch -> metrics` (epochs numbered from 1). Every entry holds the
        mean train/test loss per batch, train/test accuracy, item counts, the
        test logits and true classes, plus per-bias-category accuracies
        (`train_acc_<c>`, `test_acc_<c>`) and member counts (`train_n_<c>`,
        `test_n_<c>`).

    Training accuracy is measured on the outputs produced during the training
    pass (model in train mode).
    """
    # Weight of the contrastive term. The same weight is now applied in
    # training and evaluation so that the two reported losses are comparable
    # (evaluation previously added the term unweighted).
    contrastive_weight = 0.1

    # Fall back to the CPU when the configured CUDA device does not exist.
    use_cuda = torch.cuda.is_available() and GPU_ID < torch.cuda.device_count()
    device = torch.device(f"cuda:{GPU_ID}" if use_cuda else "cpu")
    torch.manual_seed(0)
    results = {}

    model = model_class().to(device)

    train_loader, test_loader, train_bias_category_labels, test_bias_category_labels = \
        get_loaders_for_fold(fold_id, feature_name, batch_size)

    # Summary of a second, throw-away instance on a dummy (10, 2998, 512) input.
    print(summary(model_class(), torch.zeros((10, 2998, 512)), show_input=False))

    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(1, epochs+1):
        # ---- training pass ----
        model.train()
        train_loss = 0
        pred_train_classes = []
        true_train_classes = []

        for x, y in train_loader:
            x = x.to(device)
            y = y.to(device)

            optimizer.zero_grad()
            representations, outputs = model(x)
            pred_train_classes.extend(
                get_predictions_for_logits(outputs).cpu().numpy()
            )
            true_train_classes.extend(y.cpu().numpy())
            loss = criterion(outputs, y)
            if use_contrastive_term:
                loss += contrastive_weight * contrastive_loss(representations, y)

            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        train_n = len(true_train_classes)
        train_loss = train_loss/len(train_loader)
        train_acc = sklearn.metrics.accuracy_score(true_train_classes, pred_train_classes)
        train_acc_by_bias_category = _accuracy_by_bias_category(
            true_train_classes, pred_train_classes, train_bias_category_labels
        )

        # ---- evaluation pass ----
        pred_test_logits = []
        pred_test_classes = []
        true_test_classes = []

        model.eval()
        test_loss = 0
        # No gradients are needed for evaluation; skipping graph construction
        # saves memory and time.
        with torch.no_grad():
            for x, y in test_loader:
                x = x.to(device)
                y = y.to(device)

                representations, outputs = model(x)

                pred_test_logits.extend(outputs.detach().cpu().numpy())
                pred_test_classes.extend(
                    get_predictions_for_logits(outputs).cpu().numpy()
                )
                true_test_classes.extend(y.cpu().numpy())

                loss = criterion(outputs, y)
                if use_contrastive_term:
                    loss += contrastive_weight * contrastive_loss(representations, y)

                test_loss += loss.item()

        test_n = len(true_test_classes)
        test_loss = test_loss / len(test_loader)
        test_acc = sklearn.metrics.accuracy_score(true_test_classes, pred_test_classes)
        test_acc_by_bias_category = _accuracy_by_bias_category(
            true_test_classes, pred_test_classes, test_bias_category_labels
        )

        if epoch%10==0:
            print(f"Epoch: {epoch}. Train Loss: {train_loss:0.4}. Test Loss: {test_loss:0.4}. Train Acc: {train_acc:0.4}. Test Acc:{test_acc:0.4}")

        results[epoch] = {
            'epoch': epoch,
            'train_loss': train_loss,
            'test_loss': test_loss,
            'train_acc': train_acc,
            'test_acc': test_acc,
            'train_n': train_n,
            'test_n': test_n,
            'test_logits': pred_test_logits,
            'test_true_classes': true_test_classes
        }

        for c, acc in train_acc_by_bias_category.items():
            results[epoch][f"train_acc_{c}"] = acc
            results[epoch][f"train_n_{c}"] = int(np.sum(train_bias_category_labels[c]))

        for c, acc in test_acc_by_bias_category.items():
            results[epoch][f"test_acc_{c}"] = acc
            results[epoch][f"test_n_{c}"] = int(np.sum(test_bias_category_labels[c]))

    return results
    

In [23]:
import csv
from pathlib import Path

def write_epoch_test_logits(model_name, all_folds_results):
    """Write the per-item test logits of every epoch to CSV files.

    For each result entry (keys 'feature_name', 'fold_index', 'epochs') one
    file per epoch is written to
    `<RESULTS_DIR>/<model_name>/<feature_name>_<fold_index>_data/epoch_<NNNN>.csv`
    with the columns fold_id, datum_index, datum_name, true_class_id, logits.
    `datum_index` is the position within the fold's test split and
    `datum_name` the corresponding audio file name.
    """
    fieldnames = ["fold_id", "datum_index", "datum_name", "true_class_id", "logits"]

    for result_entry in all_folds_results:
        feature_name = result_entry['feature_name']
        fold_index = result_entry['fold_index']

        _, test_audio_files = get_audio_files_for_fold(fold_index)

        # The target directory is the same for all epochs of an entry, so it is
        # created once instead of once per epoch.
        parent_dir = Path(f"{RESULTS_DIR}/{model_name}/{feature_name}_{fold_index}_data")
        parent_dir.mkdir(parents=True, exist_ok=True)

        for epoch in sorted(result_entry['epochs'].keys()):
            epoch_results = result_entry['epochs'][epoch]
            test_logits = epoch_results["test_logits"]
            test_true_classes = epoch_results["test_true_classes"]

            file_name = parent_dir / f"epoch_{epoch:04}.csv"
            # newline="" lets the csv module control line endings itself.
            with open(file_name, "w", newline="") as f:
                writer = csv.DictWriter(f, fieldnames=fieldnames)
                writer.writeheader()

                for datum_index, logits in enumerate(test_logits):
                    writer.writerow({
                        "fold_id": fold_index,
                        "datum_index": datum_index,
                        "datum_name": test_audio_files[datum_index],
                        "true_class_id": test_true_classes[datum_index],
                        "logits": logits
                    })

def save_results(model_name, all_folds_results):
    """Write the per-epoch scalar metrics of each fold/feature run to a CSV file.

    One file ``{RESULTS_DIR}/{model_name}/{feature_name}_{fold_index}.csv`` is
    written per entry, with one row per epoch in ascending epoch order.

    Args:
        model_name: Name of the model; used as a sub-directory of RESULTS_DIR.
        all_folds_results: Iterable of dicts with the keys ``'feature_name'``,
            ``'fold_index'`` and ``'epochs'``. ``'epochs'`` maps an epoch number
            to a dict of metric name -> value.

    Notes:
        The columns are the sorted keys of the earliest recorded epoch, minus
        ``"test_logits"`` and ``"test_true_classes"``. Keys that only appear in
        later epochs are dropped (``extrasaction='ignore'``). An entry without
        any epoch is skipped and no file is created for it.
    """
    # Written elsewhere in a per-datum format, so kept out of the metrics table.
    excluded_fields = {"test_logits", "test_true_classes"}

    for result_entry in all_folds_results:
        feature_name = result_entry['feature_name']
        fold_index = result_entry['fold_index']
        epoch_results = result_entry['epochs']

        epoch_numbers = sorted(epoch_results.keys())
        if not epoch_numbers:
            # Nothing was recorded for this run; there is no header to derive.
            continue

        # Use the first epoch that actually exists rather than assuming the
        # numbering starts at 1, and filter instead of list.remove() so a
        # missing excluded key does not raise ValueError.
        first_epoch = epoch_results[epoch_numbers[0]]
        fieldnames = sorted(key for key in first_epoch.keys() if key not in excluded_fields)

        fname = Path(f"{RESULTS_DIR}/{model_name}/{feature_name}_{fold_index}.csv")
        # parents=True also creates RESULTS_DIR itself when needed.
        fname.parent.mkdir(parents=True, exist_ok=True)

        # newline="" is what the csv module requires for files handed to a writer.
        with open(fname, 'w', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
            writer.writeheader()

            for epoch in epoch_numbers:
                writer.writerow(epoch_results[epoch])

In [None]:
# Train each model class on every cross-validation fold and feature set,
# writing the results of a run to disk as soon as that run finishes.
model_classes = [
    LangIdCNN_MeanStd_FCD,
    LangIdCNN_Mean2_FCD,
]

for model_class in model_classes:
    model_name = model_class.__name__
    # Depends only on the model class, so decide it once per model.
    use_contrastive_term = "Contrastive" in model_name

    # Kept in memory so all runs of the current model remain inspectable.
    all_folds_results = []
    for fold_index in cv_folds:
        for feature_name in ['retrained-wav2vec_features-z', 'wav2vec_features-z']: #raw_features:
            print(f"{model_class.__name__} using {feature_name} on fold#{fold_index}. Contrastive: {use_contrastive_term}")
            fold_epoch_results = train_on_fold(model_class, fold_index, feature_name, batch_size=100, epochs=1000, use_contrastive_term=use_contrastive_term)
            fold_entry = {
                'fold_index': fold_index,
                'feature_name': feature_name,
                'epochs': fold_epoch_results
            }
            all_folds_results.append(fold_entry)

            # Persist only the run that just finished. Output paths are keyed by
            # (model, feature, fold), so earlier runs are already on disk;
            # passing the whole accumulated list would rewrite every earlier
            # run's files (one CSV per epoch) on each iteration.
            save_results(model_name, [fold_entry])
            write_epoch_test_logits(model_name, [fold_entry])

LangIdCNN_MeanStd_FCD using retrained-wav2vec_features-z on fold#0. Contrastive: False
-----------------------------------------------------------------------
      Layer (type)        Output Shape         Param #     Tr. Param #
          Conv1d-1       [10, 3, 2998]           1,539           1,539
          Conv1d-2       [10, 1, 2996]              10              10
         Dropout-3       [10, 1, 2996]               0               0
       AvgPool1d-4       [10, 1, 1498]               0               0
          Conv1d-5       [10, 3, 1496]              12              12
         Dropout-6       [10, 3, 1496]               0               0
       AvgPool1d-7        [10, 3, 748]               0               0
          Conv1d-8        [10, 3, 746]              30              30
         Dropout-9        [10, 3, 746]               0               0
      AvgPool1d-10        [10, 3, 373]               0               0
         Conv1d-11        [10, 3, 371]              30      

Epoch: 790. Train Loss: 0.1552. Test Loss: 0.8587. Train Acc: 0.9608. Test Acc:0.6364
Epoch: 800. Train Loss: 0.1505. Test Loss: 0.8718. Train Acc: 0.9216. Test Acc:0.6364
Epoch: 810. Train Loss: 0.1578. Test Loss: 0.7561. Train Acc: 0.9608. Test Acc:0.6364
Epoch: 820. Train Loss: 0.2164. Test Loss: 0.892. Train Acc: 0.8824. Test Acc:0.6364
Epoch: 830. Train Loss: 0.1224. Test Loss: 0.8936. Train Acc: 0.9804. Test Acc:0.6364
Epoch: 840. Train Loss: 0.1677. Test Loss: 0.869. Train Acc: 0.9608. Test Acc:0.6364
Epoch: 850. Train Loss: 0.2052. Test Loss: 0.9762. Train Acc: 0.9216. Test Acc:0.6364
Epoch: 860. Train Loss: 0.1965. Test Loss: 0.9979. Train Acc: 0.9608. Test Acc:0.6364
Epoch: 870. Train Loss: 0.1353. Test Loss: 0.859. Train Acc: 0.9804. Test Acc:0.6667
Epoch: 880. Train Loss: 0.202. Test Loss: 0.7802. Train Acc: 0.9216. Test Acc:0.6364
Epoch: 890. Train Loss: 0.08869. Test Loss: 0.877. Train Acc: 1.0. Test Acc:0.6667
Epoch: 900. Train Loss: 0.1456. Test Loss: 0.9646. Train Acc:

Epoch: 580. Train Loss: 0.3171. Test Loss: 1.165. Train Acc: 0.902. Test Acc:0.4545
Epoch: 590. Train Loss: 0.3769. Test Loss: 1.529. Train Acc: 0.8824. Test Acc:0.3939
Epoch: 600. Train Loss: 0.2861. Test Loss: 1.491. Train Acc: 0.9412. Test Acc:0.3939
Epoch: 610. Train Loss: 0.3189. Test Loss: 1.432. Train Acc: 0.8824. Test Acc:0.3939
Epoch: 620. Train Loss: 0.3248. Test Loss: 1.334. Train Acc: 0.902. Test Acc:0.4242
Epoch: 630. Train Loss: 0.249. Test Loss: 1.373. Train Acc: 0.9608. Test Acc:0.3939
Epoch: 640. Train Loss: 0.2698. Test Loss: 1.372. Train Acc: 0.902. Test Acc:0.3939
Epoch: 650. Train Loss: 0.3895. Test Loss: 1.335. Train Acc: 0.8039. Test Acc:0.4848
Epoch: 660. Train Loss: 0.2482. Test Loss: 1.371. Train Acc: 0.902. Test Acc:0.4545
Epoch: 670. Train Loss: 0.2885. Test Loss: 1.397. Train Acc: 0.8824. Test Acc:0.4848
Epoch: 680. Train Loss: 0.2476. Test Loss: 1.34. Train Acc: 0.9608. Test Acc:0.4848
Epoch: 690. Train Loss: 0.2817. Test Loss: 1.353. Train Acc: 0.902. Tes

Epoch: 370. Train Loss: 0.4991. Test Loss: 1.043. Train Acc: 0.7451. Test Acc:0.7576
Epoch: 380. Train Loss: 0.4768. Test Loss: 1.104. Train Acc: 0.8039. Test Acc:0.7273
Epoch: 390. Train Loss: 0.3659. Test Loss: 1.148. Train Acc: 0.8431. Test Acc:0.7273
Epoch: 400. Train Loss: 0.4724. Test Loss: 1.107. Train Acc: 0.8039. Test Acc:0.7273
Epoch: 410. Train Loss: 0.4077. Test Loss: 1.063. Train Acc: 0.8431. Test Acc:0.7576
Epoch: 420. Train Loss: 0.391. Test Loss: 1.104. Train Acc: 0.8235. Test Acc:0.7879
Epoch: 430. Train Loss: 0.4302. Test Loss: 1.138. Train Acc: 0.8235. Test Acc:0.8182
Epoch: 440. Train Loss: 0.4492. Test Loss: 1.199. Train Acc: 0.8039. Test Acc:0.8182
Epoch: 450. Train Loss: 0.3682. Test Loss: 1.123. Train Acc: 0.8824. Test Acc:0.7273
Epoch: 460. Train Loss: 0.3741. Test Loss: 1.167. Train Acc: 0.8627. Test Acc:0.7273
Epoch: 470. Train Loss: 0.3635. Test Loss: 1.204. Train Acc: 0.8824. Test Acc:0.7879
Epoch: 480. Train Loss: 0.2818. Test Loss: 1.248. Train Acc: 0.921

Epoch: 160. Train Loss: 1.078. Test Loss: 1.184. Train Acc: 0.3922. Test Acc:0.2727
Epoch: 170. Train Loss: 1.064. Test Loss: 1.187. Train Acc: 0.4902. Test Acc:0.2727
Epoch: 180. Train Loss: 1.078. Test Loss: 1.189. Train Acc: 0.3725. Test Acc:0.2727
Epoch: 190. Train Loss: 1.078. Test Loss: 1.187. Train Acc: 0.3725. Test Acc:0.2727
Epoch: 200. Train Loss: 1.098. Test Loss: 1.184. Train Acc: 0.2941. Test Acc:0.2727
Epoch: 210. Train Loss: 1.068. Test Loss: 1.178. Train Acc: 0.4706. Test Acc:0.2727
Epoch: 220. Train Loss: 1.089. Test Loss: 1.184. Train Acc: 0.3137. Test Acc:0.3636
Epoch: 230. Train Loss: 1.052. Test Loss: 1.183. Train Acc: 0.3922. Test Acc:0.3939
Epoch: 240. Train Loss: 1.06. Test Loss: 1.178. Train Acc: 0.4706. Test Acc:0.3939
Epoch: 250. Train Loss: 1.057. Test Loss: 1.17. Train Acc: 0.4118. Test Acc:0.3939
Epoch: 260. Train Loss: 1.034. Test Loss: 1.168. Train Acc: 0.5098. Test Acc:0.3636
Epoch: 270. Train Loss: 1.018. Test Loss: 1.165. Train Acc: 0.451. Test Acc:0.

Epoch: 10. Train Loss: 1.113. Test Loss: 1.099. Train Acc: 0.3333. Test Acc:0.303
Epoch: 20. Train Loss: 1.103. Test Loss: 1.099. Train Acc: 0.3529. Test Acc:0.303
Epoch: 30. Train Loss: 1.104. Test Loss: 1.099. Train Acc: 0.4314. Test Acc:0.2727
Epoch: 40. Train Loss: 1.111. Test Loss: 1.102. Train Acc: 0.3137. Test Acc:0.2727
Epoch: 50. Train Loss: 1.093. Test Loss: 1.105. Train Acc: 0.3137. Test Acc:0.2727
Epoch: 60. Train Loss: 1.1. Test Loss: 1.107. Train Acc: 0.3725. Test Acc:0.2727
Epoch: 70. Train Loss: 1.075. Test Loss: 1.11. Train Acc: 0.4314. Test Acc:0.2727
Epoch: 80. Train Loss: 1.092. Test Loss: 1.111. Train Acc: 0.3529. Test Acc:0.2727
Epoch: 90. Train Loss: 1.109. Test Loss: 1.111. Train Acc: 0.2549. Test Acc:0.2727
Epoch: 100. Train Loss: 1.082. Test Loss: 1.109. Train Acc: 0.4706. Test Acc:0.2727
Epoch: 110. Train Loss: 1.086. Test Loss: 1.107. Train Acc: 0.451. Test Acc:0.2727
Epoch: 120. Train Loss: 1.09. Test Loss: 1.107. Train Acc: 0.2941. Test Acc:0.2727
Epoch: 1

Epoch: 990. Train Loss: 0.1477. Test Loss: 2.226. Train Acc: 0.9412. Test Acc:0.6667
Epoch: 1000. Train Loss: 0.155. Test Loss: 2.238. Train Acc: 0.9608. Test Acc:0.6061
LangIdCNN_MeanStd_FCD using wav2vec_features-z on fold#2. Contrastive: False
-----------------------------------------------------------------------
      Layer (type)        Output Shape         Param #     Tr. Param #
          Conv1d-1       [10, 3, 2998]           1,539           1,539
          Conv1d-2       [10, 1, 2996]              10              10
         Dropout-3       [10, 1, 2996]               0               0
       AvgPool1d-4       [10, 1, 1498]               0               0
          Conv1d-5       [10, 3, 1496]              12              12
         Dropout-6       [10, 3, 1496]               0               0
       AvgPool1d-7        [10, 3, 748]               0               0
          Conv1d-8        [10, 3, 746]              30              30
         Dropout-9        [10, 3, 746]    

Epoch: 780. Train Loss: 0.2401. Test Loss: 2.002. Train Acc: 0.902. Test Acc:0.5758
Epoch: 790. Train Loss: 0.2456. Test Loss: 1.798. Train Acc: 0.8824. Test Acc:0.6061
Epoch: 800. Train Loss: 0.1573. Test Loss: 1.655. Train Acc: 0.9608. Test Acc:0.6061
Epoch: 810. Train Loss: 0.1827. Test Loss: 1.676. Train Acc: 0.9804. Test Acc:0.6061
Epoch: 820. Train Loss: 0.2892. Test Loss: 1.723. Train Acc: 0.902. Test Acc:0.6061
Epoch: 830. Train Loss: 0.196. Test Loss: 1.609. Train Acc: 0.9608. Test Acc:0.5758
Epoch: 840. Train Loss: 0.1938. Test Loss: 1.784. Train Acc: 0.9216. Test Acc:0.5758
Epoch: 850. Train Loss: 0.2265. Test Loss: 1.682. Train Acc: 0.9216. Test Acc:0.6061
Epoch: 860. Train Loss: 0.23. Test Loss: 1.669. Train Acc: 0.9608. Test Acc:0.6061
Epoch: 870. Train Loss: 0.2003. Test Loss: 1.805. Train Acc: 0.9608. Test Acc:0.6061
Epoch: 880. Train Loss: 0.2908. Test Loss: 1.845. Train Acc: 0.8824. Test Acc:0.6061
Epoch: 890. Train Loss: 0.1839. Test Loss: 1.817. Train Acc: 0.9412. T

Epoch: 570. Train Loss: 0.3146. Test Loss: 0.6219. Train Acc: 0.9216. Test Acc:0.6061
Epoch: 580. Train Loss: 0.3294. Test Loss: 0.6767. Train Acc: 0.8627. Test Acc:0.6667
Epoch: 590. Train Loss: 0.3048. Test Loss: 0.6611. Train Acc: 0.8627. Test Acc:0.6667
Epoch: 600. Train Loss: 0.2952. Test Loss: 0.6393. Train Acc: 0.8824. Test Acc:0.6364
Epoch: 610. Train Loss: 0.277. Test Loss: 0.6503. Train Acc: 0.8824. Test Acc:0.6667
Epoch: 620. Train Loss: 0.2918. Test Loss: 0.6107. Train Acc: 0.902. Test Acc:0.6364
Epoch: 630. Train Loss: 0.2203. Test Loss: 0.6361. Train Acc: 0.9608. Test Acc:0.6364
Epoch: 640. Train Loss: 0.2878. Test Loss: 0.6039. Train Acc: 0.902. Test Acc:0.6364
Epoch: 650. Train Loss: 0.3107. Test Loss: 0.5853. Train Acc: 0.9412. Test Acc:0.6364
Epoch: 660. Train Loss: 0.2395. Test Loss: 0.5546. Train Acc: 0.9216. Test Acc:0.6364
Epoch: 670. Train Loss: 0.3257. Test Loss: 0.5651. Train Acc: 0.9216. Test Acc:0.6364
Epoch: 680. Train Loss: 0.2379. Test Loss: 0.5928. Train 

Epoch: 360. Train Loss: 0.88. Test Loss: 0.9626. Train Acc: 0.6471. Test Acc:0.5455
Epoch: 370. Train Loss: 0.834. Test Loss: 0.9199. Train Acc: 0.6275. Test Acc:0.5758
Epoch: 380. Train Loss: 0.7757. Test Loss: 0.9275. Train Acc: 0.6275. Test Acc:0.5455
Epoch: 390. Train Loss: 0.7253. Test Loss: 0.8861. Train Acc: 0.7059. Test Acc:0.6061
Epoch: 400. Train Loss: 0.7206. Test Loss: 0.8531. Train Acc: 0.6863. Test Acc:0.5758
Epoch: 410. Train Loss: 0.6899. Test Loss: 0.8637. Train Acc: 0.7647. Test Acc:0.6061
Epoch: 420. Train Loss: 0.7501. Test Loss: 0.8084. Train Acc: 0.6667. Test Acc:0.5758
Epoch: 430. Train Loss: 0.7131. Test Loss: 0.7828. Train Acc: 0.549. Test Acc:0.6061
Epoch: 440. Train Loss: 0.7182. Test Loss: 0.7731. Train Acc: 0.7059. Test Acc:0.5455
Epoch: 450. Train Loss: 0.6456. Test Loss: 0.7807. Train Acc: 0.7451. Test Acc:0.5758
Epoch: 460. Train Loss: 0.6641. Test Loss: 0.8064. Train Acc: 0.7451. Test Acc:0.6061
Epoch: 470. Train Loss: 0.6577. Test Loss: 0.8032. Train A

Epoch: 140. Train Loss: 1.011. Test Loss: 1.126. Train Acc: 0.5294. Test Acc:0.303
Epoch: 150. Train Loss: 1.048. Test Loss: 1.113. Train Acc: 0.4706. Test Acc:0.303
Epoch: 160. Train Loss: 0.9998. Test Loss: 1.098. Train Acc: 0.5882. Test Acc:0.303
Epoch: 170. Train Loss: 0.9616. Test Loss: 1.098. Train Acc: 0.6078. Test Acc:0.3333
Epoch: 180. Train Loss: 0.9468. Test Loss: 1.064. Train Acc: 0.5294. Test Acc:0.303
Epoch: 190. Train Loss: 0.9011. Test Loss: 1.033. Train Acc: 0.6078. Test Acc:0.3333
Epoch: 200. Train Loss: 0.8491. Test Loss: 1.041. Train Acc: 0.6078. Test Acc:0.3333
Epoch: 210. Train Loss: 0.7734. Test Loss: 0.9949. Train Acc: 0.6667. Test Acc:0.3333
Epoch: 220. Train Loss: 0.7631. Test Loss: 0.955. Train Acc: 0.6078. Test Acc:0.3333
Epoch: 230. Train Loss: 0.6699. Test Loss: 0.9269. Train Acc: 0.7255. Test Acc:0.3939
Epoch: 240. Train Loss: 0.6968. Test Loss: 0.9047. Train Acc: 0.7059. Test Acc:0.3939
Epoch: 250. Train Loss: 0.6478. Test Loss: 0.8859. Train Acc: 0.6275

Epoch: 10. Train Loss: 1.103. Test Loss: 1.127. Train Acc: 0.3922. Test Acc:0.2424
Epoch: 20. Train Loss: 1.088. Test Loss: 1.123. Train Acc: 0.3725. Test Acc:0.303
Epoch: 30. Train Loss: 1.094. Test Loss: 1.121. Train Acc: 0.3725. Test Acc:0.303
Epoch: 40. Train Loss: 1.101. Test Loss: 1.122. Train Acc: 0.3529. Test Acc:0.303
Epoch: 50. Train Loss: 1.094. Test Loss: 1.124. Train Acc: 0.3725. Test Acc:0.303
Epoch: 60. Train Loss: 1.093. Test Loss: 1.126. Train Acc: 0.3725. Test Acc:0.303
Epoch: 70. Train Loss: 1.064. Test Loss: 1.127. Train Acc: 0.4706. Test Acc:0.303
Epoch: 80. Train Loss: 1.078. Test Loss: 1.127. Train Acc: 0.3529. Test Acc:0.303
Epoch: 90. Train Loss: 1.084. Test Loss: 1.128. Train Acc: 0.3725. Test Acc:0.303
Epoch: 100. Train Loss: 1.081. Test Loss: 1.13. Train Acc: 0.4118. Test Acc:0.303
Epoch: 110. Train Loss: 1.086. Test Loss: 1.129. Train Acc: 0.3529. Test Acc:0.303
Epoch: 120. Train Loss: 1.087. Test Loss: 1.129. Train Acc: 0.4118. Test Acc:0.303
Epoch: 130. T

Epoch: 980. Train Loss: 0.2433. Test Loss: 1.489. Train Acc: 0.9216. Test Acc:0.5455
Epoch: 990. Train Loss: 0.2067. Test Loss: 1.335. Train Acc: 0.9608. Test Acc:0.5152
Epoch: 1000. Train Loss: 0.2504. Test Loss: 1.233. Train Acc: 0.902. Test Acc:0.5152
LangIdCNN_MeanStd_FCD using retrained-wav2vec_features-z on fold#5. Contrastive: False
-----------------------------------------------------------------------
      Layer (type)        Output Shape         Param #     Tr. Param #
          Conv1d-1       [10, 3, 2998]           1,539           1,539
          Conv1d-2       [10, 1, 2996]              10              10
         Dropout-3       [10, 1, 2996]               0               0
       AvgPool1d-4       [10, 1, 1498]               0               0
          Conv1d-5       [10, 3, 1496]              12              12
         Dropout-6       [10, 3, 1496]               0               0
       AvgPool1d-7        [10, 3, 748]               0               0
          Conv1d-8

In [None]:
# raw_features.keys()
# ['wav2vec_features-c', 'wav2vec_features-z', 'retrained-wav2vec_features-c', 'retrained-wav2vec_features-z']