In [1]:
from pathlib import Path
import csv
import itertools
import numpy as np
from pprint import PrettyPrinter


import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorch_model_summary import summary
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim

import h5py

import sklearn
import sklearn.metrics

pprint = PrettyPrinter()

## Configuration

In [2]:
# Only metadata rows whose 'language' column is in this set are loaded.
SELECTED_LANGUAGES = {'_language_independent', 'francais', 'maninka', 'pular', 'susu'}

# Dataset root. NOTE: machine-specific absolute path; adjust when running elsewhere.
BASE_DIR = Path('/media/xtrem/data/experiments/nicolingua-0002-va-asr/datasets/gn_va_asr_dataset_2020-08-24_02')

ANNOTATIONS_PATH = BASE_DIR / "annotated_segments" / "metadata.csv"

# Each feature name is also the name of the sub-directory of BASE_DIR
# from which load_features() reads the per-utterance feature files.
FEATURE_NAMES = [
    "wav2vec_features-c",
    "wav2vec_features-z",
    "retrained-wav2vec_features-c",
    "retrained-wav2vec_features-z",
]

# Hyper-parameter grid explored by the trial loop.
# CONV_POOLING_TYPES = ['avg', 'max']
CONV_POOLING_TYPES = ['avg']
OBJECTIVE_TYPES = ['voice_cmd', 'voice_cmd__and__voice_cmd_lng']
CONV_DROPOUT_PROBABILITIES = [0.2]
FC_DROPOUT_PROBABILITIES = [0.2]

# Fraction of (speaker, language) groups assigned to the training split,
# and number of independently sampled splits.
TRAIN_PERCENT = .7
FOLD_COUNT = 5

RESULTS_DIR = 'results_101'
EPOCHS = 1000
BATCH_SIZE = 512
# Feature sequences are zero-padded to this many frames.
MAX_FEATURE_SEQUENCE_LENGTH = 200

GPU_ID = 0
# Fall back to the CPU so the notebook still runs on machines without CUDA.
device = torch.device(f"cuda:{GPU_ID}" if torch.cuda.is_available() else "cpu")

## Load & shuffle metadata records

In [3]:
def load_metadata():
    """Yield the annotation rows whose language is selected.

    Reads the CSV at ANNOTATIONS_PATH with ``csv.DictReader`` and yields,
    in file order, every row (a dict keyed by the CSV header) whose
    'language' value is in SELECTED_LANGUAGES.
    """
    # newline="" lets the csv module do its own newline handling, which is
    # required for quoted fields that contain line breaks.
    with open(ANNOTATIONS_PATH, newline="") as f:
        for row in csv.DictReader(f):
            if row['language'] in SELECTED_LANGUAGES:
                yield row

In [4]:
# load
# Materialize the generator into a list so it can be shuffled in place and reused.
metadata_records = list(load_metadata())

# shuffle
# A dedicated, fixed-seed RandomState keeps the record order reproducible
# without touching numpy's global random state.
metadata_shuffler_rs = np.random.RandomState(seed=42)
metadata_shuffler_rs.shuffle(metadata_records)

In [5]:
# Metadata columns whose distinct values define the groups for which
# per-category accuracies are reported.
bias_category_fields = [
    "device_id",
    "language",
    "speaker_gender",
    "speaker_mothertongue",
]

# field name -> sorted list of the distinct values found in the metadata
bias_categories = {
    field: sorted({record[field] for record in metadata_records})
    for field in bias_category_fields
}

for field, values in bias_categories.items():
    print(f"\n{field}: \n\t{','.join(values)}")


device_id: 
	d001,d002,d003

language: 
	_language_independent,francais,maninka,pular,susu

speaker_gender: 
	F,M

speaker_mothertongue: 
	maninka,pular,susu


### Labels

In [6]:
# Each vocabulary below is built the same way: collect the distinct values of
# one metadata column, sort them, and use the position in the sorted list as
# the integer class id.

# VOICE COMMANDS
voice_cmd_class_names = sorted(set(record['label'] for record in metadata_records))
voice_cmd_class_count = len(voice_cmd_class_names)
voice_cmd_class_id_by_name = dict(zip(voice_cmd_class_names, range(voice_cmd_class_count)))

print("Classes - Voice Commands")
for class_name, class_id in voice_cmd_class_id_by_name.items():
    print(f"{class_id:4}: {class_name}")

print("---------------------")


# VOICE COMMAND LANGUAGES
voice_cmd_lng_class_names = sorted(set(record['language'] for record in metadata_records))
voice_cmd_lng_class_count = len(voice_cmd_lng_class_names)
voice_cmd_lng_class_id_by_name = dict(zip(voice_cmd_lng_class_names, range(voice_cmd_lng_class_count)))

print("Classes - Voice Command Languages")
for class_name, class_id in voice_cmd_lng_class_id_by_name.items():
    print(f"{class_id:3}: {class_name}")

print("---------------------")



# SPEAKER MOTHERTONGUE
spkr_mothertongue_class_names = sorted(set(record['speaker_mothertongue'] for record in metadata_records))
spkr_mothertongue_class_count = len(spkr_mothertongue_class_names)
spkr_mothertongue_class_id_by_name = dict(zip(spkr_mothertongue_class_names, range(spkr_mothertongue_class_count)))

print("Classes - Speaker Mothertongues")
for class_name, class_id in spkr_mothertongue_class_id_by_name.items():
    print(f"{class_id:3}: {class_name}")

print("---------------------")



# SPEAKER GENDER
spkr_gender_class_names = sorted(set(record['speaker_gender'] for record in metadata_records))
spkr_gender_class_count = len(spkr_gender_class_names)
spkr_gender_class_id_by_name = dict(zip(spkr_gender_class_names, range(spkr_gender_class_count)))

print("Classes - Speaker Gender")
for class_name, class_id in spkr_gender_class_id_by_name.items():
    print(f"{class_id:3}: {class_name}")

print("----------------------")



Classes - Voice Commands
   0: 101_wake_word__francais
   1: 101_wake_word__maninka
   2: 101_wake_word__pular
   3: 101_wake_word__susu
   4: 201_add_contact__francais
   5: 201_add_contact__maninka
   6: 201_add_contact__pular
   7: 201_add_contact__susu
   8: 202_search_contact__francais
   9: 202_search_contact__maninka
  10: 202_search_contact__pular
  11: 202_search_contact__susu
  12: 203_update_contact__francais
  13: 203_update_contact__maninka
  14: 203_update_contact__pular
  15: 203_update_contact__susu
  16: 204_delete_contact__francais
  17: 204_delete_contact__maninka
  18: 204_delete_contact__pular
  19: 204_delete_contact__susu
  20: 205_call_contact__francais
  21: 205_call_contact__maninka
  22: 205_call_contact__pular
  23: 205_call_contact__susu
  24: 206_yes__francais
  25: 206_yes__maninka
  26: 206_yes__pular
  27: 206_yes__susu
  28: 207_no__francais
  29: 207_no__maninka
  30: 207_no__pular
  31: 207_no__susu
  32: 301_zero__francais
  33: 301_zero__maninka
  

### Inspect metadata

In [7]:
def count_by_attribute(records, attribute_names):
    """Count records for every combination of the given attributes' values.

    Args:
        records: sequence of dict-like records (iterated more than once).
        attribute_names: keys to group by; each must be present in every record.

    Yields:
        ``(attribute_values, count)`` tuples, where ``attribute_values`` is a
        tuple with one value per attribute name. Combinations are taken from
        the cartesian product of the distinct values seen for each attribute,
        in sorted order, so combinations that match no record are yielded
        with a count of 0.

    Raises:
        KeyError: if a record lacks one of the attribute names.
    """
    # Distinct values observed for each attribute, in attribute order.
    distinct_values = [
        {record[name] for record in records} for name in attribute_names
    ]

    # Tally every record once instead of rescanning all records for each
    # combination of values.
    tally = {}
    for record in records:
        key = tuple(record[name] for name in attribute_names)
        tally[key] = tally.get(key, 0) + 1

    for attribute_values in sorted(itertools.product(*distinct_values)):
        yield (attribute_values, tally.get(attribute_values, 0))

In [8]:
# One count table is printed per (title, attributes) pair.
# The last title used to read "RECORDS BY SPEAKER BY LABEL" although the
# counts are grouped by label only; it now matches the grouping.
record_count_reports = [
    ("RECORDS BY DEVICE", ['device_id']),
    ("RECORDS BY LANGUAGE", ['language']),
    ("RECORDS BY GENDER", ['speaker_gender']),
    ("RECORDS BY AGE", ['speaker_age']),
    ("RECORDS BY SPEAKER", ['speaker_id']),
    ("RECORDS BY SPEAKER BY LANGUAGE", ['speaker_id', 'language']),
    ("RECORDS BY LABEL", ['label']),
]

for report_title, report_attributes in record_count_reports:
    print(report_title)
    for row in sorted(count_by_attribute(metadata_records, report_attributes)):
        print(f"\t{row}")
    print("")

RECORDS BY DEVICE
	(('d001',), 2759)
	(('d002',), 2741)
	(('d003',), 2759)

RECORDS BY LANGUAGE
	(('_language_independent',), 3072)
	(('francais',), 1260)
	(('maninka',), 1356)
	(('pular',), 909)
	(('susu',), 1662)

RECORDS BY GENDER
	(('F',), 2820)
	(('M',), 5439)

RECORDS BY AGE
	(('12',), 237)
	(('13',), 252)
	(('15',), 603)
	(('17',), 1050)
	(('18',), 855)
	(('19',), 273)
	(('20',), 291)
	(('27',), 255)
	(('28',), 183)
	(('29',), 237)
	(('31',), 255)
	(('32',), 291)
	(('33',), 183)
	(('34',), 129)
	(('35',), 498)
	(('37',), 309)
	(('38',), 441)
	(('43',), 183)
	(('44',), 540)
	(('5',), 129)
	(('55',), 183)
	(('61',), 390)
	(('63',), 237)
	(('67',), 255)

RECORDS BY SPEAKER
	(('s001',), 183)
	(('s002',), 129)
	(('s003',), 183)
	(('s004',), 183)
	(('s005',), 129)
	(('s006',), 129)
	(('s007',), 183)
	(('s008',), 129)
	(('s009',), 237)
	(('s010',), 291)
	(('s011',), 129)
	(('s012',), 183)
	(('s013',), 129)
	(('s014',), 237)
	(('s015',), 291)
	(('s016',), 183)
	(('s017',), 237)
	(('s018

## Prepare Cross Validation Folds
- Records are partitioned by (speaker, language) pair.
- Each (speaker, language) pair corresponds to `utterance_count * device_count` records.
- For each fold, all `utterance_count * device_count` records for the same speaker in the same language are in either the TRAIN set or the VALIDATION set (named `test_records` in the code), never in both.


In [9]:
def generate_train_test_records_per_fold(all_records, train_percent=None, fold_count=None):
    """Split records into train/test sets, grouped by (speaker, language).

    All records sharing a (speaker_id, language) pair land in the same split.
    Each fold draws its training groups independently, without replacement,
    from a RandomState seeded with the fold index; the folds are therefore
    repeated random splits rather than disjoint k-fold partitions.

    Args:
        all_records: list of metadata dicts with 'speaker_id' and 'language'.
        train_percent: fraction of groups used for training (rounded up).
            Defaults to the module-level TRAIN_PERCENT.
        fold_count: number of folds to generate. Defaults to FOLD_COUNT.

    Returns:
        dict mapping fold index to
        ``{"train_records": [...], "test_records": [...]}``, each list keeping
        the order of ``all_records``.
    """
    if train_percent is None:
        train_percent = TRAIN_PERCENT
    if fold_count is None:
        fold_count = FOLD_COUNT

    def group_key(record):
        return (record['speaker_id'], record['language'])

    all_speaker_languages = sorted({group_key(r) for r in all_records})

    sl_count = len(all_speaker_languages)
    all_sl_indices = range(sl_count)
    train_sl_count = int(np.ceil(sl_count * train_percent))

    records_per_fold = {}
    for fold_index in range(fold_count):
        # Seeding with the fold index makes every fold reproducible.
        fold_rsampler = np.random.RandomState(seed=fold_index)

        train_sl_index_set = set(fold_rsampler.choice(all_sl_indices, train_sl_count, replace=False))
        train_sl_set = {all_speaker_languages[i] for i in train_sl_index_set}

        # Every group not drawn for training is a test group, so one pass
        # over the records is enough to build both lists.
        train_records = []
        test_records = []
        for r in all_records:
            if group_key(r) in train_sl_set:
                train_records.append(r)
            else:
                test_records.append(r)

        records_per_fold[fold_index] = {
            "train_records": train_records,
            "test_records": test_records
        }

    return records_per_fold

In [10]:
# Build the train/test record lists for every fold once; later cells look
# them up by fold index.
records_per_fold = generate_train_test_records_per_fold(metadata_records)

### Inspect Folds

In [11]:
# For every fold, list the non-empty (speaker, language) groups of each split.
for fold_index in range(FOLD_COUNT):
    fold = records_per_fold[fold_index]
    train_records = fold["train_records"]
    test_records = fold["test_records"]

    print(f"Fold {fold_index} -- TRAIN")
    for row in sorted(count_by_attribute(train_records, ['speaker_id', 'language'])):
        if row[1] > 0:
            print(f"\t{row}")

    print(f"Fold {fold_index} -- TEST")
    for row in sorted(count_by_attribute(test_records, ['speaker_id', 'language'])):
        if row[1] > 0:
            print(f"\t{row}")

    print("---------------------")

Fold 0 -- TRAIN
	(('s001', '_language_independent'), 75)
	(('s001', 'maninka'), 54)
	(('s001', 'susu'), 54)
	(('s002', '_language_independent'), 75)
	(('s002', 'maninka'), 54)
	(('s003', '_language_independent'), 75)
	(('s003', 'maninka'), 54)
	(('s003', 'susu'), 54)
	(('s004', '_language_independent'), 75)
	(('s004', 'susu'), 54)
	(('s005', '_language_independent'), 75)
	(('s006', '_language_independent'), 75)
	(('s006', 'maninka'), 54)
	(('s007', '_language_independent'), 75)
	(('s007', 'pular'), 54)
	(('s007', 'susu'), 54)
	(('s008', '_language_independent'), 75)
	(('s008', 'maninka'), 54)
	(('s009', 'pular'), 54)
	(('s009', 'susu'), 54)
	(('s010', '_language_independent'), 75)
	(('s010', 'maninka'), 54)
	(('s010', 'pular'), 54)
	(('s010', 'susu'), 54)
	(('s011', '_language_independent'), 75)
	(('s011', 'susu'), 54)
	(('s012', '_language_independent'), 75)
	(('s012', 'francais'), 54)
	(('s012', 'susu'), 54)
	(('s013', '_language_independent'), 75)
	(('s013', 'susu'), 54)
	(('s014', 

	(('s002', 'maninka'), 54)
	(('s003', '_language_independent'), 75)
	(('s003', 'maninka'), 54)
	(('s004', '_language_independent'), 75)
	(('s004', 'maninka'), 54)
	(('s005', '_language_independent'), 75)
	(('s005', 'susu'), 54)
	(('s006', '_language_independent'), 75)
	(('s007', '_language_independent'), 75)
	(('s007', 'pular'), 54)
	(('s007', 'susu'), 54)
	(('s008', '_language_independent'), 75)
	(('s009', 'susu'), 54)
	(('s010', 'francais'), 54)
	(('s010', 'pular'), 54)
	(('s010', 'susu'), 54)
	(('s011', 'susu'), 54)
	(('s012', '_language_independent'), 75)
	(('s012', 'francais'), 54)
	(('s012', 'susu'), 54)
	(('s013', '_language_independent'), 75)
	(('s013', 'susu'), 54)
	(('s014', '_language_independent'), 75)
	(('s014', 'francais'), 54)
	(('s015', '_language_independent'), 75)
	(('s015', 'maninka'), 54)
	(('s015', 'pular'), 54)
	(('s016', '_language_independent'), 75)
	(('s016', 'maninka'), 54)
	(('s016', 'susu'), 54)
	(('s017', '_language_independent'), 75)
	(('s017', 'francais')

In [12]:
# For every fold, print the size of each split and its per-language counts.
for fold_index in range(FOLD_COUNT):
    fold = records_per_fold[fold_index]
    train_records = fold["train_records"]
    test_records = fold["test_records"]

    print(f"Fold {fold_index} -- TRAIN: ({len(train_records)})")
    for row in sorted(count_by_attribute(train_records, ['language'])):
        if row[1] > 0:
            print(f"\t{row}")

    print(f"Fold {fold_index} -- TEST: ({len(test_records)})")
    for row in sorted(count_by_attribute(test_records, ['language'])):
        if row[1] > 0:
            print(f"\t{row}")

    print("---------------------")

Fold 0 -- TRAIN: (6018)
	(('_language_independent',), 2400)
	(('francais',), 738)
	(('maninka',), 960)
	(('pular',), 852)
	(('susu',), 1068)
Fold 0 -- TEST: (2241)
	(('_language_independent',), 672)
	(('francais',), 522)
	(('maninka',), 396)
	(('pular',), 57)
	(('susu',), 594)
---------------------
Fold 1 -- TRAIN: (5940)
	(('_language_independent',), 2325)
	(('francais',), 978)
	(('maninka',), 906)
	(('pular',), 519)
	(('susu',), 1212)
Fold 1 -- TEST: (2319)
	(('_language_independent',), 747)
	(('francais',), 282)
	(('maninka',), 450)
	(('pular',), 390)
	(('susu',), 450)
---------------------
Fold 2 -- TRAIN: (6036)
	(('_language_independent',), 2397)
	(('francais',), 972)
	(('maninka',), 912)
	(('pular',), 573)
	(('susu',), 1182)
Fold 2 -- TEST: (2223)
	(('_language_independent',), 675)
	(('francais',), 288)
	(('maninka',), 444)
	(('pular',), 336)
	(('susu',), 480)
---------------------
Fold 3 -- TRAIN: (5796)
	(('_language_independent',), 2097)
	(('francais',), 918)
	(('maninka',), 

## Load Features

In [13]:
def load_features(records, feature_name):
    """Load fixed-length feature matrices for the given records.

    For each record, the file ``BASE_DIR / feature_name / <file>`` is read,
    where ``<file>`` is the record's 'file' value with ".wav" replaced by
    ".h5context". The stored features are reshaped using the 'info' dataset
    and written into a zero matrix of shape
    ``(MAX_FEATURE_SEQUENCE_LENGTH, 512)``: shorter sequences are zero-padded
    at the end, and longer ones are truncated to the first
    MAX_FEATURE_SEQUENCE_LENGTH frames (previously they raised a
    shape-mismatch error).

    Args:
        records: iterable of metadata dicts with a 'file' key.
        feature_name: name of the feature sub-directory under BASE_DIR.

    Returns:
        list of numpy arrays, one per record, in record order.
    """
    features_list = []

    features_input_dir = BASE_DIR / feature_name

    for r in records:
        feature_file_name = r['file'].replace(".wav", ".h5context")
        feature_path = features_input_dir / feature_file_name
        with h5py.File(feature_path, 'r') as f:
            # NOTE(review): 'info'[1:] is used as the array shape, presumably
            # (frame_count, feature_dim) -- confirm against the writer.
            features_shape = f['info'][1:].astype(int)
            features = np.array(f['features'][:]).reshape(features_shape)

        # Keep at most MAX_FEATURE_SEQUENCE_LENGTH frames so the copy below
        # always fits the padded buffer.
        frame_count = min(features.shape[0], MAX_FEATURE_SEQUENCE_LENGTH)

        padded_features = np.zeros((MAX_FEATURE_SEQUENCE_LENGTH, 512), dtype=features.dtype)
        padded_features[:frame_count, :] = features[:frame_count]

        features_list.append(padded_features)
    return features_list

In [14]:
def get_bias_category_labels(records):
    """Build 0/1 membership indicators for every bias category value.

    Returns a dict keyed by ``"<category>__<value>"`` for each value listed
    in ``bias_categories``; each entry is a list aligned with ``records``
    holding 1 where the record's category equals that value and 0 elsewhere.
    """
    return {
        f"{cat}__{cat_val}": [int(r[cat] == cat_val) for r in records]
        for cat, cat_values in bias_categories.items()
        for cat_val in cat_values
    }

# Classification Models

In [15]:
class ASRCNN(nn.Module):
    """1-D convolutional classifier over 512-channel feature sequences.

    The input is projected from 512 to 8 channels by a kernel-size-1
    convolution, then passed through four conv -> ELU -> dropout -> pool
    stages. The time-averaged outputs of stages 2, 3 and 4 (16 + 32 + 64 =
    112 values) are concatenated and fed to one linear head per objective.

    Args:
        conv_pooling_type: "max" or "avg"; pooling used after every conv stage.
        conv_dropout_p: dropout probability inside the conv stages.
        fc_dropout_p: dropout probability applied to the pooled feature vector.
        voice_cmd_neuron_count: number of voice-command classes.
        voice_cmd_lng_neuron_count: number of voice-command-language classes
            (only used by the 'voice_cmd__and__voice_cmd_lng' objective).
        objective_type: 'voice_cmd' or 'voice_cmd__and__voice_cmd_lng'.

    Raises:
        ValueError: on an unknown pooling type or objective type.
    """

    _OBJECTIVE_TYPES = ('voice_cmd', 'voice_cmd__and__voice_cmd_lng')

    def __init__(self,
                 conv_pooling_type,
                 conv_dropout_p,
                 fc_dropout_p,
                 voice_cmd_neuron_count,
                 voice_cmd_lng_neuron_count,
                 objective_type
                ):

        super(ASRCNN, self).__init__()

        if conv_pooling_type not in {"max", "avg"}:
            raise ValueError(f"Unknown Conv Pooling Type: {conv_pooling_type}")

        # Fail at construction time rather than on the first forward pass.
        if objective_type not in self._OBJECTIVE_TYPES:
            raise ValueError(f"Unknown objective type: {objective_type}")

        conv_pooling_class_by_type = {
            "max": nn.MaxPool1d,
            "avg": nn.AvgPool1d,
        }

        conv_pooling_class = conv_pooling_class_by_type[conv_pooling_type]

        self.objective_type = objective_type

        # Kernel-size-1 convolution: per-frame projection 512 -> 8 channels.
        self.conv0 = nn.Conv1d(in_channels=512, out_channels=8, kernel_size=1)

        self.conv1 = nn.Conv1d(in_channels=8, out_channels=8, kernel_size=3)
        self.drop1 = nn.Dropout(p=conv_dropout_p)
        self.pool1 = conv_pooling_class(kernel_size=2, stride=2)

        self.conv2 = nn.Conv1d(in_channels=8, out_channels=16, kernel_size=3)
        self.drop2 = nn.Dropout(p=conv_dropout_p)
        self.pool2 = conv_pooling_class(kernel_size=2, stride=2)

        self.conv3 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3)
        self.drop3 = nn.Dropout(p=conv_dropout_p)
        self.pool3 = conv_pooling_class(kernel_size=2, stride=2)

        self.conv4 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3)
        self.drop4 = nn.Dropout(p=conv_dropout_p)
        self.pool4 = conv_pooling_class(kernel_size=2, stride=2)

        self.drop5 = nn.Dropout(p=fc_dropout_p)

        # 112 = 16 + 32 + 64 channels from the three pooled stage outputs.
        self.lin61 = nn.Linear(in_features=112, out_features=voice_cmd_neuron_count)

        # The language head only exists for the joint objective.
        if self.objective_type == 'voice_cmd__and__voice_cmd_lng':
            self.lin62 = nn.Linear(in_features=112, out_features=voice_cmd_lng_neuron_count)

    def forward(self, x):
        """Compute class logits.

        Args:
            x: tensor indexed as (batch, time, channel) with 512 channels;
               it is permuted to (batch, channel, time) for the convolutions.

        Returns:
            The voice-command logits for the 'voice_cmd' objective, or the
            tuple ``(voice_cmd_logits, voice_cmd_lng_logits)`` for the
            'voice_cmd__and__voice_cmd_lng' objective.

        Raises:
            ValueError: if ``self.objective_type`` is not a known objective.
        """
        x = x.permute(0, 2, 1)
        x = self.conv0(x)

        x = self.conv1(x)
        x = F.elu(x)
        x = self.drop1(x)
        x = self.pool1(x)

        x = self.conv2(x)
        x = F.elu(x)
        x = self.drop2(x)
        x = self.pool2(x)

        # Average over time: one value per channel, independent of length.
        v1 = torch.mean(x, dim=2)

        x = self.conv3(x)
        x = F.elu(x)
        x = self.drop3(x)
        x = self.pool3(x)

        v2 = torch.mean(x, dim=2)

        x = self.conv4(x)
        x = F.elu(x)
        x = self.drop4(x)
        x = self.pool4(x)

        v3 = torch.mean(x, dim=2)

        v = torch.cat((v1, v2, v3), dim=1)
        v = self.drop5(v)

        if self.objective_type == 'voice_cmd':
            logits_voice_cmd = self.lin61(v)
            return logits_voice_cmd
        elif self.objective_type == 'voice_cmd__and__voice_cmd_lng':
            logits_voice_cmd = self.lin61(v)
            logits_voice_cmd_lng = self.lin62(v)
            return logits_voice_cmd, logits_voice_cmd_lng
        else:
            # Raising a plain string is itself a TypeError; raise a real exception.
            raise ValueError(f"Unknown objective type: {self.objective_type}")

In [16]:
def get_data_for_fold(fold_id, feature_name):
    """Assemble features, targets and bias indicators for one fold.

    Returns the tuple ``(train_x, train_y, test_x, test_y,
    train_bias_category_labels, test_bias_category_labels)`` where the ``x``
    values are numpy arrays stacking the loaded feature matrices, the ``y``
    values are dicts of integer class-id arrays keyed by 'voice_cmd',
    'voice_cmd_lng', 'spkr_mothertongue' and 'spkr_gender', and the bias
    labels come from ``get_bias_category_labels``.
    """
    fold_records = records_per_fold[fold_id]
    train_records = fold_records["train_records"]
    test_records = fold_records["test_records"]

    def encode_targets(records):
        # One integer class id per record for every target.
        return {
            'voice_cmd': np.array(
                [voice_cmd_class_id_by_name[r['label']] for r in records]),
            'voice_cmd_lng': np.array(
                [voice_cmd_lng_class_id_by_name[r['language']] for r in records]),
            'spkr_mothertongue': np.array(
                [spkr_mothertongue_class_id_by_name[r['speaker_mothertongue']] for r in records]),
            'spkr_gender': np.array(
                [spkr_gender_class_id_by_name[r['speaker_gender']] for r in records]),
        }

    train_x = np.array(load_features(train_records, feature_name))
    test_x = np.array(load_features(test_records, feature_name))

    train_y = encode_targets(train_records)
    test_y = encode_targets(test_records)

    return (
        train_x,
        train_y,
        test_x,
        test_y,
        get_bias_category_labels(train_records),
        get_bias_category_labels(test_records),
    )

    
def get_loaders_for_fold(fold_id, feature_name, batch_size):
    """Wrap one fold's data in DataLoaders.

    Returns ``(train_loader, test_loader, train_bias_category_labels,
    test_bias_category_labels)``. Each loader yields batches of
    ``(features, voice_cmd_id, voice_cmd_lng_id)``; the speaker targets
    present in the fold data are not included.
    """
    (train_x, train_y, test_x, test_y,
     train_bias_category_labels, test_bias_category_labels) = get_data_for_fold(fold_id, feature_name)

    def build_loader(x, y):
        dataset = TensorDataset(
            torch.tensor(x),
            torch.tensor(y['voice_cmd']),
            torch.tensor(y['voice_cmd_lng']),
        )
        # No shuffling: test() matches predictions with the bias category
        # labels by position, so the loader must keep the record order.
        return DataLoader(dataset, batch_size=batch_size)

    train_loader = build_loader(train_x, train_y)
    test_loader = build_loader(test_x, test_y)

    return train_loader, test_loader, train_bias_category_labels, test_bias_category_labels


def get_predictions_for_logits(logits):
    """Return, for each row of ``logits``, the index of the most probable class.

    The logits are turned into probabilities with a softmax over dim 1 and
    the arg-max over that same dim is returned.
    """
    return F.softmax(logits, dim=1).argmax(dim=1)

In [17]:
def train(model, optimizer, criterion, objective_type, train_loader):
    """Run one optimization pass over ``train_loader``.

    Args:
        model: network to train; switched to training mode.
        optimizer: optimizer stepping the model parameters.
        criterion: loss taking ``(logits, targets)``.
        objective_type: 'voice_cmd' (single head) or
            'voice_cmd__and__voice_cmd_lng' (mean of the two heads' losses).
        train_loader: yields ``(x, y_voice_cmd, y_voice_cmd_lng)`` batches.

    Returns:
        The sum of the per-batch loss values seen during the pass (the
        original accumulated this value but discarded it).

    Raises:
        ValueError: if ``objective_type`` is not one of the two known values.
    """
    model.train()
    train_loss = 0

    for x, y_voice_cmd, y_voice_cmd_lng in train_loader:
        x = x.to(device)
        y_voice_cmd = y_voice_cmd.to(device)
        y_voice_cmd_lng = y_voice_cmd_lng.to(device)

        optimizer.zero_grad()
        outputs = model(x)

        if objective_type == 'voice_cmd':
            logits_voice_cmd = outputs
            loss = criterion(logits_voice_cmd, y_voice_cmd)
        elif objective_type == 'voice_cmd__and__voice_cmd_lng':
            logits_voice_cmd, logits_voice_cmd_lng = outputs
            # Both objectives contribute equally to the joint loss.
            loss = (criterion(logits_voice_cmd, y_voice_cmd) + criterion(logits_voice_cmd_lng, y_voice_cmd_lng)) / 2
        else:
            raise ValueError(f"Unknown objective type: {objective_type}")

        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    return train_loss
        

def test(model, criterion, objective_type, loader, bias_category_labels):
    """Evaluate ``model`` on every batch of ``loader``.

    Args:
        model: network to evaluate; switched to eval mode.
        criterion: loss taking ``(logits, targets)``.
        objective_type: 'voice_cmd' or 'voice_cmd__and__voice_cmd_lng'.
        loader: yields ``(x, y_voice_cmd, y_voice_cmd_lng)`` batches. Its
            sample order must match ``bias_category_labels``.
        bias_category_labels: dict mapping a category key to a per-sample
            0/1 list, used as ``sample_weight`` so that each accuracy only
            counts the samples belonging to that category.

    Returns:
        ``(n, average_loss, acc, acc_by_category, acc_lng,
        acc_lng_by_category)`` where ``n`` is the number of samples,
        ``average_loss`` is the accumulated loss divided by ``n`` (a
        per-sample mean when ``criterion`` sums over the batch, as the
        ``reduction='sum'`` loss built in ``train_on_fold`` does), and the
        language accuracies are -1 for the 'voice_cmd' objective.

    Raises:
        ValueError: if ``objective_type`` is not one of the two known values.
        ZeroDivisionError: if ``loader`` yields no samples.
    """
    model.eval()
    accumulated_loss = 0

    pred_classes = []
    true_classes = []

    pred_classes_lng = []
    true_classes_lng = []

    # Evaluation never calls backward(), so skip autograd bookkeeping.
    with torch.no_grad():
        for x, y_voice_cmd, y_voice_cmd_lng in loader:
            x = x.to(device)
            y_voice_cmd = y_voice_cmd.to(device)
            y_voice_cmd_lng = y_voice_cmd_lng.to(device)

            outputs = model(x)

            if objective_type == 'voice_cmd':
                logits_voice_cmd = outputs
                loss = criterion(logits_voice_cmd, y_voice_cmd)
            elif objective_type == 'voice_cmd__and__voice_cmd_lng':
                logits_voice_cmd, logits_voice_cmd_lng = outputs

                pred_classes_lng.extend(
                    get_predictions_for_logits(logits_voice_cmd_lng).cpu().numpy()
                )
                true_classes_lng.extend(y_voice_cmd_lng.cpu().numpy())

                loss = (criterion(logits_voice_cmd, y_voice_cmd) + criterion(logits_voice_cmd_lng, y_voice_cmd_lng)) /2
            else:
                raise ValueError(f"Unknown objective type: {objective_type}")

            # The voice-command head is scored for both objectives.
            pred_classes.extend(
                get_predictions_for_logits(logits_voice_cmd).cpu().numpy()
            )
            true_classes.extend(y_voice_cmd.cpu().numpy())

            accumulated_loss += loss.item()

    n = len(true_classes)

    average_loss = accumulated_loss/n

    acc = sklearn.metrics.accuracy_score(true_classes, pred_classes)
    acc_by_bias_category = {
        category: sklearn.metrics.accuracy_score(true_classes, pred_classes, sample_weight=sw)
        for category, sw in bias_category_labels.items()
    }

    if objective_type == 'voice_cmd__and__voice_cmd_lng':
        acc_lng = sklearn.metrics.accuracy_score(true_classes_lng, pred_classes_lng)
        acc_by_bias_category_lng = {
            category: sklearn.metrics.accuracy_score(true_classes_lng, pred_classes_lng, sample_weight=sw)
            for category, sw in bias_category_labels.items()
        }
    else:
        # -1 marks "not applicable": there is no language head to score.
        acc_lng = -1
        acc_by_bias_category_lng = {
            category: -1
            for category in bias_category_labels
        }

    return n, average_loss, acc, acc_by_bias_category, acc_lng, acc_by_bias_category_lng
      
        
def train_on_fold(model, fold_id, feature_name, objective_type, batch_size, epochs):
    """Train ``model`` on one fold and record metrics after every epoch.

    Seeds torch's RNG, builds the fold's loaders, prints a model summary and
    the split sizes, then for each epoch runs one training pass followed by
    an evaluation on the training loader and on the test loader. A progress
    line is printed every 10 epochs.

    The model is received already constructed, so the seed set here does not
    influence its initial weights.

    Returns:
        dict mapping epoch number (starting at 1) to a flat dict of metrics:
        overall loss/accuracy for both splits plus per-bias-category
        accuracies and sample counts.
    """
    torch.manual_seed(0)

    loaders = get_loaders_for_fold(fold_id, feature_name, batch_size)
    train_loader, test_loader, train_bias_category_labels, test_bias_category_labels = loaders

    summary_input = torch.zeros((10, MAX_FEATURE_SEQUENCE_LENGTH, 512)).to(device)
    print(summary(model, summary_input, show_input=False))
    print(f"train_n: {len(train_loader.dataset)}")
    print(f"test_n: {len(test_loader.dataset)}")

    optimizer = optim.Adam(model.parameters(), lr=0.001)
    # Summed (not averaged) batch losses; test() divides by the sample count.
    criterion = nn.CrossEntropyLoss(reduction='sum')

    results = {}
    for epoch in range(1, epochs+1):

        # train on training set
        train(model, optimizer, criterion, objective_type, train_loader)

        # test on training set
        (train_n, train_average_loss, train_acc,
         train_acc_by_category, train_acc_lng, train_acc_lng_by_category) = test(
            model, criterion, objective_type, train_loader, train_bias_category_labels)

        # test on test set
        (test_n, test_average_loss, test_acc,
         test_acc_by_category, test_acc_lng, test_acc_lng_by_category) = test(
            model, criterion, objective_type, test_loader, test_bias_category_labels)

        if epoch % 10 == 0:
            print(f"Epoch: {epoch}. Train Loss: {train_average_loss:0.4}. Test Loss: {test_average_loss:0.4}. Train Acc: {train_acc:0.4}. Test Acc:{test_acc:0.4}")

        epoch_results = {
            'epoch': epoch,

            'train_n': train_n,
            'train_loss': train_average_loss,
            'train_acc': train_acc,
            'train_acc_lng': train_acc_lng,

            'test_n': test_n,
            'test_loss': test_average_loss,
            'test_acc': test_acc,
            'test_acc_lng': test_acc_lng
        }

        # Per-category accuracy together with the number of samples in the category.
        for c, category_acc in train_acc_by_category.items():
            epoch_results[f"train_acc_{c}"] = category_acc
            epoch_results[f"train_n_{c}"] = int(np.sum(train_bias_category_labels[c]))

        for c, category_acc in train_acc_lng_by_category.items():
            epoch_results[f"train_acc_lng_{c}"] = category_acc

        for c, category_acc in test_acc_by_category.items():
            epoch_results[f"test_acc_{c}"] = category_acc
            epoch_results[f"test_n_{c}"] = int(np.sum(test_bias_category_labels[c]))

        for c, category_acc in test_acc_lng_by_category.items():
            epoch_results[f"test_acc_lng_{c}"] = category_acc

        results[epoch] = epoch_results

    return results

In [18]:
import csv
from pathlib import Path

def results_exist(model_name, feature_name, fold_id):
    """Tell whether the results CSV for this model/feature/fold already exists.

    The path checked is ``RESULTS_DIR/<model_name>/<feature_name>_<fold_id>.csv``.
    """
    results_file = Path(f"{RESULTS_DIR}/{model_name}/{feature_name}_{fold_id}.csv")
    return results_file.is_file()
    
    

def save_results(model_name, all_folds_results):
    """Write one CSV of per-epoch metrics for every entry in ``all_folds_results``.

    Args:
        model_name: Name of the sub-directory of ``RESULTS_DIR`` to write into.
        all_folds_results: Iterable of dicts, each with the keys
            ``'feature_name'``, ``'fold_index'`` and ``'epochs'``, where
            ``'epochs'`` maps an epoch number to a dict of column name -> value.

    Each entry is written to
    ``{RESULTS_DIR}/{model_name}/{feature_name}_{fold_index}.csv`` with one row
    per epoch in ascending epoch order. The columns are the sorted keys of the
    earliest epoch's row.

    Entries whose ``'epochs'`` mapping is empty are skipped and no file is
    created for them, so ``results_exist`` does not later mistake an empty
    file for a finished trial.

    Raises:
        ValueError: From ``csv.DictWriter`` (``extrasaction='raise'``) when an
            epoch row contains a key that is not among the header columns.
    """
    for result_entry in all_folds_results:
        epochs = result_entry['epochs']
        if not epochs:
            # Nothing to write; do not leave an empty CSV behind.
            continue

        # Resolve the header BEFORE opening the file so that a failure here
        # cannot leave a truncated/empty results file on disk. The earliest
        # recorded epoch supplies the columns (no assumption that epoch 1 exists).
        epoch_ids = sorted(epochs.keys())
        fieldnames = sorted(epochs[epoch_ids[0]].keys())

        feature_name = result_entry['feature_name']
        fold_index = result_entry['fold_index']
        fname = f"{RESULTS_DIR}/{model_name}/{feature_name}_{fold_index}.csv"
        # parents=True also creates RESULTS_DIR itself when it is missing.
        Path(fname).parent.mkdir(parents=True, exist_ok=True)

        # newline='' lets the csv module control line endings (required by the
        # csv docs; avoids blank rows on platforms that translate '\n').
        with open(fname, 'w', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='raise')
            writer.writeheader()
            for epoch in epoch_ids:
                writer.writerow(epochs[epoch])

In [None]:
# Every combination of fold and hyper-parameter choice, with the fold varying slowest.
trial_params = list(itertools.product(
    range(FOLD_COUNT),
    CONV_POOLING_TYPES,
    CONV_DROPOUT_PROBABILITIES,
    FC_DROPOUT_PROBABILITIES,
    FEATURE_NAMES,
    OBJECTIVE_TYPES,
))

# Show the full plan before starting any training.
print("Plan:")
for planned_trial in trial_params:
    print(planned_trial)
print()

for fold_id, conv_pooling_type, conv_dropout_p, fc_dropout_p, feature_name, objective_type in trial_params:

    # The model name encodes every hyper-parameter of the trial and is used as
    # the results sub-directory name.
    model_name = "__".join([
        "ASRCNN",
        f"conv_pool_{conv_pooling_type}",
        f"conv_dp_{conv_dropout_p}",
        f"fc_dp_{fc_dropout_p}",
        f"fn_{feature_name}",
        f"obj_{objective_type}",
    ])

    # Skip trials whose results file is already on disk.
    if results_exist(model_name, feature_name, fold_id):
        print(f"skipping ({fold_id}, {conv_pooling_type}, {conv_dropout_p}, {fc_dropout_p}, {feature_name}, {objective_type})")
        continue

    # A fresh model is built for every trial.
    model = ASRCNN(
        conv_pooling_type,
        conv_dropout_p,
        fc_dropout_p,
        voice_cmd_neuron_count=voice_cmd_class_count,
        voice_cmd_lng_neuron_count=voice_cmd_lng_class_count,
        objective_type=objective_type,
    ).to(device)

    print(f"{model_name} using {feature_name} on fold#{fold_id}")

    epochs_results = train_on_fold(
        model,
        fold_id,
        feature_name,
        objective_type,
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
    )

    # save_results expects a list of fold entries; this one holds a single fold.
    folds_results = [
        {
            'fold_index': fold_id,
            'feature_name': feature_name,
            'epochs': epochs_results,
        }
    ]
    save_results(model_name, folds_results)
    # Disabled in the original cell:
    # write_epoch_test_logits(model_name, all_folds_results)

    # Drop the reference to this trial's model before the next iteration.
    del model

Plan:
(0, 'avg', 0.2, 0.2, 'wav2vec_features-c', 'voice_cmd')
(0, 'avg', 0.2, 0.2, 'wav2vec_features-c', 'voice_cmd__and__voice_cmd_lng')
(0, 'avg', 0.2, 0.2, 'wav2vec_features-z', 'voice_cmd')
(0, 'avg', 0.2, 0.2, 'wav2vec_features-z', 'voice_cmd__and__voice_cmd_lng')
(0, 'avg', 0.2, 0.2, 'retrained-wav2vec_features-c', 'voice_cmd')
(0, 'avg', 0.2, 0.2, 'retrained-wav2vec_features-c', 'voice_cmd__and__voice_cmd_lng')
(0, 'avg', 0.2, 0.2, 'retrained-wav2vec_features-z', 'voice_cmd')
(0, 'avg', 0.2, 0.2, 'retrained-wav2vec_features-z', 'voice_cmd__and__voice_cmd_lng')
(1, 'avg', 0.2, 0.2, 'wav2vec_features-c', 'voice_cmd')
(1, 'avg', 0.2, 0.2, 'wav2vec_features-c', 'voice_cmd__and__voice_cmd_lng')
(1, 'avg', 0.2, 0.2, 'wav2vec_features-z', 'voice_cmd')
(1, 'avg', 0.2, 0.2, 'wav2vec_features-z', 'voice_cmd__and__voice_cmd_lng')
(1, 'avg', 0.2, 0.2, 'retrained-wav2vec_features-c', 'voice_cmd')
(1, 'avg', 0.2, 0.2, 'retrained-wav2vec_features-c', 'voice_cmd__and__voice_cmd_lng')
(1, 'avg',

Epoch: 450. Train Loss: 0.1359. Test Loss: 0.8786. Train Acc: 0.9691. Test Acc:0.8046
Epoch: 460. Train Loss: 0.1328. Test Loss: 0.8589. Train Acc: 0.9696. Test Acc:0.8072
Epoch: 470. Train Loss: 0.1242. Test Loss: 0.875. Train Acc: 0.9736. Test Acc:0.8072
Epoch: 480. Train Loss: 0.1203. Test Loss: 0.8746. Train Acc: 0.9727. Test Acc:0.8019
Epoch: 490. Train Loss: 0.1214. Test Loss: 0.868. Train Acc: 0.9729. Test Acc:0.813
Epoch: 500. Train Loss: 0.1176. Test Loss: 0.8744. Train Acc: 0.9724. Test Acc:0.809
Epoch: 510. Train Loss: 0.1099. Test Loss: 0.8872. Train Acc: 0.9744. Test Acc:0.8068
Epoch: 520. Train Loss: 0.1149. Test Loss: 0.8826. Train Acc: 0.9726. Test Acc:0.8059
Epoch: 530. Train Loss: 0.1062. Test Loss: 0.9073. Train Acc: 0.9744. Test Acc:0.805
Epoch: 540. Train Loss: 0.1036. Test Loss: 0.8947. Train Acc: 0.9767. Test Acc:0.8095
Epoch: 550. Train Loss: 0.105. Test Loss: 0.8989. Train Acc: 0.9732. Test Acc:0.8041
Epoch: 560. Train Loss: 0.1071. Test Loss: 0.921. Train Acc:

Epoch: 210. Train Loss: 0.525. Test Loss: 0.9732. Train Acc: 0.8955. Test Acc:0.7149
Epoch: 220. Train Loss: 0.502. Test Loss: 0.9614. Train Acc: 0.8988. Test Acc:0.7202
Epoch: 230. Train Loss: 0.483. Test Loss: 0.9735. Train Acc: 0.9033. Test Acc:0.73
Epoch: 240. Train Loss: 0.4691. Test Loss: 0.9343. Train Acc: 0.9041. Test Acc:0.7269
Epoch: 250. Train Loss: 0.4434. Test Loss: 0.9372. Train Acc: 0.9119. Test Acc:0.7287
Epoch: 260. Train Loss: 0.4256. Test Loss: 0.9437. Train Acc: 0.9154. Test Acc:0.7345
Epoch: 270. Train Loss: 0.409. Test Loss: 0.9241. Train Acc: 0.9217. Test Acc:0.7394
Epoch: 280. Train Loss: 0.4045. Test Loss: 0.9417. Train Acc: 0.9181. Test Acc:0.7367
Epoch: 290. Train Loss: 0.3846. Test Loss: 0.9167. Train Acc: 0.9239. Test Acc:0.7412
Epoch: 300. Train Loss: 0.3675. Test Loss: 0.9127. Train Acc: 0.9284. Test Acc:0.7479
Epoch: 310. Train Loss: 0.3529. Test Loss: 0.9006. Train Acc: 0.9317. Test Acc:0.751
Epoch: 320. Train Loss: 0.3371. Test Loss: 0.909. Train Acc: 

Epoch: 10. Train Loss: 3.971. Test Loss: 4.063. Train Acc: 0.07178. Test Acc:0.05667
Epoch: 20. Train Loss: 3.313. Test Loss: 3.489. Train Acc: 0.1401. Test Acc:0.1124
Epoch: 30. Train Loss: 3.024. Test Loss: 3.242. Train Acc: 0.2203. Test Acc:0.1798
Epoch: 40. Train Loss: 2.716. Test Loss: 2.998. Train Acc: 0.2941. Test Acc:0.2414
Epoch: 50. Train Loss: 2.394. Test Loss: 2.729. Train Acc: 0.3853. Test Acc:0.3048
Epoch: 60. Train Loss: 2.113. Test Loss: 2.484. Train Acc: 0.455. Test Acc:0.3606
Epoch: 70. Train Loss: 1.885. Test Loss: 2.302. Train Acc: 0.5113. Test Acc:0.4061
Epoch: 80. Train Loss: 1.689. Test Loss: 2.115. Train Acc: 0.5641. Test Acc:0.4547
Epoch: 90. Train Loss: 1.537. Test Loss: 1.979. Train Acc: 0.6017. Test Acc:0.4873
Epoch: 100. Train Loss: 1.386. Test Loss: 1.842. Train Acc: 0.6442. Test Acc:0.5243
Epoch: 110. Train Loss: 1.281. Test Loss: 1.748. Train Acc: 0.6728. Test Acc:0.5556
Epoch: 120. Train Loss: 1.17. Test Loss: 1.65. Train Acc: 0.7094. Test Acc:0.585
Epo

Epoch: 980. Train Loss: 0.09183. Test Loss: 1.021. Train Acc: 0.9779. Test Acc:0.7849
Epoch: 990. Train Loss: 0.08735. Test Loss: 1.042. Train Acc: 0.9776. Test Acc:0.784
Epoch: 1000. Train Loss: 0.08658. Test Loss: 1.067. Train Acc: 0.9801. Test Acc:0.7769
ASRCNN__conv_pool_avg__conv_dp_0.2__fc_dp_0.2__fn_wav2vec_features-z__obj_voice_cmd__and__voice_cmd_lng using wav2vec_features-z on fold#0
-----------------------------------------------------------------------
      Layer (type)        Output Shape         Param #     Tr. Param #
          Conv1d-1        [10, 8, 200]           4,104           4,104
          Conv1d-2        [10, 8, 198]             200             200
         Dropout-3        [10, 8, 198]               0               0
       AvgPool1d-4         [10, 8, 99]               0               0
          Conv1d-5        [10, 16, 97]             400             400
         Dropout-6        [10, 16, 97]               0               0
       AvgPool1d-7        [10, 16,

Epoch: 750. Train Loss: 0.2346. Test Loss: 1.038. Train Acc: 0.9556. Test Acc:0.7305
Epoch: 760. Train Loss: 0.2392. Test Loss: 1.014. Train Acc: 0.9566. Test Acc:0.7358
Epoch: 770. Train Loss: 0.2247. Test Loss: 1.031. Train Acc: 0.9595. Test Acc:0.734
Epoch: 780. Train Loss: 0.2445. Test Loss: 1.044. Train Acc: 0.9508. Test Acc:0.734
Epoch: 790. Train Loss: 0.2374. Test Loss: 1.059. Train Acc: 0.9553. Test Acc:0.7349
Epoch: 800. Train Loss: 0.2234. Test Loss: 1.056. Train Acc: 0.9573. Test Acc:0.739
Epoch: 810. Train Loss: 0.2248. Test Loss: 1.075. Train Acc: 0.9573. Test Acc:0.7403
Epoch: 820. Train Loss: 0.2215. Test Loss: 1.098. Train Acc: 0.9588. Test Acc:0.7349
Epoch: 830. Train Loss: 0.2199. Test Loss: 1.102. Train Acc: 0.9571. Test Acc:0.7309
Epoch: 840. Train Loss: 0.2202. Test Loss: 1.129. Train Acc: 0.9571. Test Acc:0.722
Epoch: 850. Train Loss: 0.2112. Test Loss: 1.077. Train Acc: 0.9621. Test Acc:0.7398
Epoch: 860. Train Loss: 0.2027. Test Loss: 1.051. Train Acc: 0.9608. 

Epoch: 530. Train Loss: 0.3034. Test Loss: 1.155. Train Acc: 0.9282. Test Acc:0.7202
Epoch: 540. Train Loss: 0.3118. Test Loss: 1.156. Train Acc: 0.9237. Test Acc:0.7242
Epoch: 550. Train Loss: 0.2857. Test Loss: 1.137. Train Acc: 0.9317. Test Acc:0.7233
Epoch: 560. Train Loss: 0.2831. Test Loss: 1.146. Train Acc: 0.9324. Test Acc:0.7278
Epoch: 570. Train Loss: 0.2761. Test Loss: 1.154. Train Acc: 0.9332. Test Acc:0.7323
Epoch: 580. Train Loss: 0.2662. Test Loss: 1.131. Train Acc: 0.9347. Test Acc:0.7372
Epoch: 590. Train Loss: 0.2641. Test Loss: 1.128. Train Acc: 0.9364. Test Acc:0.7345
Epoch: 600. Train Loss: 0.2547. Test Loss: 1.136. Train Acc: 0.9402. Test Acc:0.7416
Epoch: 610. Train Loss: 0.2514. Test Loss: 1.141. Train Acc: 0.9389. Test Acc:0.7398
Epoch: 620. Train Loss: 0.2426. Test Loss: 1.143. Train Acc: 0.9418. Test Acc:0.7332
Epoch: 630. Train Loss: 0.2372. Test Loss: 1.142. Train Acc: 0.9412. Test Acc:0.7421
Epoch: 640. Train Loss: 0.2319. Test Loss: 1.15. Train Acc: 0.942

Epoch: 300. Train Loss: 0.5897. Test Loss: 1.16. Train Acc: 0.8691. Test Acc:0.6809
Epoch: 310. Train Loss: 0.5753. Test Loss: 1.142. Train Acc: 0.8692. Test Acc:0.6841
Epoch: 320. Train Loss: 0.5614. Test Loss: 1.142. Train Acc: 0.8744. Test Acc:0.6845
Epoch: 330. Train Loss: 0.5458. Test Loss: 1.134. Train Acc: 0.8817. Test Acc:0.6948
Epoch: 340. Train Loss: 0.5267. Test Loss: 1.146. Train Acc: 0.8855. Test Acc:0.6859
Epoch: 350. Train Loss: 0.5123. Test Loss: 1.134. Train Acc: 0.8888. Test Acc:0.6903
Epoch: 360. Train Loss: 0.5002. Test Loss: 1.135. Train Acc: 0.8943. Test Acc:0.6934
Epoch: 370. Train Loss: 0.4897. Test Loss: 1.127. Train Acc: 0.8956. Test Acc:0.6975
Epoch: 380. Train Loss: 0.47. Test Loss: 1.14. Train Acc: 0.8995. Test Acc:0.6917
Epoch: 390. Train Loss: 0.4632. Test Loss: 1.126. Train Acc: 0.9031. Test Acc:0.6934
Epoch: 400. Train Loss: 0.45. Test Loss: 1.126. Train Acc: 0.9069. Test Acc:0.7019
Epoch: 410. Train Loss: 0.4378. Test Loss: 1.118. Train Acc: 0.9109. Te

Epoch: 80. Train Loss: 1.592. Test Loss: 2.043. Train Acc: 0.5931. Test Acc:0.4793
Epoch: 90. Train Loss: 1.391. Test Loss: 1.879. Train Acc: 0.6462. Test Acc:0.519
Epoch: 100. Train Loss: 1.221. Test Loss: 1.727. Train Acc: 0.6901. Test Acc:0.5564
Epoch: 110. Train Loss: 1.085. Test Loss: 1.619. Train Acc: 0.7242. Test Acc:0.5868
Epoch: 120. Train Loss: 0.9831. Test Loss: 1.527. Train Acc: 0.7567. Test Acc:0.6051
Epoch: 130. Train Loss: 0.8966. Test Loss: 1.46. Train Acc: 0.7798. Test Acc:0.6292
Epoch: 140. Train Loss: 0.8279. Test Loss: 1.392. Train Acc: 0.7983. Test Acc:0.6488
Epoch: 150. Train Loss: 0.7792. Test Loss: 1.362. Train Acc: 0.8076. Test Acc:0.6568
Epoch: 160. Train Loss: 0.734. Test Loss: 1.325. Train Acc: 0.8234. Test Acc:0.668
Epoch: 170. Train Loss: 0.6897. Test Loss: 1.29. Train Acc: 0.8352. Test Acc:0.6783
Epoch: 180. Train Loss: 0.6605. Test Loss: 1.287. Train Acc: 0.846. Test Acc:0.6774
Epoch: 190. Train Loss: 0.6388. Test Loss: 1.273. Train Acc: 0.8461. Test Acc

Epoch: 10. Train Loss: 2.667. Test Loss: 2.831. Train Acc: 0.0771. Test Acc:0.05846
Epoch: 20. Train Loss: 2.286. Test Loss: 2.481. Train Acc: 0.1775. Test Acc:0.1272
Epoch: 30. Train Loss: 2.04. Test Loss: 2.268. Train Acc: 0.2727. Test Acc:0.2227
Epoch: 40. Train Loss: 1.779. Test Loss: 2.034. Train Acc: 0.3767. Test Acc:0.3025
Epoch: 50. Train Loss: 1.576. Test Loss: 1.867. Train Acc: 0.4663. Test Acc:0.3931
Epoch: 60. Train Loss: 1.411. Test Loss: 1.717. Train Acc: 0.5402. Test Acc:0.4574
Epoch: 70. Train Loss: 1.286. Test Loss: 1.616. Train Acc: 0.5876. Test Acc:0.4931
Epoch: 80. Train Loss: 1.193. Test Loss: 1.543. Train Acc: 0.6299. Test Acc:0.519
Epoch: 90. Train Loss: 1.117. Test Loss: 1.493. Train Acc: 0.6702. Test Acc:0.552
Epoch: 100. Train Loss: 1.055. Test Loss: 1.455. Train Acc: 0.6997. Test Acc:0.5672
Epoch: 110. Train Loss: 1.002. Test Loss: 1.414. Train Acc: 0.7203. Test Acc:0.5868
Epoch: 120. Train Loss: 0.9593. Test Loss: 1.377. Train Acc: 0.7446. Test Acc:0.5881
Ep

Epoch: 980. Train Loss: 0.1459. Test Loss: 1.042. Train Acc: 0.9744. Test Acc:0.7555
Epoch: 990. Train Loss: 0.1444. Test Loss: 1.076. Train Acc: 0.9741. Test Acc:0.7613
Epoch: 1000. Train Loss: 0.1433. Test Loss: 1.039. Train Acc: 0.9742. Test Acc:0.7586
ASRCNN__conv_pool_avg__conv_dp_0.2__fc_dp_0.2__fn_wav2vec_features-c__obj_voice_cmd using wav2vec_features-c on fold#1
-----------------------------------------------------------------------
      Layer (type)        Output Shape         Param #     Tr. Param #
          Conv1d-1        [10, 8, 200]           4,104           4,104
          Conv1d-2        [10, 8, 198]             200             200
         Dropout-3        [10, 8, 198]               0               0
       AvgPool1d-4         [10, 8, 99]               0               0
          Conv1d-5        [10, 16, 97]             400             400
         Dropout-6        [10, 16, 97]               0               0
       AvgPool1d-7        [10, 16, 48]               0  

Epoch: 760. Train Loss: 0.08917. Test Loss: 1.084. Train Acc: 0.9764. Test Acc:0.7848
Epoch: 770. Train Loss: 0.08741. Test Loss: 1.078. Train Acc: 0.9768. Test Acc:0.787
Epoch: 780. Train Loss: 0.08527. Test Loss: 1.1. Train Acc: 0.9785. Test Acc:0.7848
Epoch: 790. Train Loss: 0.07849. Test Loss: 1.085. Train Acc: 0.98. Test Acc:0.7896
Epoch: 800. Train Loss: 0.08269. Test Loss: 1.093. Train Acc: 0.9803. Test Acc:0.7891
Epoch: 810. Train Loss: 0.07385. Test Loss: 1.116. Train Acc: 0.9806. Test Acc:0.7904
Epoch: 820. Train Loss: 0.07694. Test Loss: 1.101. Train Acc: 0.9795. Test Acc:0.7891
Epoch: 830. Train Loss: 0.0724. Test Loss: 1.107. Train Acc: 0.9801. Test Acc:0.7947
Epoch: 840. Train Loss: 0.06344. Test Loss: 1.092. Train Acc: 0.9848. Test Acc:0.796
Epoch: 850. Train Loss: 0.06644. Test Loss: 1.08. Train Acc: 0.983. Test Acc:0.7891
Epoch: 860. Train Loss: 0.07226. Test Loss: 1.111. Train Acc: 0.9827. Test Acc:0.79
Epoch: 870. Train Loss: 0.06993. Test Loss: 1.133. Train Acc: 0.9

Epoch: 530. Train Loss: 0.2279. Test Loss: 0.9573. Train Acc: 0.9561. Test Acc:0.7594
Epoch: 540. Train Loss: 0.2199. Test Loss: 0.9517. Train Acc: 0.9579. Test Acc:0.7585
Epoch: 550. Train Loss: 0.2175. Test Loss: 0.9598. Train Acc: 0.9582. Test Acc:0.7585
Epoch: 560. Train Loss: 0.21. Test Loss: 0.9339. Train Acc: 0.9611. Test Acc:0.762
Epoch: 570. Train Loss: 0.2085. Test Loss: 0.9537. Train Acc: 0.9608. Test Acc:0.7628
Epoch: 580. Train Loss: 0.201. Test Loss: 0.9417. Train Acc: 0.9626. Test Acc:0.7546
Epoch: 590. Train Loss: 0.1961. Test Loss: 0.9386. Train Acc: 0.9628. Test Acc:0.7577
Epoch: 600. Train Loss: 0.1977. Test Loss: 0.9741. Train Acc: 0.962. Test Acc:0.7516
Epoch: 610. Train Loss: 0.1851. Test Loss: 0.9519. Train Acc: 0.9631. Test Acc:0.7589
Epoch: 620. Train Loss: 0.1805. Test Loss: 0.9408. Train Acc: 0.966. Test Acc:0.7646
Epoch: 630. Train Loss: 0.1796. Test Loss: 0.9242. Train Acc: 0.967. Test Acc:0.7624
Epoch: 640. Train Loss: 0.1755. Test Loss: 0.9554. Train Acc:

Epoch: 310. Train Loss: 0.4251. Test Loss: 1.102. Train Acc: 0.901. Test Acc:0.7352
Epoch: 320. Train Loss: 0.4131. Test Loss: 1.087. Train Acc: 0.9025. Test Acc:0.7348
Epoch: 330. Train Loss: 0.3958. Test Loss: 1.088. Train Acc: 0.9088. Test Acc:0.7434
Epoch: 340. Train Loss: 0.3856. Test Loss: 1.067. Train Acc: 0.9069. Test Acc:0.749
Epoch: 350. Train Loss: 0.3725. Test Loss: 1.056. Train Acc: 0.9136. Test Acc:0.7508
Epoch: 360. Train Loss: 0.3627. Test Loss: 1.057. Train Acc: 0.914. Test Acc:0.7477
Epoch: 370. Train Loss: 0.3534. Test Loss: 1.042. Train Acc: 0.919. Test Acc:0.7482
Epoch: 380. Train Loss: 0.3378. Test Loss: 1.04. Train Acc: 0.9221. Test Acc:0.7508
Epoch: 390. Train Loss: 0.332. Test Loss: 1.022. Train Acc: 0.9231. Test Acc:0.7533
Epoch: 400. Train Loss: 0.3143. Test Loss: 1.019. Train Acc: 0.9261. Test Acc:0.7637
Epoch: 410. Train Loss: 0.3083. Test Loss: 1.009. Train Acc: 0.9285. Test Acc:0.7624
Epoch: 420. Train Loss: 0.2983. Test Loss: 1.009. Train Acc: 0.9316. Te

Epoch: 70. Train Loss: 1.498. Test Loss: 1.912. Train Acc: 0.5093. Test Acc:0.3532
Epoch: 80. Train Loss: 1.397. Test Loss: 1.822. Train Acc: 0.5475. Test Acc:0.389
Epoch: 90. Train Loss: 1.311. Test Loss: 1.732. Train Acc: 0.5904. Test Acc:0.4252
Epoch: 100. Train Loss: 1.235. Test Loss: 1.652. Train Acc: 0.6199. Test Acc:0.4554
Epoch: 110. Train Loss: 1.156. Test Loss: 1.572. Train Acc: 0.6667. Test Acc:0.4937
Epoch: 120. Train Loss: 1.094. Test Loss: 1.498. Train Acc: 0.6926. Test Acc:0.5231
Epoch: 130. Train Loss: 1.025. Test Loss: 1.429. Train Acc: 0.7303. Test Acc:0.5558
Epoch: 140. Train Loss: 0.9734. Test Loss: 1.382. Train Acc: 0.7519. Test Acc:0.5774
Epoch: 150. Train Loss: 0.9218. Test Loss: 1.325. Train Acc: 0.7742. Test Acc:0.608
Epoch: 160. Train Loss: 0.8826. Test Loss: 1.291. Train Acc: 0.7918. Test Acc:0.6218
Epoch: 170. Train Loss: 0.8393. Test Loss: 1.246. Train Acc: 0.8094. Test Acc:0.6356
Epoch: 180. Train Loss: 0.8052. Test Loss: 1.216. Train Acc: 0.8241. Test Acc

Epoch: 10. Train Loss: 3.961. Test Loss: 4.089. Train Acc: 0.06229. Test Acc:0.0401
Epoch: 20. Train Loss: 3.567. Test Loss: 3.82. Train Acc: 0.1076. Test Acc:0.07676
Epoch: 30. Train Loss: 2.972. Test Loss: 3.259. Train Acc: 0.2138. Test Acc:0.1518
Epoch: 40. Train Loss: 2.51. Test Loss: 2.895. Train Acc: 0.338. Test Acc:0.2363
Epoch: 50. Train Loss: 2.12. Test Loss: 2.576. Train Acc: 0.4333. Test Acc:0.3226
Epoch: 60. Train Loss: 1.857. Test Loss: 2.354. Train Acc: 0.4958. Test Acc:0.3881
Epoch: 70. Train Loss: 1.637. Test Loss: 2.163. Train Acc: 0.552. Test Acc:0.4304
Epoch: 80. Train Loss: 1.459. Test Loss: 2.011. Train Acc: 0.6086. Test Acc:0.4713
Epoch: 90. Train Loss: 1.313. Test Loss: 1.898. Train Acc: 0.6434. Test Acc:0.5024
Epoch: 100. Train Loss: 1.182. Test Loss: 1.787. Train Acc: 0.6864. Test Acc:0.5343
Epoch: 110. Train Loss: 1.053. Test Loss: 1.689. Train Acc: 0.7261. Test Acc:0.5619
Epoch: 120. Train Loss: 0.9546. Test Loss: 1.611. Train Acc: 0.753. Test Acc:0.5847
Epoc

Epoch: 980. Train Loss: 0.0842. Test Loss: 1.38. Train Acc: 0.9798. Test Acc:0.7417
Epoch: 990. Train Loss: 0.07855. Test Loss: 1.367. Train Acc: 0.9806. Test Acc:0.746
Epoch: 1000. Train Loss: 0.07739. Test Loss: 1.383. Train Acc: 0.9813. Test Acc:0.7456
ASRCNN__conv_pool_avg__conv_dp_0.2__fc_dp_0.2__fn_retrained-wav2vec_features-c__obj_voice_cmd__and__voice_cmd_lng using retrained-wav2vec_features-c on fold#1
-----------------------------------------------------------------------
      Layer (type)        Output Shape         Param #     Tr. Param #
          Conv1d-1        [10, 8, 200]           4,104           4,104
          Conv1d-2        [10, 8, 198]             200             200
         Dropout-3        [10, 8, 198]               0               0
       AvgPool1d-4         [10, 8, 99]               0               0
          Conv1d-5        [10, 16, 97]             400             400
         Dropout-6        [10, 16, 97]               0               0
       AvgPool1d

Epoch: 750. Train Loss: 0.2748. Test Loss: 1.19. Train Acc: 0.9549. Test Acc:0.6973
Epoch: 760. Train Loss: 0.2754. Test Loss: 1.193. Train Acc: 0.9529. Test Acc:0.6938
Epoch: 770. Train Loss: 0.2707. Test Loss: 1.186. Train Acc: 0.9549. Test Acc:0.6951
Epoch: 780. Train Loss: 0.2635. Test Loss: 1.191. Train Acc: 0.9577. Test Acc:0.6921
Epoch: 790. Train Loss: 0.2622. Test Loss: 1.207. Train Acc: 0.9579. Test Acc:0.6912
Epoch: 800. Train Loss: 0.2521. Test Loss: 1.18. Train Acc: 0.9601. Test Acc:0.6986
Epoch: 810. Train Loss: 0.2514. Test Loss: 1.194. Train Acc: 0.9596. Test Acc:0.7003
Epoch: 820. Train Loss: 0.2474. Test Loss: 1.197. Train Acc: 0.9594. Test Acc:0.6969
Epoch: 830. Train Loss: 0.2519. Test Loss: 1.224. Train Acc: 0.9599. Test Acc:0.6956
Epoch: 840. Train Loss: 0.2431. Test Loss: 1.211. Train Acc: 0.9611. Test Acc:0.6964
Epoch: 850. Train Loss: 0.2398. Test Loss: 1.201. Train Acc: 0.9598. Test Acc:0.6986
Epoch: 860. Train Loss: 0.2313. Test Loss: 1.192. Train Acc: 0.9633

Epoch: 530. Train Loss: 0.2403. Test Loss: 1.173. Train Acc: 0.9449. Test Acc:0.7335
Epoch: 540. Train Loss: 0.2199. Test Loss: 1.143. Train Acc: 0.953. Test Acc:0.7426
Epoch: 550. Train Loss: 0.2154. Test Loss: 1.157. Train Acc: 0.952. Test Acc:0.7391
Epoch: 560. Train Loss: 0.2078. Test Loss: 1.15. Train Acc: 0.9547. Test Acc:0.7434
Epoch: 570. Train Loss: 0.2068. Test Loss: 1.143. Train Acc: 0.9547. Test Acc:0.7443
Epoch: 580. Train Loss: 0.2046. Test Loss: 1.156. Train Acc: 0.9571. Test Acc:0.7391
Epoch: 590. Train Loss: 0.1966. Test Loss: 1.146. Train Acc: 0.9576. Test Acc:0.7451
Epoch: 600. Train Loss: 0.1908. Test Loss: 1.149. Train Acc: 0.9584. Test Acc:0.746
Epoch: 610. Train Loss: 0.1919. Test Loss: 1.156. Train Acc: 0.9581. Test Acc:0.7439
Epoch: 620. Train Loss: 0.1871. Test Loss: 1.155. Train Acc: 0.9589. Test Acc:0.7486
Epoch: 630. Train Loss: 0.1859. Test Loss: 1.143. Train Acc: 0.9599. Test Acc:0.7473
Epoch: 640. Train Loss: 0.1852. Test Loss: 1.156. Train Acc: 0.9577. 

Epoch: 300. Train Loss: 0.5501. Test Loss: 1.286. Train Acc: 0.8756. Test Acc:0.6468
Epoch: 310. Train Loss: 0.5366. Test Loss: 1.287. Train Acc: 0.8786. Test Acc:0.6546
Epoch: 320. Train Loss: 0.5074. Test Loss: 1.262. Train Acc: 0.8941. Test Acc:0.6641
Epoch: 330. Train Loss: 0.4959. Test Loss: 1.252. Train Acc: 0.897. Test Acc:0.6667
Epoch: 340. Train Loss: 0.4854. Test Loss: 1.25. Train Acc: 0.9002. Test Acc:0.6774
Epoch: 350. Train Loss: 0.4714. Test Loss: 1.249. Train Acc: 0.9061. Test Acc:0.6762
Epoch: 360. Train Loss: 0.468. Test Loss: 1.255. Train Acc: 0.9035. Test Acc:0.6697
Epoch: 370. Train Loss: 0.4547. Test Loss: 1.236. Train Acc: 0.9109. Test Acc:0.6774
Epoch: 380. Train Loss: 0.4458. Test Loss: 1.248. Train Acc: 0.9118. Test Acc:0.6701
Epoch: 390. Train Loss: 0.4365. Test Loss: 1.263. Train Acc: 0.9099. Test Acc:0.674
Epoch: 400. Train Loss: 0.4282. Test Loss: 1.246. Train Acc: 0.9189. Test Acc:0.68
Epoch: 410. Train Loss: 0.423. Test Loss: 1.254. Train Acc: 0.9185. Tes