<a href="https://colab.research.google.com/github/tabaraei/depression-detection/blob/master/baseline_replication.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


# `NetVLAD`

In [4]:
import math
import tensorflow as tf
# import tensorflow.contrib.slim as slim
import numpy as np
from keras import initializers, layers
import keras.backend as K
import sys

In [5]:
class NetVLAD(layers.Layer):
    """Creates a NetVLAD class.
    """
    def __init__(self, feature_size, max_samples, cluster_size, output_dim, **kwargs):

        self.feature_size = feature_size
        self.max_samples = max_samples
        self.output_dim = output_dim
        self.cluster_size = cluster_size
        super(NetVLAD, self).__init__(**kwargs)

    def build(self, input_shape):
    # Create a trainable weight variable for this layer.
        self.cluster_weights = self.add_weight(name='kernel_W1',
                                      shape=(self.feature_size, self.cluster_size),
                                      initializer=tf.random_normal_initializer(stddev=1 / math.sqrt(self.feature_size)),
                                      trainable=True)
        self.cluster_biases = self.add_weight(name='kernel_B1',
                                      shape=(self.cluster_size,),
                                      initializer=tf.random_normal_initializer(stddev=1 / math.sqrt(self.feature_size)),
                                      trainable=True)
        self.cluster_weights2 = self.add_weight(name='kernel_W2',
                                      shape=(1,self.feature_size, self.cluster_size),
                                      initializer=tf.random_normal_initializer(stddev=1 / math.sqrt(self.feature_size)),
                                      trainable=True)
        self.hidden1_weights = self.add_weight(name='kernel_H1',
                                      shape=(self.cluster_size*self.feature_size, self.output_dim),
                                      initializer=tf.random_normal_initializer(stddev=1 / math.sqrt(self.cluster_size)),
                                      trainable=True)

        super(NetVLAD, self).build(input_shape)  # Be sure to call this at the end

    def call(self, reshaped_input):
        """Forward pass of a NetVLAD block.

        Args:
        reshaped_input: If your input is in that form:
        'batch_size' x 'max_samples' x 'feature_size'
        It should be reshaped in the following form:
        'batch_size*max_samples' x 'feature_size'
        by performing:
        reshaped_input = tf.reshape(input, [-1, features_size])

        Returns:
        vlad: the pooled vector of size: 'batch_size' x 'output_dim'
        """
        """
        In Keras, there are two way to do matrix multiplication (dot product)
        1) K.dot : AxB -> when A has batchsize and B doesn't, use K.dot
        2) tf.matmul: AxB -> when A and B both have batchsize, use tf.matmul

        Error example: Use tf.matmul when A has batchsize (3 dim) and B doesn't (2 dim)
        ValueError: Shape must be rank 2 but is rank 3 for 'net_vlad_1/MatMul' (op: 'MatMul') with input shapes: [?,21,64], [64,3]

        tf.matmul might still work when the dim of A is (?,64), but this is too confusing.
        Just follow the above rules.
        """
        activation = K.dot(reshaped_input, self.cluster_weights)

        activation += self.cluster_biases

        activation = tf.nn.softmax(activation)

        activation = tf.reshape(activation,
                [-1, self.max_samples, self.cluster_size])

        a_sum = tf.reduce_sum(activation,-2,keep_dims=True)

        a = tf.multiply(a_sum,self.cluster_weights2)

        activation = tf.transpose(activation,perm=[0,2,1])

        reshaped_input = tf.reshape(reshaped_input,[-1,
            self.max_samples, self.feature_size])

        vlad = tf.matmul(activation,reshaped_input)
        vlad = tf.transpose(vlad,perm=[0,2,1])
        vlad = tf.subtract(vlad,a)
        vlad = tf.nn.l2_normalize(vlad,1)
        vlad = tf.reshape(vlad,[-1, self.cluster_size*self.feature_size])
        vlad = tf.nn.l2_normalize(vlad,1)
        vlad = K.dot(vlad, self.hidden1_weights)

        return vlad

    def compute_output_shape(self, input_shape):
        return tuple([None, self.output_dim])

# `audio_features_whole.py`

In [22]:
import os
import numpy as np
import pandas as pd
import wave
import librosa
# from python_speech_features import *
import sys
import pickle
import tensorflow.compat.v1 as tf
# import vggish.vggish_input as vggish_input
# import vggish.vggish_params as vggish_params
# import vggish.vggish_postprocess as vggish_postprocess
# import vggish.vggish_slim as vggish_slim
# import loupe_keras as lpk
# from allennlp.commands.elmo import ElmoEmbedder
from tqdm.notebook import trange, tqdm

In [18]:
# sys.path.append('/Users/linlin/Desktop/depression/classfication')

tf.enable_eager_execution()

# elmo = ElmoEmbedder()

# os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

# prefix = os.path.abspath(os.path.join(os.getcwd(), "."))

# # Paths to downloaded VGGish files.
# checkpoint_path =os.path.join(os.getcwd(),  'vggish/vggish_model.ckpt')
# pca_params_path = os.path.join(os.getcwd(), 'vggish/vggish_pca_params.npz')

DATASET_DIR = '/content/drive/MyDrive/Data/DepressionDetection/EATD-Corpus'
BASELINE_DIR = '/content/drive/MyDrive/Data/DepressionDetection/Baseline'
cluster_size = 16
min_len = 100
max_len = -1

In [None]:
# def to_vggish_embedds(x, sr):
#     # x为输入的音频，sr为sample_rate
#     input_batch = vggish_input.waveform_to_examples(x, sr)
#     with tf.Graph().as_default(), tf.Session() as sess:
#       vggish_slim.define_vggish_slim()
#       vggish_slim.load_vggish_slim_checkpoint(sess, checkpoint_path)

#       features_tensor = sess.graph.get_tensor_by_name(vggish_params.INPUT_TENSOR_NAME)
#       embedding_tensor = sess.graph.get_tensor_by_name(vggish_params.OUTPUT_TENSOR_NAME)
#       [embedding_batch] = sess.run([embedding_tensor],
#                                    feed_dict={features_tensor: input_batch})

#     # Postprocess the results to produce whitened quantized embeddings.
#     pproc = vggish_postprocess.Postprocessor(pca_params_path)
#     postprocessed_batch = pproc.postprocess(embedding_batch)

#     return tf.cast(postprocessed_batch, dtype='float32')

In [24]:
def wav2vlad(wave_data, sr):
    global cluster_size
    signal = wave_data
    melspec = librosa.feature.melspectrogram(y=signal, n_mels=80,sr=sr).astype(np.float32).T
    melspec = np.log(np.maximum(1e-6, melspec))
    feature_size = melspec.shape[1]
    max_samples = melspec.shape[0]
    output_dim = cluster_size * 16
    feat = NetVLAD(feature_size=feature_size, max_samples=max_samples, \
                            cluster_size=cluster_size, output_dim=output_dim) \
                                (tf.convert_to_tensor(melspec))
    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        r = feat.numpy()
    return r

In [45]:
def extract_features(audio_features, targets, folder):
    global max_len, min_len
    if not os.path.exists(f'{DATASET_DIR}/{folder}/positive_out.wav'):
        return
    positive_file = wave.open(f'{DATASET_DIR}/{folder}/positive_out.wav')
    sr1 = positive_file.getframerate()
    nframes1 = positive_file.getnframes()
    wave_data1 = np.frombuffer(positive_file.readframes(nframes1), dtype=np.short).astype(float)
    len1 = nframes1 / sr1

    neutral_file = wave.open(f'{DATASET_DIR}/{folder}/neutral_out.wav')
    sr2 = neutral_file.getframerate()
    nframes2 = neutral_file.getnframes()
    wave_data2 = np.frombuffer(neutral_file.readframes(nframes2), dtype=np.short).astype(float)
    len2 = nframes2 / sr2

    negative_file = wave.open(f'{DATASET_DIR}/{folder}/negative_out.wav')
    sr3 = negative_file.getframerate()
    nframes3 = negative_file.getnframes()
    wave_data3 = np.frombuffer(negative_file.readframes(nframes3), dtype=np.short).astype(float)
    len3 = nframes3/sr3

    for l in [len1, len2, len3]:
        if l > max_len:
            max_len = l
        if l < min_len:
            min_len = l

    with open(f'{DATASET_DIR}/{folder}/new_label.txt') as fli:
        target = float(fli.readline())

    if wave_data1.shape[0] < 1:
        wave_data1 = np.array([1e-4]*sr1*5)
    if wave_data2.shape[0] < 1:
        wave_data2 = np.array([1e-4]*sr2*5)
    if wave_data3.shape[0] < 1:
        wave_data3 = np.array([1e-4]*sr3*5)
    audio_features.append([wav2vlad(wave_data1, sr1), wav2vlad(wave_data2, sr2), \
        wav2vlad(wave_data3, sr3)])
    targets.append(1 if target >= 53 else 0)
    # targets.append(target)

In [46]:
audio_features = []
audio_targets = []

for index in trange(114):
    extract_features(audio_features, audio_targets, f't_{index+1}')

for index in trange(114):
    extract_features(audio_features, audio_targets, f'v_{index+1}')

  0%|          | 0/114 [00:00<?, ?it/s]

  0%|          | 0/114 [00:00<?, ?it/s]

In [47]:
print("Saving npz file locally...")
np.savez(f'{BASELINE_DIR}/Features/AudioWhole/whole_samples_clf_{cluster_size*16}.npz', audio_features)
np.savez(f'{BASELINE_DIR}/Features/AudioWhole/whole_labels_clf_{cluster_size*16}.npz', audio_targets)

print(max_len, min_len)

Saving npz file locally...
111.02 0.0


In [40]:
len(audio_features), np.array(audio_features[0]).shape

(162, (3, 1, 256))

# `audio_gru_whole`

In [9]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn import functional as F
import torch.optim as optim
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

import numpy as np
import pandas as pd
import os
import pickle
import random
import itertools
from tqdm.notebook import trange, tqdm

In [4]:
BASELINE_DIR = '/content/drive/MyDrive/Data/DepressionDetection/Baseline'
audio_features = np.squeeze(np.load(f'{BASELINE_DIR}/Features/AudioWhole/whole_samples_clf_256.npz')['arr_0'], axis=2)
audio_targets = np.load(f'{BASELINE_DIR}/Features/AudioWhole/whole_labels_clf_256.npz')['arr_0']
audio_dep_idxs_tmp = np.where(audio_targets == 1)[0]
audio_non_idxs = np.where(audio_targets == 0)[0]
audio_features.shape, audio_targets.shape

((162, 3, 256), (162,))

In [5]:
class AudioBiLSTM(nn.Module):
    def __init__(self, config):
        super(AudioBiLSTM, self).__init__()
        self.num_classes = config['num_classes']
        self.learning_rate = config['learning_rate']
        self.dropout = config['dropout']
        self.hidden_dims = config['hidden_dims']
        self.rnn_layers = config['rnn_layers']
        self.embedding_size = config['embedding_size']
        self.bidirectional = config['bidirectional']

        self.build_model()
        # self.init_weight()

    def init_weight(net):
        for name, param in net.named_parameters():
            if not 'ln' in name:
                if 'bias' in name:
                    nn.init.constant_(param, 0.0)
                elif 'weight' in name:
                    nn.init.xavier_uniform_(param)

    def build_model(self):
        # attention layer
        self.attention_layer = nn.Sequential(
            nn.Linear(self.hidden_dims, self.hidden_dims),
            nn.ReLU(inplace=True))
        # self.attention_weights = self.attention_weights.view(self.hidden_dims, 1)

        # self.lstm_net_audio = nn.LSTM(self.embedding_size,
        #                         self.hidden_dims,
        #                         num_layers=self.rnn_layers,
        #                         dropout=self.dropout,
        #                         bidirectional=self.bidirectional,
        #                         batch_first=True)
        self.lstm_net_audio = nn.GRU(self.embedding_size, self.hidden_dims,
                                num_layers=self.rnn_layers, dropout=self.dropout, batch_first=True)

        self.ln = nn.LayerNorm(self.embedding_size)

        # FC层
        self.fc_audio = nn.Sequential(
            nn.Dropout(self.dropout),
            nn.Linear(self.hidden_dims, self.hidden_dims),
            nn.ReLU(),
            nn.Dropout(self.dropout),
            nn.Linear(self.hidden_dims, self.num_classes),
            # nn.ReLU(),
            nn.Softmax(dim=1)
        )

    def attention_net_with_w(self, lstm_out, lstm_hidden):
        '''
        :param lstm_out:    [batch_size, len_seq, n_hidden * 2]
        :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]
        :return: [batch_size, n_hidden]
        '''
        lstm_tmp_out = torch.chunk(lstm_out, 2, -1)
        # h [batch_size, time_step, hidden_dims]
        h = lstm_tmp_out[0] + lstm_tmp_out[1]
        #         h = lstm_out
        # [batch_size, num_layers * num_directions, n_hidden]
        lstm_hidden = torch.sum(lstm_hidden, dim=1)
        # [batch_size, 1, n_hidden]
        lstm_hidden = lstm_hidden.unsqueeze(1)
        # atten_w [batch_size, 1, hidden_dims]
        atten_w = self.attention_layer(lstm_hidden)
        # m [batch_size, time_step, hidden_dims]
        m = nn.Tanh()(h)
        # atten_context [batch_size, 1, time_step]
       # print(atten_w.shape, m.transpose(1, 2).shape)
        atten_context = torch.bmm(atten_w, m.transpose(1, 2))
        # softmax_w [batch_size, 1, time_step]
        softmax_w = F.softmax(atten_context, dim=-1)
        # context [batch_size, 1, hidden_dims]
        context = torch.bmm(softmax_w, h)
        result = context.squeeze(1)
        return result

    def forward(self, x):
        x = self.ln(x)
        x, _ = self.lstm_net_audio(x)
        x = x.mean(dim=1)
        out = self.fc_audio(x)
        return out

In [6]:
config = {
    'num_classes': 2,
    'dropout': 0.5,
    'rnn_layers': 2,
    'embedding_size': 256,
    'batch_size': 8,
    'epochs': 170,
    'learning_rate': 6e-6,
    'hidden_dims': 256,
    'bidirectional': False,
    'cuda': False
}

In [7]:
def save(model, filename):
    save_filename = '{}.pt'.format(filename)
    torch.save(model, save_filename)
    print('Saved as %s' % save_filename)

def standard_confusion_matrix(y_test, y_test_pred):
    """
    Make confusion matrix with format:
                  -----------
                  | TP | FP |
                  -----------
                  | FN | TN |
                  -----------
    Parameters
    ----------
    y_true : ndarray - 1D
    y_pred : ndarray - 1D

    Returns
    -------
    ndarray - 2D
    """
    [[tn, fp], [fn, tp]] = confusion_matrix(y_test.cpu().numpy(), y_test_pred)
    return np.array([[tp, fp], [fn, tn]])

def model_performance(y_test, y_test_pred_proba):
    """
    Evaluation metrics for network performance.
    """
    y_test_pred = y_test_pred_proba.data.max(1, keepdim=True)[1]

    # Computing confusion matrix for test dataset
    conf_matrix = standard_confusion_matrix(y_test, y_test_pred.numpy())
    print("Confusion Matrix:")
    print(conf_matrix)

    return y_test_pred, conf_matrix

def train(epoch, train_idxs):
    global lr, train_acc
    model.train()
    batch_idx = 1
    total_loss = 0
    correct = 0
    pred = np.array([])
    X_train = audio_features[train_idxs]
    Y_train = audio_targets[train_idxs]
    for i in range(0, X_train.shape[0], config['batch_size']):
        if i + config['batch_size'] > X_train.shape[0]:
            x, y = X_train[i:], Y_train[i:]
        else:
            x, y = X_train[i:(i + config['batch_size'])], Y_train[i:(
                i + config['batch_size'])]
        if config['cuda']:
            x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True).cuda(), Variable(torch.from_numpy(y)).cuda()
        else:
            x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True), \
                Variable(torch.from_numpy(y))

        # 将模型的参数梯度设置为0
        optimizer.zero_grad()
        output = model(x)
        pred = output.data.max(1, keepdim=True)[1]
        #print(pred.shape, y.shape)
        correct += pred.eq(y.data.view_as(pred)).cpu().sum()
        loss = criterion(output, y)
        # 后向传播调整参数
        loss.backward()
        # 根据梯度更新网络参数
        optimizer.step()
        batch_idx += 1
        # loss.item()能够得到张量中的元素值
        total_loss += loss.item()

    train_acc = correct
    print(
        'Train Epoch: {:2d}\t Learning rate: {:.4f}\tLoss: {:.6f}\t Accuracy: {}/{} ({:.0f}%)\n '
        .format(epoch + 1, config['learning_rate'], total_loss, correct,
                X_train.shape[0], 100. * correct / X_train.shape[0]))


def evaluate(model, test_idxs, fold, train_idxs_tmp, train_idxs):
    model.eval()
    batch_idx = 1
    total_loss = 0
    global max_f1, max_acc, min_mae, X_test_lens, max_prec, max_rec
    pred = np.array([])
    with torch.no_grad():
        if config['cuda']:
            x, y = Variable(torch.from_numpy(audio_features[test_idxs]).type(torch.FloatTensor), requires_grad=True).cuda(),\
                Variable(torch.from_numpy(audio_targets[test_idxs])).cuda()
        else:
            x, y = Variable(torch.from_numpy(audio_features[test_idxs]).type(torch.FloatTensor), requires_grad=True), \
                Variable(torch.from_numpy(audio_targets[test_idxs])).type(torch.LongTensor)

        optimizer.zero_grad()
        output = model(x)
        loss = criterion(output, y)
        total_loss += loss.item()
        y_test_pred, conf_matrix = model_performance(y, output.cpu())
        accuracy = float(conf_matrix[0][0] + conf_matrix[1][1]) / np.sum(conf_matrix)
        precision = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[0][1])
        recall = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[1][0])
        f1_score = 2 * (precision * recall) / (precision + recall)
        print("Accuracy: {}".format(accuracy))
        print("Precision: {}".format(precision))
        print("Recall: {}".format(recall))
        print("F1-Score: {}\n".format(f1_score))
        print('=' * 89)

        if max_f1 <= f1_score and train_acc > len(train_idxs)*0.90  and f1_score > 0.5:
            max_f1 = f1_score
            max_acc = accuracy
            max_rec = recall
            max_prec = precision
            mode ='gru'
            save(model, f"{BASELINE_DIR}/Model/ClassificationWhole/Audio/BiLSTM_{mode}_vlad{config['embedding_size']}_{config['hidden_dims']}_{max_f1:.2f}_{fold}")
            np.save(f'{BASELINE_DIR}/Features/TextWhole/train_idxs_{f1_score:.2f}_{fold}.npy', train_idxs_tmp)
            print('*' * 64)
            print('model saved: f1: {}\tacc: {}'.format(max_f1, max_acc))
            print('*' * 64)

    return total_loss

def get_param_group(model):
    nd_list = []
    param_list = []
    for name, param in model.named_parameters():
        if 'ln' in name:
            nd_list.append(param)
        else:
            param_list.append(param)
    return [{'params': param_list, 'weight_decay': 1e-5}, {'params': nd_list, 'weight_decay': 0}]

In [10]:
kf = KFold(n_splits=3, shuffle=True)
fold = 1
for train_idxs_tmp, test_idxs_tmp in kf.split(audio_features):
# train_idxs_tmps = [
#     np.load(f'{BASELINE_DIR}/Features/TextWhole/train_idxs_0.63_1.npy', allow_pickle=True),
#     np.load(f'{BASELINE_DIR}/Features/TextWhole/train_idxs_0.60_2.npy', allow_pickle=True),
#     np.load(f'{BASELINE_DIR}/Features/TextWhole/train_idxs_0.60_3.npy', allow_pickle=True)
# ]
# for idx_idx, train_idxs_tmp in enumerate(train_idxs_tmps):
#     fold = idx_idx + 1
#     # if idx_idx != 1:
#     #     continue
#     test_idxs_tmp = list(set(list(audio_dep_idxs_tmp)+list(audio_non_idxs)) - set(train_idxs_tmp))
    train_idxs, test_idxs = [], []
    resample_idxs = [0,1,2,3,4,5]
    # depression data augmentation
    for idx in train_idxs_tmp:
        if idx in audio_dep_idxs_tmp:
            feat = audio_features[idx]
            count = 0
            for i in itertools.permutations(feat, feat.shape[0]):
                if count in resample_idxs:
                    audio_features = np.vstack((audio_features, np.expand_dims(list(i), 0)))
                    audio_targets = np.hstack((audio_targets, 1))
                    train_idxs.append(len(audio_features)-1)
                count += 1
        else:
            train_idxs.append(idx)

    for idx in test_idxs_tmp:
        if idx in audio_dep_idxs_tmp:
            feat = audio_features[idx]
            count = 0
            # resample_idxs = random.sample(range(6), 4)
            resample_idxs = [0,1,4,5]
            for i in itertools.permutations(feat, feat.shape[0]):
                if count in resample_idxs:
                    audio_features = np.vstack((audio_features, np.expand_dims(list(i), 0)))
                    audio_targets = np.hstack((audio_targets, 1))
                    test_idxs.append(len(audio_features)-1)
                count += 1
        else:
            test_idxs.append(idx)
        # test_idxs.append(idx)

    model = AudioBiLSTM(config)

    if config['cuda']:
        model = model.cuda()

    param_group = get_param_group(model)
    optimizer = optim.AdamW(param_group, lr=config['learning_rate'])
    criterion = nn.CrossEntropyLoss()
    # criterion = FocalLoss(class_num=2)
    max_f1 = -1
    max_acc = -1
    max_rec = -1
    max_prec = -1
    train_acc = -1

    for ep in trange(1, config['epochs']):
        train(ep, train_idxs)
        tloss = evaluate(model, test_idxs, fold, train_idxs_tmp, train_idxs)
    fold += 1

  0%|          | 0/169 [00:00<?, ?it/s]

Train Epoch:  2	 Learning rate: 0.0000	Loss: 28.028666	 Accuracy: 219/326 (67%)
 
Confusion Matrix:
[[76 45]
 [ 1  0]]
Accuracy: 0.6229508196721312
Precision: 0.628099173553719
Recall: 0.987012987012987
F1-Score: 0.7676767676767676

Train Epoch:  3	 Learning rate: 0.0000	Loss: 28.051587	 Accuracy: 221/326 (68%)
 
Confusion Matrix:
[[77 45]
 [ 0  0]]
Accuracy: 0.6311475409836066
Precision: 0.6311475409836066
Recall: 1.0
F1-Score: 0.7738693467336684

Train Epoch:  4	 Learning rate: 0.0000	Loss: 28.022439	 Accuracy: 223/326 (68%)
 
Confusion Matrix:
[[77 45]
 [ 0  0]]
Accuracy: 0.6311475409836066
Precision: 0.6311475409836066
Recall: 1.0
F1-Score: 0.7738693467336684

Train Epoch:  5	 Learning rate: 0.0000	Loss: 27.943698	 Accuracy: 232/326 (71%)
 
Confusion Matrix:
[[77 45]
 [ 0  0]]
Accuracy: 0.6311475409836066
Precision: 0.6311475409836066
Recall: 1.0
F1-Score: 0.7738693467336684

Train Epoch:  6	 Learning rate: 0.0000	Loss: 27.942598	 Accuracy: 230/326 (71%)
 
Confusion Matrix:
[[77 45

  0%|          | 0/169 [00:00<?, ?it/s]

Train Epoch:  2	 Learning rate: 0.0000	Loss: 25.117349	 Accuracy: 117/287 (41%)
 
Confusion Matrix:
[[  0   1]
 [105  39]]
Accuracy: 0.2689655172413793
Precision: 0.0
Recall: 0.0
F1-Score: nan



  f1_score = 2 * (precision * recall) / (precision + recall)


Train Epoch:  3	 Learning rate: 0.0000	Loss: 25.128571	 Accuracy: 109/287 (38%)
 
Confusion Matrix:
[[  0   1]
 [105  39]]
Accuracy: 0.2689655172413793
Precision: 0.0
Recall: 0.0
F1-Score: nan



  f1_score = 2 * (precision * recall) / (precision + recall)


Train Epoch:  4	 Learning rate: 0.0000	Loss: 25.130746	 Accuracy: 115/287 (40%)
 
Confusion Matrix:
[[  1   2]
 [104  38]]
Accuracy: 0.2689655172413793
Precision: 0.3333333333333333
Recall: 0.009523809523809525
F1-Score: 0.018518518518518517

Train Epoch:  5	 Learning rate: 0.0000	Loss: 25.106299	 Accuracy: 114/287 (40%)
 
Confusion Matrix:
[[  2   2]
 [103  38]]
Accuracy: 0.27586206896551724
Precision: 0.5
Recall: 0.01904761904761905
F1-Score: 0.03669724770642202

Train Epoch:  6	 Learning rate: 0.0000	Loss: 25.101917	 Accuracy: 115/287 (40%)
 
Confusion Matrix:
[[ 7  4]
 [98 36]]
Accuracy: 0.296551724137931
Precision: 0.6363636363636364
Recall: 0.06666666666666667
F1-Score: 0.12068965517241378

Train Epoch:  7	 Learning rate: 0.0000	Loss: 25.010643	 Accuracy: 129/287 (45%)
 
Confusion Matrix:
[[ 9  5]
 [96 35]]
Accuracy: 0.30344827586206896
Precision: 0.6428571428571429
Recall: 0.08571428571428572
F1-Score: 0.15126050420168066

Train Epoch:  8	 Learning rate: 0.0000	Loss: 25.050249	 

  0%|          | 0/169 [00:00<?, ?it/s]

Train Epoch:  2	 Learning rate: 0.0000	Loss: 26.715617	 Accuracy: 210/307 (68%)
 
Confusion Matrix:
[[86 47]
 [ 0  0]]
Accuracy: 0.6466165413533834
Precision: 0.6466165413533834
Recall: 1.0
F1-Score: 0.7853881278538813

Train Epoch:  3	 Learning rate: 0.0000	Loss: 26.760220	 Accuracy: 206/307 (67%)
 
Confusion Matrix:
[[86 47]
 [ 0  0]]
Accuracy: 0.6466165413533834
Precision: 0.6466165413533834
Recall: 1.0
F1-Score: 0.7853881278538813

Train Epoch:  4	 Learning rate: 0.0000	Loss: 26.711678	 Accuracy: 212/307 (69%)
 
Confusion Matrix:
[[86 47]
 [ 0  0]]
Accuracy: 0.6466165413533834
Precision: 0.6466165413533834
Recall: 1.0
F1-Score: 0.7853881278538813

Train Epoch:  5	 Learning rate: 0.0000	Loss: 26.622664	 Accuracy: 216/307 (70%)
 
Confusion Matrix:
[[86 47]
 [ 0  0]]
Accuracy: 0.6466165413533834
Precision: 0.6466165413533834
Recall: 1.0
F1-Score: 0.7853881278538813

Train Epoch:  6	 Learning rate: 0.0000	Loss: 26.631750	 Accuracy: 213/307 (69%)
 
Confusion Matrix:
[[86 47]
 [ 0  0]]
A