In [1]:
!pip install transformers
!pip install torch-scatter
!pip install torch-sparse
!pip install torch-geometric

Collecting transformers
  Downloading transformers-4.17.0-py3-none-any.whl (3.8 MB)
[?25l[K     |                                | 10 kB 40.5 MB/s eta 0:00:01[K     |▏                               | 20 kB 44.7 MB/s eta 0:00:01[K     |▎                               | 30 kB 23.4 MB/s eta 0:00:01[K     |▍                               | 40 kB 14.6 MB/s eta 0:00:01[K     |▍                               | 51 kB 12.9 MB/s eta 0:00:01[K     |▌                               | 61 kB 15.1 MB/s eta 0:00:01[K     |▋                               | 71 kB 14.5 MB/s eta 0:00:01[K     |▊                               | 81 kB 14.4 MB/s eta 0:00:01[K     |▊                               | 92 kB 15.8 MB/s eta 0:00:01[K     |▉                               | 102 kB 15.8 MB/s eta 0:00:01[K     |█                               | 112 kB 15.8 MB/s eta 0:00:01[K     |█                               | 122 kB 15.8 MB/s eta 0:00:01[K     |█▏                              | 133 kB 15.8

In [2]:
import numpy as np
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModel, BertTokenizer, BertModel
from google.colab import drive
import tensorflow as tf
import os
import sys
import xml.etree.ElementTree as ET
import glob
from scipy import io

In [3]:
#define device for deep learning
CUDA_LAUNCH_BLOCKING=1

device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    device = torch.device("cuda")
    print('GPU:', torch.cuda.get_device_name(0))
else:
    raise SystemError('GPU device not found')

GPU: Tesla T4


In [4]:
# set environment as googledrive to folder "resource"
data_path =  "/Colab Notebooks/"

try:
    drive.mount('/content/drive')
    data_path = "/content/drive/MyDrive/Colab Notebooks/UCCA-CNN/"

except:
    print("You are not working in Colab at the moment :(")

Mounted at /content/drive


In [5]:
# parameters

seed = 42

np.random.seed(seed)
node_embedding = np.random.uniform(low=0, high=1, size=(768,))

In [6]:
# Use last four layers by default
layers = [-4, -3, -2, -1]
tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-cased")
model = AutoModel.from_pretrained("bert-base-multilingual-cased", output_hidden_states=True)

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/625 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/972k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.87M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/681M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [7]:
def get_word_idx(sent: str, word: str):
     return sent.split(" ").index(word)
 
 
def get_hidden_states(encoded, token_ids_word, model, layers):
     """Push input IDs through model. Stack and sum `layers` (last four by default).
        Select only those subword token outputs that belong to our word of interest
        and average them."""
     with torch.no_grad():
         output = model(**encoded)
 
     # Get all hidden states
     states = output.hidden_states
     # Stack and sum all requested layers
     output = torch.stack([states[i] for i in layers]).sum(0).squeeze()
     # Only select the tokens that constitute the requested word
     word_tokens_output = output[token_ids_word]
 
     return word_tokens_output.mean(dim=0)
 
 
def get_word_vector(sent, idx, tokenizer, model, layers):
     """Get a word vector by first tokenizing the input sentence, getting all token idxs
        that make up the word of interest, and then `get_hidden_states`."""
     encoded = tokenizer.encode_plus(sent, return_tensors="pt")
     # get all token idxs that belong to the word of interest
     token_ids_word = np.where(np.array(encoded.word_ids()) == idx)
 
     return get_hidden_states(encoded, token_ids_word, model, layers)

In [8]:
def XML_processing(file):
  node2tag = {} # dict[node] = 'word/tag'
  parent2children = {} # dict[parent] = [children]
  parent2childrenterminal = {}
  maxnoode = 0
  sent = '' # sentence in a string
  tree = ET.parse(file)
  root = tree.getroot()
  for layer in root.iter('layer'):
    if layer.attrib['layerID'] == '0':
      for node in layer.iter('node'):
        for attribute in node.iter('attributes'):
          node2tag[node.attrib['ID']] = attribute.attrib['text']
          sent += attribute.attrib['text'] + ' '
    else:
      for node in layer.iter('node'):
        e = []
        for edge in node.iter('edge'):
          if edge.attrib['toID'].startswith("1"):
              if int(edge.attrib['toID'][2::]) > maxnoode:
                  maxnoode = int(edge.attrib['toID'][2::])
              e.append(edge.attrib['toID']) 
              parent2children[node.attrib['ID']] = e
          else:
              e.append(edge.attrib['toID']) 
              parent2childrenterminal[node.attrib['ID']] = e              
          if edge.attrib['type'] != 'Terminal':
            node2tag[edge.attrib['toID']] = edge.attrib['type']
  return node2tag, parent2children, parent2childrenterminal, maxnoode, sent[0:-1]

In [9]:
def create_feature_matrix(sent, parent2childrenterminal, maxnoode, node_embedding):

  #feature_matrix = []
  feature_matrix = np.zeros((maxnoode,768))
  words = sent.split()
  #word_embedding = node_embedding
  for index in range(maxnoode):
    key = "1."+str(index+1)
    if key in parent2childrenterminal.keys():
      value = int(parent2childrenterminal[key][0][2::])-1
      idx = get_word_idx(sent, words[value])

      word_embedding = get_word_vector(sent, idx, tokenizer, model, layers)
    else:
      word_embedding = node_embedding
    #feature_matrix.append(word_embedding)
    feature_matrix[index,:] = word_embedding
  return feature_matrix

def adj_list_to_matrix(adj_list, n):
    adj_matrix = np.zeros((n,n))
    np.fill_diagonal(adj_matrix,0)
    for i in adj_list:
        for j in adj_list[i]:
            adj_matrix[int(i[2::])-1,int(j[2::])-1] = 1
    return adj_matrix

In [None]:
file = data_path + "dataset/xml/turkish/1.xml"
node2tag, parent2children, parent2childrenterminal, maxnode, sent = XML_processing(file)

In [None]:
adj_matrix = adj_list_to_matrix(parent2children, maxnode)

feature_matrix = create_feature_matrix(sent, parent2childrenterminal, maxnode, node_embedding)
feature_matrix.shape, adj_matrix.shape

((18, 768), (18, 18))

In [122]:
tr_irony = pd.read_csv(data_path+"dataset/raw/turkishirony.csv")
tr_dict = {}
for item, label in enumerate(tr_irony["label"]):
  tr_dict[int(item)] = int(label)

In [123]:
adj_all = []
feature_all = []
count = []
for file in glob.glob(data_path + "dataset/xml/turkish600/*.xml"):
  node2tag, parent2children, parent2childrenterminal, maxnode, sent = XML_processing(file)
  file_name = file.split("/",-1)
  sent_id = int(file_name[-1].split(".")[0])
  count.append(tr_dict[int(sent_id)-1])
  adj_matrix = adj_list_to_matrix(parent2children, maxnode)
  adj_all.append(adj_matrix)
  feature_matrix = create_feature_matrix(sent, parent2childrenterminal, maxnode, node_embedding)
  feature_all.append(feature_matrix)

In [None]:
len(count)

600

In [None]:
io.savemat(data_path+'turkish_irony.mat', {'feature':feature_all,'adjencency':adj_all, "label":count})

In [None]:
tr_irony = io.loadmat(data_path + 'turkish.mat')
tr_irony600 = io.loadmat(data_path + 'turkish_irony.mat')

In [44]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.autograd as autograd
from torch.utils.data import Dataset, DataLoader
#from torch_geometric.data import DataLoader
import torch.nn.utils as utils
import torch.optim.lr_scheduler as lr_scheduler
from torch.nn import Parameter
from transformers import BertTokenizer, BertModel
from transformers import AutoModel, AutoTokenizer
from sklearn.utils import shuffle
import tensorflow as tf
import os
import sys
import math
import random
import xml.etree.ElementTree as ET
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, matthews_corrcoef, confusion_matrix, classification_report, f1_score, recall_score, precision_score, accuracy_score
from scipy.stats import pearsonr
from sklearn.model_selection import KFold
from nltk.tokenize import WordPunctTokenizer
from torch_geometric.utils import degree, remove_self_loops, add_self_loops
from torch_geometric.nn import GATConv, GATv2Conv
import warnings
warnings.filterwarnings("ignore")

In [60]:
from typing import Union, Tuple, Optional
from torch_geometric.typing import (Adj, Size, OptTensor, PairTensor)

import torch
from torch import Tensor
import torch.nn.functional as F
from torch.nn import Parameter, Linear
from torch_sparse import SparseTensor, set_diag
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.utils import remove_self_loops, add_self_loops, softmax

from torch_geometric.nn.inits import glorot, zeros

In [119]:
# Model hyperparameters

RANDOM_SEED = 42
batch_size = 1
n_out = 2
epoch_size = 50
learning_rate = 0.0005
init_weight_decay = 0.2
init_clip_max_norm = 0.1
nhid=800
dropout = 0.1

In [15]:
# Model dataset

class IronyDataset(Dataset):
  def __init__(self, split, feature, adj, label):
    self.feature_array = np.array(feature)
    self.adj_array = np.array(adj)
    self.label_array = label

    #print("len ", len(self.feature_array), " ", len(self.label_array))

  def __len__(self):
    return len(self.feature_array)

  def __getitem__(self, idx):
    selected_label = int(self.label_array[idx])
    selected_feature = self.feature_array[idx]
    selected_adj = self.adj_array[idx]

    return selected_feature, selected_adj, selected_label


In [16]:
def collate_fn(data):
  data.sort(key=lambda x: (x[0].shape[0]), reverse=True)
  arrays, adjs, labels = zip(*data)
  lengths = [(array.shape[0]) for array in arrays]
  longest = max(lengths)
  targets = np.zeros([len(arrays), max(lengths), 768])
  targets_adj = np.zeros([len(arrays), max(lengths), max(lengths)])
  for i, cap in enumerate(arrays):
    end = lengths[i]
    array = arrays[i]
    adj = adjs[i]
    new_adj = np.pad(adj, [((longest - end),0),(0,(longest - end))], mode='constant')
    new_array = np.pad(array, [((longest - end),0),(0,0)], mode='constant')
    targets[i,:,:] = new_array
    targets_adj[i,:,:] = new_adj
  return targets, targets_adj, labels


In [17]:
tr_irony = io.loadmat(data_path + 'turkish.mat')
tr_irony600 = io.loadmat(data_path + 'turkish_irony.mat')
dataset = pd.DataFrame({'feature' : feature_all, 'label' : count})
#dataset600 = pd.DataFrame({'feature' : tr_irony600["feature"], 'label' : tr_irony600["label"]})


In [None]:
dataset["feature"].shape, np.array(dataset["label"]).shape

((220,), (220,))

In [None]:

train_feature, valid_feature, train_label, valid_label = train_test_split(feature_all, count, test_size=0.1, random_state=RANDOM_SEED)
#len(train_feature), len(valid_feature)

dl_train = DataLoader(IronyDataset("train", train_feature, train_label), batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
dl_val= DataLoader(IronyDataset("val", valid_feature, valid_label), batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

len  198   198
len  22   22


In [79]:
class GraphAttentionLayer(nn.Module):
    """
    Simple GAT layer, similar to https://arxiv.org/abs/1710.10903
    """
    def __init__(self, in_features, out_features, dropout, alpha, concat=True):
        super(GraphAttentionLayer, self).__init__()
        self.dropout = dropout
        self.in_features = in_features
        self.out_features = out_features
        self.alpha = alpha
        self.concat = concat

        self.W = nn.Parameter(torch.empty(size=(in_features, out_features)))
        nn.init.xavier_uniform_(self.W.data, gain=1.414)
        self.a = nn.Parameter(torch.empty(size=(2*out_features, 1)))
        nn.init.xavier_uniform_(self.a.data, gain=1.414)

        self.leakyrelu = nn.LeakyReLU(self.alpha)

    def forward(self, h, adj):
        Wh = torch.matmul(h, self.W) # h.shape: (N, in_features), Wh.shape: (N, out_features)
        e = self._prepare_attentional_mechanism_input(Wh)

        zero_vec = -9e15*torch.ones_like(e)
        attention = torch.where(adj > 0, e, zero_vec)
        attention = F.softmax(attention, dim=1)
        attention = F.dropout(attention, self.dropout, training=self.training)
        h_prime = torch.matmul(attention, Wh)

        if self.concat:
            return F.elu(h_prime)
        else:
            return h_prime

    def _prepare_attentional_mechanism_input(self, Wh):
        # Wh.shape (N, out_feature)
        # self.a.shape (2 * out_feature, 1)
        # Wh1&2.shape (N, 1)
        # e.shape (N, N)
        Wh1 = torch.matmul(Wh, self.a[:self.out_features, :])
        Wh2 = torch.matmul(Wh, self.a[self.out_features:, :])
        # broadcast add
        e = Wh1 + Wh2.T
        return self.leakyrelu(e)

    def __repr__(self):
        return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')'


class SpecialSpmmFunction(torch.autograd.Function):
    """Special function for only sparse region backpropataion layer."""
    @staticmethod
    def forward(ctx, indices, values, shape, b):
        assert indices.requires_grad == False
        a = torch.sparse_coo_tensor(indices, values, shape)
        ctx.save_for_backward(a, b)
        ctx.N = shape[0]
        return torch.matmul(a, b)

    @staticmethod
    def backward(ctx, grad_output):
        a, b = ctx.saved_tensors
        grad_values = grad_b = None
        if ctx.needs_input_grad[1]:
            grad_a_dense = grad_output.matmul(b.t())
            edge_idx = a._indices()[0, :] * ctx.N + a._indices()[1, :]
            grad_values = grad_a_dense.view(-1)[edge_idx]
        if ctx.needs_input_grad[3]:
            grad_b = a.t().matmul(grad_output)
        return None, grad_values, None, grad_b


class SpecialSpmm(nn.Module):
    def forward(self, indices, values, shape, b):
        return SpecialSpmmFunction.apply(indices, values, shape, b)

    
class SpGraphAttentionLayer(nn.Module):
    """
    Sparse version GAT layer, similar to https://arxiv.org/abs/1710.10903
    """

    def __init__(self, in_features, out_features, dropout, alpha, concat=True):
        super(SpGraphAttentionLayer, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.alpha = alpha
        self.concat = concat

        self.W = nn.Parameter(torch.zeros(size=(in_features, out_features)))
        nn.init.xavier_normal_(self.W.data, gain=1.414)
                
        self.a = nn.Parameter(torch.zeros(size=(1, 2*out_features)))
        nn.init.xavier_normal_(self.a.data, gain=1.414)

        self.dropout = nn.Dropout(dropout)
        self.leakyrelu = nn.LeakyReLU(self.alpha)
        self.special_spmm = SpecialSpmm()

    def forward(self, input, adj):
        dv = 'cuda' if input.is_cuda else 'cpu'

        N = input.size()[0]
        edge = adj.nonzero().t()

        h = torch.mm(input, self.W)
        # h: N x out
        assert not torch.isnan(h).any()

        # Self-attention on the nodes - Shared attention mechanism
        edge_h = torch.cat((h[edge[0, :], :], h[edge[1, :], :]), dim=1).t()
        # edge: 2*D x E

        edge_e = torch.exp(-self.leakyrelu(self.a.mm(edge_h).squeeze()))
        assert not torch.isnan(edge_e).any()
        # edge_e: E

        e_rowsum = self.special_spmm(edge, edge_e, torch.Size([N, N]), torch.ones(size=(N,1), device=dv))
        # e_rowsum: N x 1

        edge_e = self.dropout(edge_e)
        # edge_e: E

        h_prime = self.special_spmm(edge, edge_e, torch.Size([N, N]), h)
        assert not torch.isnan(h_prime).any()
        # h_prime: N x out
        
        h_prime = h_prime.div(e_rowsum)
        # h_prime: N x out
        assert not torch.isnan(h_prime).any()

        if self.concat:
            # if this layer is not last layer,
            return F.elu(h_prime)
        else:
            # if this layer is last layer,
            return h_prime

    def __repr__(self):
        return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')'

In [83]:
class UCCA_GAT(nn.Module):
    def __init__(self, nfeat, nhid, nclass, dropout, alpha, nheads):
        """Dense version of GAT."""
        super(UCCA_GAT, self).__init__()
        self.dropout = dropout

        self.attentions = [GraphAttentionLayer(nfeat, nhid, dropout=dropout, alpha=alpha, concat=True) for _ in range(nheads)]
        for i, attention in enumerate(self.attentions):
            self.add_module('attention_{}'.format(i), attention)

        self.out_att = GraphAttentionLayer(nhid * nheads, nclass, dropout=dropout, alpha=alpha, concat=False)

    def forward(self, x, adj):
        x = F.dropout(x, self.dropout, training=self.training)
        x = torch.cat([att(x, adj) for att in self.attentions], dim=1)
        x = F.dropout(x, self.dropout, training=self.training)
        x = F.elu(self.out_att(x, adj))
        x= torch.mean(x, 1)
        #return F.log_softmax(x, dim=1)
        return x

In [None]:
gat_model = UCCA_GCN(nfeat=768, 
                     nhid=nhid, 
                     nclass=n_out, 
                    dropout=dropout)
gat_model.to(device)

UCCA_GCN(
  (gc1): GraphConvolution (768 -> 100)
  (gc2): GraphConvolution (100 -> 2)
  (dropout): Dropout(p=0.2, inplace=False)
  (fc1): Linear(in_features=768, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=2, bias=True)
)

In [22]:
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) #, weight_decay=init_weight_decay)
criterion = nn.CrossEntropyLoss()

In [23]:
def save_checkpoint(state, location):
	filepath = os.path.join(location, 'best.pth.tar')
	torch.save(state, filepath)

In [66]:
def train(train_dl, model, optimizer):
  model.train()
  total_loss = 0.
  for batch in train_dl:
    feature, adj, label = batch
    feature, adj, label = torch.FloatTensor(feature), torch.LongTensor(adj), torch.LongTensor(label)
    feature, adj, label = feature.to(device), adj.to(device), label.to(device)
    optimizer.zero_grad()
    output = model(feature, adj)
    loss = criterion(output.view(-1, n_out), label)
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
    optimizer.step()
    total_loss += loss.item()

  return total_loss/float(len(train_dl))

In [67]:
def evaluate(model, dl):
  total_loss = 0
  prediction_list = []
  label_list = []
  model.eval()
  with torch.no_grad():
    for batch in dl:		
      feature, adj, label = batch
      feature, adj, label = torch.FloatTensor(feature), torch.LongTensor(adj), torch.LongTensor(label)
      feature, adj, label = feature.to(device), adj.to(device), label.to(device)
      output = model(feature, adj)
      loss = criterion(output.view(-1, n_out), label)
      total_loss += loss.item()
      predicted = torch.argmax(output, dim=1)
      prediction_list.extend(predicted.data.cpu().numpy())
      label_list.extend(label.data.cpu().numpy())
  return f1_score(label_list, prediction_list, average='macro'), total_loss, label_list, prediction_list

In [26]:
def train_and_evaluate(model, optimizer, train_dl, val_dl, test_dl=None, fold=0):
  best_val_acc = -999.9
  r_test_acc = -999.0
  scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)
  label_best = []
  prediction_best = []
  for epoch in range(1, epoch_size+1):
    total_loss = train(train_dl, model, optimizer)
    val_acc, val_loss, label_list, prediction_list = evaluate(model, val_dl)
    #test_acc, test_loss = evaluate(model, test_dl)
    #print("Epoch = ", epoch, " train loss = ", total_loss, " val_acc = ", val_acc) #, " test_acc = ", test_acc)
    if val_acc > best_val_acc:
      save_checkpoint({'epoch': epoch , 'state_dict': model.state_dict(), 'optim_dict': optimizer.state_dict()}, location=data_path + 'result/')
      best_val_acc = val_acc
      #r_test_acc = test_acc
      label_best = label_list
      prediction_best = prediction_list
    scheduler.step()
  print("Best Val acc = ", best_val_acc) #, " Test Acc = ", r_test_acc)
  return best_val_acc,label_best, prediction_best


In [None]:
train_and_evaluate(gcn_model, optimizer, dl_train, dl_val)

NameError: ignored

In [None]:
def get_eval_report(labels, preds):
  mcc = matthews_corrcoef(labels, preds)
  tn, fp, fn, tp = confusion_matrix(labels, preds).ravel()
  return {
              "mcc": mcc,
              "tp": tp,
              "tn": tn,
              "fp": fp,
              "fn": fn
          }

In [110]:
def train_and_evaluate_fold():
  label_all = []
  prediction_all = []
  best_accuracy = []


  #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)
  k_folds = 10

  results = {}

  # Set fixed random number seed
  torch.manual_seed(42)

  # Define the K-fold Cross Validator
  kfold = KFold(n_splits=k_folds, random_state=RANDOM_SEED, shuffle=True)
  # Start print
  print('--------------------------------')

  # K-fold Cross Validation model evaluation
  dataset = pd.DataFrame({'feature' : feature_all, 'adj' : adj_all,'label' : count})
  #dataset = shuffle(dataset)
  #print(dataset)
  for fold, (train_idx, val_idx) in enumerate(kfold.split(dataset)):
    gcn_model = UCCA_GAT(nfeat=768, 
                         nhid=nhid,
                         nclass=n_out,
                         dropout=dropout,
                         alpha=0.1,
                         nheads=1).to(device)

    optimizer = torch.optim.Adam(gcn_model.parameters(), lr=learning_rate) #, weight_decay=init_weight_decay)

    train_df = dataset.iloc[train_idx]

    valid_df = dataset.iloc[val_idx]
    print(fold)
    dl_train = DataLoader(IronyDataset("train", train_df["feature"].to_numpy(), train_df["adj"].to_numpy(), list(train_df["label"])), batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
    dl_val= DataLoader(IronyDataset("val", valid_df["feature"].to_numpy(), valid_df["adj"].to_numpy(), list(valid_df["label"])), batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
    best_a,label_list, prediction_list = train_and_evaluate(gcn_model, optimizer, dl_train, dl_val, fold)
    best_accuracy.append(best_a)
    label_all.extend(label_list)
    prediction_all.extend(prediction_list)
  
  print(np.mean(best_accuracy))
  return label_all, prediction_all

In [124]:
label_all, prediction_all = train_and_evaluate_fold()

--------------------------------
0
Best Val acc =  0.6996662958843161
1
Best Val acc =  0.6825396825396826
2
Best Val acc =  0.7
3
Best Val acc =  0.6499027507641011
4
Best Val acc =  0.7818181818181817
5
Best Val acc =  0.6832453459294249
6
Best Val acc =  0.7159565580618212
7
Best Val acc =  0.613986013986014
8
Best Val acc =  0.6811188811188811
9
Best Val acc =  0.6996662958843158
0.6907900005986739


In [125]:
f1_score(label_all, prediction_all), recall_score(label_all, prediction_all), precision_score(label_all, prediction_all),accuracy_score(label_all, prediction_all)

(0.6869712351945855, 0.6766666666666666, 0.697594501718213, 0.6916666666666667)