# Word Embedding Model

In [1]:
# imports

# pandas
import pandas as pd

# numpy
import numpy as np

# regex
import regex as re

# sklearn
from sklearn.model_selection import train_test_split
from sklearn.model_selection import ParameterGrid, KFold
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.metrics import precision_recall_fscore_support 

# keras
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.initializers import Constant
from keras.models import Sequential
from keras.layers import Bidirectional, Dense, Dropout, SpatialDropout1D, Embedding, Flatten, Conv1D, MaxPooling1D, LSTM, Flatten
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.optimizers import Adam, SGD

# gensim
from gensim.models import KeyedVectors

# tabulate
from tabulate import tabulate

### Preprocess

In [2]:
# preprocessing, tokenization function
"""
project note:
modified version of https://www.kaggle.com/code/amackcrane/python-version-of-glove-twitter-preprocess-script/script

originally supplied by creators as GloVE as a Rudy script and modified by several contributors

-----

Script for preprocessing tweets by Romain Paulus
with small modifications by Jeffrey Pennington
with translation to Python by Motoki Wu (github.com/tokestermw)

Translation of Ruby script to create features for GloVe vectors for Twitter data.
http://nlp.stanford.edu/projects/glove/preprocess-twitter.rb

this version from gist.github.com/ppope > preprocess_twitter.py

light edits by amackcrane
"""

import regex as re

FLAGS = re.MULTILINE | re.DOTALL

def hashtag(text):
    text = text.group()
    hashtag_body = text[1:]
    if hashtag_body.isupper():
        result = " <hashtag> {} <allcaps> ".format(hashtag_body.lower())
    else:
        result = ' ' + " ".join(["<hashtag>"] + re.split(r"(?=[A-Z])", hashtag_body, flags=FLAGS)) + ' '
    return result

def allcaps(text):
    text = text.group()
    return text.lower() + " <allcaps> " 


def tokenize(text):
    # Different regex parts for smiley faces
    eyes = r"[8:=;]"
    nose = r"['`\-]?"

    # function so code less repetitive
    def re_sub(pattern, repl):
        return re.sub(pattern, repl, text, flags=FLAGS)

    # replace apostrophes and backticks with empty string
    text = re_sub(r"['`]", '')
    # replace urls
    text = re_sub(r"https?:\/\/\S+\b|www\.(\w+\.)+\S*", " <url> ")
    # replace @user mentions
    text = re_sub(r"@\w+", " <user> ")
    # replaces smiles
    text = re_sub(r"{}{}[)dD]+|[)dD]+{}{}".format(eyes, nose, nose, eyes), " <smile> ")
    # replaces lolfaces
    text = re_sub(r"{}{}p+".format(eyes, nose), " <lolface> ")
    # replaces sadfaces
    text = re_sub(r"{}{}\(+|\)+{}{}".format(eyes, nose, nose, eyes), " <sadface> ")
    # replaces neutralfaces
    text = re_sub(r"{}{}[\/|l*]".format(eyes, nose), " <neutralface> ")
    # replaces forward slashes with spaces around a forward slash
    text = re_sub(r"/"," / ")
    # replaces hearts
    text = re_sub(r"<3"," <heart> ")
    # replaces numbers
    text = re_sub(r"[-+]?[.\d]*[\d]+[:,.\d]*", " <number> ")
    # replace hashtags
    text = re_sub(r"#\w+", hashtag)  
    # replace repeated punctuation (!?.)
    text = re_sub(r"([!?.]){2,}", r"\1 <repeat> ")
    # replace repeated non whitespace (that isn't !, ?, or .)
    text = re_sub(r"\b(\S*?)(\S)\2{2,}\b", r"\1\2 <elong> ")
    # replace one character from [a-zA-Z<>()] followed by one char from [?!.:;,] with
    #   those two characters separated by a space
    text = re_sub(r"([a-zA-Z<>()])([?!.:;,])", r"\1 \2")
    # replace 1+ characters from [a-zA-Z<>] surrounded by () with
    #   those characters surrounded by spaces surrounded by ()
    text = re_sub(r"\(([a-zA-Z<>]+)\)", r"( \1 )")
    # replace 2+ uppercase letters with those letters lowercase followed by <allcaps>
    text = re_sub(r" ([A-Z]){2,} ", allcaps)

    # handle certain punctuation being next to letters
    pt = r"[@#&\+\$\\{}\[\]\?!()\^_\-.,:';~=]" 
    text = re_sub(rf"([a-zA-Z])({pt}[()^<>_\-.:';~=ox]*{pt}|{pt}+)", r"\1 \2 ")
    text = re_sub(rf"({pt}[()^<>_\-.:';~=ox]*{pt}|{pt}+)([a-zA-Z])", r" \1 \2")

    # handle strings of *
    text = re_sub(r"(\*+)", r" \1 ")

    # replace two or more spaces with one space
    text = re_sub(r" {2,}", r" ")

    return text.lower()

In [3]:
dataset = pd.read_csv('../Dataset/Tweets.csv', encoding='ISO-8859-1')

dataset.drop(['textID', 'selected_text'], axis=1, inplace=True)
dataset['text'] = dataset['text'].apply(lambda t: str(t))

# apply preprocessing to a dataset copy
dataset_pre = dataset.copy(deep=True)

# apply preprocessing using tokenize function
dataset_pre['text'] = dataset_pre['text'].apply(lambda t: tokenize(t))

X_train, X_test, y_train, y_test = train_test_split(dataset_pre.drop(['sentiment'], axis='columns'), dataset_pre['sentiment'], test_size=0.2, random_state=21)

In [4]:
# build vocabulary for training set

tokenizer = Tokenizer()
# don't do any additional punctuation filtering, that's what the preprocessing step is for
tokenizer.filters = '' 

# create a vocabulary (mapping of words to indices) for the training set
# note that no words are removed in the vocabulary creation, there is no minimum amount of occurances needed
tokenizer.fit_on_texts(X_train.text) 

# words in the vocabulary built from training set
training_words = list(tokenizer.word_index.keys())

training_vocab_size = len(training_words) + 1

In [5]:
# transform examples into sequences of indices

SEQUENCE_LENGTH = 50

# texts_to_sequences turns dataframe of text into lists of vocab indices for the words in each text
train_index_seqs = tokenizer.texts_to_sequences(X_train['text'])

# pad_sequences pads the list of indices to the same length with 0's (pads on front)
X_train_seq = pad_sequences(train_index_seqs, maxlen=SEQUENCE_LENGTH)

In [6]:
# inspect how preprocessing and turning into a sequence works on an example

train_idx = 4

# original dataset entry
dataset_idx = X_train.iloc[train_idx].name
print('original:', dataset.text.loc[dataset_idx])

# preprocessed text entry 
print('preprocessed:', X_train.text.iloc[train_idx])

# preprocessed text as vocab indices
seq = train_index_seqs[train_idx]
print('as training vocabulary indices:', seq)

# convert back from vocab indices to words of text
print('back to preprocssed: ', end='')
for i in seq:
    print(tokenizer.index_word[i], end=' ')

original: Hmm..You can`t judge a book by looking at its cover
preprocessed: hmm . <repeat> you cant judge a book by looking at its cover
as training vocabulary indices: [802, 1, 5, 13, 59, 3330, 8, 472, 142, 255, 33, 31, 1126]
back to preprocssed: hmm . <repeat> you cant judge a book by looking at its cover 

In [7]:
# encode class labels numerically 
lab_encoder = LabelEncoder()
lab_encoder.fit(y_train.unique())
y_train_numeric = lab_encoder.transform(y_train.to_list())
y_train_numeric = y_train_numeric.reshape(-1,1)

# one hot encoding
hot_encoder = OneHotEncoder(sparse=False)
hot_encoder.fit(y_train_numeric)
y_train_encoded = hot_encoder.transform(y_train_numeric)

In [8]:
print("X_train", X_train_seq.shape)
print("y_train", y_train_encoded.shape)

X_train (21984, 50)
y_train (21984, 3)


### Create the embedding

This cell downloads the pretrained embeddings from the internet. <br>
It downloads a zip to the directory ./pretrained_embeddings (creates it if it doesn't exist) and unzips it.

In [9]:
# GloVE embeddings: https://nlp.stanford.edu/projects/glove/

!wget -P ./pretrained_embeddings https://nlp.stanford.edu/data/glove.twitter.27B.zip
!unzip -qo ./pretrained_embeddings/glove.twitter.27B.zip -d ./pretrained_embeddings

In [10]:
# load the pretrained embedding

pretrained_embeddings = ['glove.twitter.27B.25d.txt', 'glove.twitter.27B.50d.txt', 'glove.twitter.27B.100d.txt', 'glove.twitter.27B.200d.txt']

embedding_choice = 3

path = './pretrained_embeddings/' + pretrained_embeddings[embedding_choice]

word_vectors = KeyedVectors.load_word2vec_format(path, binary=False, no_header=True)

# embedding vocabulary, the words that the pretrained embedding was trained on
embedding_words = word_vectors.index_to_key

# size of each vector in embedding
EMBEDDING_DIM = word_vectors.vector_size

In [11]:
print('embedding shape (words in embedding vocabulary, dimensions of embedding vectors):', word_vectors.vectors.shape)

# vector for word 'cat'
cat_vec_2 = word_vectors.get_vector('cat')
print(f'vector for word \'cat\' (size {EMBEDDING_DIM}):', cat_vec_2[0:5], '...', cat_vec_2[-5:])

embedding shape (words in embedding vocabulary, dimensions of embedding vectors): (1193514, 200)
vector for word 'cat' (size 200): [ 0.14557  -0.47214   0.045594 -0.11133  -0.44561 ] ... [ 0.29545  -0.49186   0.24053  -0.46081  -0.077296]


In [12]:
# words similar to my_word art closer to it in space
my_word = 'cat'
word_vectors.most_similar(my_word)

[('dog', 0.8324302434921265),
 ('cats', 0.7685185074806213),
 ('kitty', 0.750445544719696),
 ('kitten', 0.7489697933197021),
 ('pet', 0.7319862842559814),
 ('puppy', 0.7023192644119263),
 ('dogs', 0.7016381621360779),
 ('animal', 0.6421106457710266),
 ('bear', 0.6309184432029724),
 ('meow', 0.6304775476455688)]

In [13]:
# create embedding matrix which will make up the embedding layer of the neural network

embedding_matrix = np.zeros((training_vocab_size, EMBEDDING_DIM))

# words from our dataset that aren't in the pretrained embedding's vocabulary
not_in_embedding = []

# for each word and its index in the tokenizer vocabulary (these are our words from the dataset)
for word, i in tokenizer.word_index.items():

  # if that word is in the model's mapping between words and embeddings (vectors)
  if word in word_vectors:

    # set the row of the embedding matrix corresponding to the word's index in the tokenizer vocabulary
    #   to the embedding (vector) of that word 
    embedding_matrix[i] = word_vectors[word]

  else:
    not_in_embedding.append(word)
    
# for each word in the dataset, embedding_matrix has its embedding (vector) from the pretrained model
# if the word isn't present in the pretrained embedding, the vector is all 0's
print('dimensions of embedding matrix:', embedding_matrix.shape)
print('number of words in dataset that weren\'t in pretrained embedding vocabulary:', len(not_in_embedding))

dimensions of embedding matrix: (20710, 200)
number of words in dataset that weren't in pretrained embedding vocabulary: 2434


In [14]:
# the embedding layer translates indices for words in the training vocabulary into their 
#   embedding/vector equivalents as determined by the pretrained embedding

# input_dim=training_vocab_size is the number of words in our training set
# output_dim=EMBEDDING_DIM is the dimension of the embedding, the dimension of each vector in the embedding
# embeddings_initializer=Constant(embedding_matrix) are embeddings for each word in our training set
# input_length=SEQUENCE_LENGTH is the size of examples once transformed, padded into lists of word indices
# trainable=False so the embedding layer weights are not updated with training
embedding_layer = Embedding(input_dim=training_vocab_size, output_dim=EMBEDDING_DIM, embeddings_initializer=Constant(embedding_matrix), 
                            input_length=SEQUENCE_LENGTH, trainable=False)

### Set up grid search

In [15]:
# a function that creates a model with specified hyperparameters

def create_model(layers_with_args,
                 learn_rate = 1e-3,
                 optimizer = 'Adam',
                 momentum = 0.9,
                 loss_fn = 'mse'):

    layers = []

    layers.append(embedding_layer)

    for layer_type, layer_args in layers_with_args:

        if layer_type == 'drp':
            layers.append(Dropout(**layer_args))

        elif layer_type == 'sdrp':
            layers.append(SpatialDropout1D(**layer_args))

        elif layer_type == 'conv':
            layers.append(Conv1D(**layer_args))

        elif layer_type == 'lstm':
            layers.append(LSTM(**layer_args))

        elif layer_type == 'blstm':
            layers.append(Bidirectional(LSTM(**layer_args)))

        elif layer_type == 'dns':
            layers.append(Dense(**layer_args))    

        elif layer_type == 'flt':
            layers.append(Flatten(**layer_args))
        
        elif layer_type == 'mxp':
            layers.append(MaxPooling1D(**layer_args))

        else:
            print('invalid layer type!')
            return None


    if optimizer == 'SGD':
        opt = SGD(learning_rate=learn_rate, momentum=momentum)

    elif optimizer == 'Adam':
        opt = Adam(learning_rate=learn_rate)

    else:
        opt = optimizer


    model = Sequential(layers=layers)

    model.compile(optimizer=opt,
                  loss=loss_fn,
                  metrics=['accuracy'])   

    return model

In [16]:
# callbacks 

reduce_lr_factor = 0.1
reduce_lr_min_lr = 1e-5
reduce_lr_patience = 5
reduce_lr_cooldown = 0
reduce_lr_monitor = 'val_loss'
rlr = ReduceLROnPlateau(factor=reduce_lr_factor, min_lr=reduce_lr_min_lr, patience=reduce_lr_patience, cooldown=reduce_lr_cooldown, monitor=reduce_lr_monitor)

early_stop_patience = 5
early_stop_min_delta = 1e-4
early_stop_monitor = 'val_accuracy'
es = EarlyStopping(patience=early_stop_patience, min_delta=early_stop_min_delta, monitor=early_stop_monitor)

callbacks = [rlr, es]

In [17]:
# values for some of the hyper parameters

epoch_choices = [50] #[50, 100]
learn_rate_choices = [3e-3] #[1e-4, 3e-3, 1e-2] 
optimizer_choices = ['Adam'] #['Adamax', 'Adagrad', 'Adam', 'SGD', 'Nadam', 'Adadelta']   
loss_choices = ['categorical_crossentropy'] #['mse', 'categorical_crossentropy'] 

In [18]:
# best model found by gridsearch
# 76.1 with k fold (lr 3e-3, Adam, epochs=50, categorical_crossentropy)
grid = [
        {
        'layer_order': [('drp', 'blstm', 'dns')],
        'layer_0_args': [{'rate': 0.6}],
        'layer_1_args': [{'units': 150}],
        'layer_2_args': [{'units': 3, 'activation': 'sigmoid'}] 
        }
       ]

In [25]:
# custom grid search 

# list of dicts
# one dict per architecture (architecture = number and types of layers)
# each dict specifies the different parameter values for an architecture's layers
grid = [
        {
        'layer_order': [('drp', 'lstm', 'dns')],
        'layer_0_args': [{'rate': 0.5}, 
                         {'rate': 0.2}],
        'layer_1_args': [{'units': 100, 'dropout': 0.2, 'recurrent_dropout': 0.2},
                         {'units': 200, 'dropout': 0.2}], 
        'layer_2_args': [{'units': 3, 'activation': 'sigmoid'}] # don't mess with the number of units in the output layer
        },

        {
        'layer_order': [('drp', 'lstm', 'dns', 'dns')],
        'layer_0_args': [{'rate': 0.5}, 
                         {'rate': 0.2}],
        'layer_1_args': [{'units': 100, 'dropout': 0.2, 'recurrent_dropout': 0.2},
                         {'units': 200, 'dropout': 0.2}], 
        'layer_2_args': [{'units': 50, 'activation': 'relu'}, {'units': 100, 'activation': 'relu'}],
        'layer_3_args': [{'units': 3, 'activation': 'sigmoid'}] # don't mess with the number of units in the output layer
        }
       ]

In [19]:
other_params = {
                'epochs': epoch_choices,
                'learn_rate': learn_rate_choices,
                'optimizer': optimizer_choices,
                'loss_fn': loss_choices
               }

# add other params dict to each dict in grid
for d in grid:
    d.update(other_params)

### Perform the grid search

In [20]:
# grid search
# do k-fold cross validation on each model searched
# record metrics

param_grid = ParameterGrid(grid)

cur_model = 1
num_models = len(param_grid)
print('number of models to search:', num_models)

histories = []
all_scores = []
acc_and_args = []

k_fold = True

# for each set of parameters in the grid search
for p in param_grid:

        print(f'\nmodel {cur_model} of {num_models}\nsearching parameters:', p)
        cur_model += 1

        # for each layer, combine layer type with its args 
        layer_order = p['layer_order']
        layers_with_args = []
        for i in range(len(layer_order)):

            args_key = 'layer_' + str(i) + '_args'
            layer_args = p[args_key]
            layers_with_args.append((layer_order[i], layer_args))


        # if k fold cross validation specified
        if k_fold:

            scores = {
                'test_accuracy': [],
                'test_error': [],
                'test_precision_neg': [],
                'test_precision_neut': [],
                'test_precision_pos': [],
                'test_recall_neg': [],
                'test_recall_neut': [],
                'test_recall_pos': [],
                'test_f1_score_neg': [],
                'test_f1_score_neut': [],
                'test_f1_score_pos': []
                    }

            fold_num = 1

            kfold = KFold(n_splits=10)

            # for each fold, train a model on the other folds and evaluate on the test fold
            for train, test in kfold.split(X_train_seq, y_train_encoded):

                # configure
                model = create_model(layers_with_args=layers_with_args, learn_rate=p['learn_rate'], optimizer=p['optimizer'], loss_fn=p['loss_fn'])
                
                # train
                history = model.fit(X_train_seq[train], y_train_encoded[train], batch_size=128, epochs=p['epochs'], 
                                    validation_data=(X_train_seq[test], y_train_encoded[test]), callbacks=callbacks, verbose=0)

                # evaluate
                error, acc = model.evaluate(X_train_seq[test], y_train_encoded[test], verbose=0)

                # save scores for later classification report
                preds = np.argmax(model.predict(X_train_seq[test]), axis=1).reshape(-1, 1)
                y_true = hot_encoder.inverse_transform(y_train_encoded[test])

                precision, recall, f1, _ = precision_recall_fscore_support(y_true, preds)

                scores['test_accuracy'].append(acc)
                scores['test_error'].append(error)
                scores['test_precision_neg'].append(precision[0])
                scores['test_precision_neut'].append(precision[1])
                scores['test_precision_pos'].append(precision[2])
                scores['test_recall_neg'].append(recall[0])
                scores['test_recall_neut'].append(recall[1])
                scores['test_recall_pos'].append(recall[2])
                scores['test_f1_score_neg'].append(f1[0])
                scores['test_f1_score_neut'].append(f1[1])
                scores['test_f1_score_pos'].append(f1[2])

                # print error and accuracy for this fold
                print(f'Fold {fold_num} - {model.metrics_names[0]} ({p["loss_fn"]}): {error}; {model.metrics_names[1]}: {acc*100}%')

                fold_num = fold_num + 1


            avg_acc = np.mean(scores['test_accuracy'])
            print('Average accuracy: %.3f' % avg_acc)
            histories.append((avg_acc, p))
            all_scores.append((scores, p))

        # no k fold cross validation
        else:
            model = create_model(layers_with_args=layers_with_args, learn_rate=p['learn_rate'], optimizer=p['optimizer'], loss_fn=p['loss_fn'])

            # train model
            history = model.fit(X_train_seq, y_train_encoded, batch_size=128, epochs=p['epochs'],
                                validation_split=0.2, callbacks=callbacks, verbose=0)

            # print the average validation accuracy of the last 6 epochs (because early stopping after 5 epochs of no improvement)
            # if it is more than threshold, save the model arguments
            threshold = 0.745
            avg_acc = np.mean(history.history['val_accuracy'][-7:-1])
            print('avg validation accuracy of last 6 epochs:', avg_acc)
            if avg_acc > threshold:
                acc_and_args.append((avg_acc, p))
            
            # save metrics
            histories.append((avg_acc, p))

number of models to search: 1

model 1 of 1
searching parameters: {'epochs': 50, 'layer_0_args': {'rate': 0.6}, 'layer_1_args': {'units': 150}, 'layer_2_args': {'units': 3, 'activation': 'sigmoid'}, 'layer_order': ('drp', 'blstm', 'dns'), 'learn_rate': 0.003, 'loss_fn': 'categorical_crossentropy', 'optimizer': 'Adam'}


2022-06-04 00:00:58.831380: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Fold 1 - loss (categorical_crossentropy): 0.5875210165977478; accuracy: 76.98954343795776%
Fold 2 - loss (categorical_crossentropy): 0.6401592493057251; accuracy: 74.57935214042664%
Fold 3 - loss (categorical_crossentropy): 0.623387336730957; accuracy: 76.03456377983093%
Fold 4 - loss (categorical_crossentropy): 0.5831453800201416; accuracy: 76.58026218414307%
Fold 5 - loss (categorical_crossentropy): 0.6005998253822327; accuracy: 75.25022625923157%
Fold 6 - loss (categorical_crossentropy): 0.5994269847869873; accuracy: 76.7060935497284%
Fold 7 - loss (categorical_crossentropy): 0.6260656714439392; accuracy: 75.93266367912292%
Fold 8 - loss (categorical_crossentropy): 0.6144983172416687; accuracy: 75.93266367912292%
Fold 9 - loss (categorical_crossentropy): 0.6425366997718811; accuracy: 75.88716745376587%
Fold 10 - loss (categorical_crossentropy): 0.617222249507904; accuracy: 75.75068473815918%
Average accuracy: 0.760


In [21]:
# function to print out a classification report
# can't use normal classification_report since we want to display our values from cross validation
def custom_classif_report(metric_scores, just_acc=False):

  scores_df = pd.DataFrame(metric_scores)
  
  # average of each metric over the splits of cross validation
  scores_mean = scores_df.mean() 
  
  # overall averages for precision, recall, and f1 across class labels
  avg_precision = scores_mean[['test_precision_neut', 'test_precision_pos', 'test_precision_neg']].mean()
  avg_recall = scores_mean[['test_recall_neut', 'test_recall_pos', 'test_recall_neg']].mean()
  avg_f1 = scores_mean[['test_f1_score_neut', 'test_f1_score_pos', 'test_f1_score_neg']].mean()
  
  print('Classification Report:')

  # precision, recall, f1 metrics in table printable form
  metric_info = {
    'precision': [scores_mean['test_precision_neut'], scores_mean['test_precision_pos'], scores_mean['test_precision_neg'], avg_precision], 
    'recall': [scores_mean['test_recall_neut'], scores_mean['test_recall_pos'], scores_mean['test_recall_neg'], avg_recall], 
    'f1-score': [scores_mean['test_f1_score_neut'], scores_mean['test_f1_score_pos'], scores_mean['test_f1_score_neg'], avg_f1]}
  
  # print table for precision, recall, f1
  if not just_acc:
    print(tabulate(metric_info, headers='keys', tablefmt='fancy_grid', showindex=['neutral', 'positive', 'negative', 'average']))
  
  # print table for accuracy
  acc_info = {'accuracy': [scores_mean['test_accuracy']]}
  print(tabulate(acc_info, headers='keys', tablefmt='fancy_grid'))

In [22]:
# classification report for first model of grid search
custom_classif_report(all_scores[0][0])

Classification Report:
╒══════════╤═════════════╤══════════╤════════════╕
│          │   precision │   recall │   f1-score │
╞══════════╪═════════════╪══════════╪════════════╡
│ neutral  │    0.726407 │ 0.739474 │   0.732692 │
├──────────┼─────────────┼──────────┼────────────┤
│ positive │    0.813657 │ 0.797405 │   0.80531  │
├──────────┼─────────────┼──────────┼────────────┤
│ negative │    0.750257 │ 0.747346 │   0.7483   │
├──────────┼─────────────┼──────────┼────────────┤
│ average  │    0.76344  │ 0.761408 │   0.762101 │
╘══════════╧═════════════╧══════════╧════════════╛
╒════════════╕
│   accuracy │
╞════════════╡
│   0.759643 │
╘════════════╛
