## This script is used to load a trained model, and then to predict negation and uncertainty

In [None]:
import sys
sys.path.append('./libs')

import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import pickle
import pandas as pd
from itertools import islice

from tabulate import tabulate
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report as eskclarep
from seqeval.metrics import precision_score, recall_score, f1_score, accuracy_score
from seqeval.metrics import classification_report as seqclarep
from sklearn.preprocessing import LabelBinarizer
from itertools import chain

from tensorflow import keras
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Concatenate, Lambda, Input, LSTM, Embedding, Dense, TimeDistributed, Dropout, Bidirectional, InputLayer, Activation, Flatten, Masking
from tensorflow.keras.optimizers import Adam, schedules

from tf2crf import CRF as crf6
from mwrapper import ModelWithCRFLoss, ModelWithCRFLossDSCLoss
from utils import build_matrix_embeddings as bme, plot_model_performance, logits_to_tokens, report_to_df

from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import TensorBoard

from IPython.core.display import display, HTML

import datetime, os
import random

# Seed every random number generator used here with one value so that
# repeated runs of the notebook behave the same way.
SEED = 42

# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# setting it here presumably only affects child processes - confirm.
os.environ.update({
    'PYTHONHASHSEED': str(SEED),
    'TF_CUDNN_DETERMINISTIC': '1',  # TF 2.1+
})

# Python's `random`, NumPy and TensorFlow each keep their own generator state.
for _seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_fn(SEED)

# Name of the trained model; it selects the sub-directory used for both the
# logs and the saved model.
MODEL = 'base_model_bioEmb_300_skyp'

logs_base_dir = "./logs"
log_dir = f"{logs_base_dir}/{MODEL}"
save_base_dir = './model-save'
save_dir = f"{save_base_dir}/{MODEL}"

# Create the directory tree up front; exist_ok=True makes re-running harmless.
for _directory in (logs_base_dir, log_dir, save_base_dir, save_dir):
    os.makedirs(_directory, exist_ok=True)

# IPython magic (only valid inside a notebook/IPython session): loads the
# extension named "tensorboard".
%load_ext tensorboard

# ****** PARAMETER DEFINITIONS ******
# NOTE(review): the "+ 2" terms presumably account for two extra reserved
# entries (e.g. padding and out-of-vocabulary) - confirm against training code.
NUM_WORDS = 25331 + 2
LEN_SENTS = 190  # sentence length used when padding the model input
NUM_TAGS = 9 + 2

# ****** HYPERPARAMETER DEFINITIONS ******
_EPOCHS = 60
EMBED_DIM = 300
_DROPOUT = 0.5
_BACH_SIZE = 512 + 256
_LEARN_RATE = 1e-2


# Ask TensorFlow to grow GPU memory on demand on every visible GPU.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # The memory-growth setting has to be identical on all GPUs.
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)

        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(f"{len(gpus)} Physical GPUs, {len(logical_gpus)} Logical GPUs")
    except RuntimeError as err:
        # Raised when memory growth is configured after the GPUs were
        # already initialized; report it and carry on.
        print(err)

In [None]:
def _load_mapping(path):
    """Return the single Python object stored in the .npy file at *path*.

    NOTE: allow_pickle=True unpickles arbitrary objects, so only load files
    from a trusted source.
    """
    return np.load(path, allow_pickle=True).item()


# Lookup tables stored next to the model (presumably dicts saved with np.save).
word2idx = _load_mapping('vectors/word2index.npy')
tag2idx = _load_mapping('vectors/tag2index.npy')
idx2tag = _load_mapping('vectors/index2tag.npy')

In [None]:
# Print the loaded tag2idx mapping for inspection.
print(tag2idx)

In [None]:
# Restore the trained model from the SavedModel directory of MODEL.
print("\nLoading the model for negation and uncertainty detection ...")
model = tf.saved_model.load(f"{save_dir}/")
print("\nModel loaded ...")

In [None]:
# Note: sentences must be tokenized beforehand (e.g. with NLTK or spaCy for Spanish).
# The model receives a tokenized sentence and returns a negation or speculation label for each token.
# The sentences below have already been tokenized.
# Using the NLTK or spaCy tokenizer for Spanish is recommended.

# Example sentences, already tokenized: tokens are separated by single
# spaces, so a plain whitespace split recovers the token list.
_raw_sentences = (
    "No dolor toráxico ni agudo .",
    "Paciente con probable carcinoma pulmonar .",
    "Inflamacion aguda negativa .",
    "helicobacter pylori negativo .",
    "negativo para malignidad .",
    "No se puede descartar cáncer de pulmón .",
    "Sin fiebre ni nauseas .",
)

# One list of tokens per sentence.
negation_samples = [sentence.split() for sentence in _raw_sentences]

In [None]:
# Convert every tokenized sentence into a list of vocabulary indices.
# Tokens are lower-cased before the lookup; tokens missing from word2idx
# fall back to the index stored under the '-OOV-' key.
oov_index = word2idx['-OOV-']
negation_samples_X = [
    [word2idx.get(token.lower(), oov_index) for token in sentence]
    for sentence in negation_samples
]

# Pad (with pad_sequences' default value) or cut every sentence to exactly
# LEN_SENTS positions.
# truncating='post' drops the *tail* of an over-long sentence, so position i
# of the model output still corresponds to token i of the sentence. The
# default, truncating='pre', would drop the leading tokens instead and shift
# every label relative to the tokens when the first len(sentence) labels are
# later displayed.
negation_samples_X = pad_sequences(
    negation_samples_X,
    maxlen=LEN_SENTS,
    padding='post',
    truncating='post',
)

print("Negation: \n", negation_samples_X)

In [None]:
# Run the loaded model on the padded index matrix and convert its output
# with .numpy().
predictions1 = model(negation_samples_X).numpy()
# Debug aid: uncomment to inspect the raw predictions and their shape.
#print("Negation: \n", predictions1, predictions1.shape)

In [None]:
# Turn the model output into tag strings via idx2tag.
# NOTE(review): presumably predictions1 already holds tag indices; if the
# model returned per-tag scores instead, np.argmax(predictions1, -1) would
# have to be applied first - confirm against the saved model.
log_tokens1 = logits_to_tokens(predictions1, idx2tag)

# Quick check on the first sample.
first_sentence, first_labels = negation_samples[0], log_tokens1[0]
print("Sentence ", first_sentence)
print("Negation: \n", first_labels)

In [None]:
# Render one HTML table per sentence: the tokens form the header row and the
# predicted labels form the single body row.
for idx, tokens in enumerate(negation_samples):
    # Keep only as many labels as the sentence has tokens; the remaining
    # positions come from padding.
    label_row = [log_tokens1[idx][:len(tokens)]]
    table_html = tabulate(label_row, headers=tokens, tablefmt="html")
    # Wrap the table so wide tables scroll horizontally instead of wrapping.
    display(HTML(
        f"<div style='overflow-x: auto; white-space: nowrap;'>{table_html}</div>"
    ))