# Load BiLSTM and CRF - clinicalEmb-300-skyp
## NewDataset Lung Cancer

### Loading and using the lung cancer BiLSTM-CRF model

In [1]:
import sys
sys.path.append('./libs')

import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import pickle
import pandas as pd
import seaborn as sn
from itertools import islice

from tabulate import tabulate
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report as eskclarep
from seqeval.metrics import precision_score, recall_score, f1_score, accuracy_score
from seqeval.metrics import classification_report as seqclarep
from sklearn.preprocessing import LabelBinarizer
from itertools import chain
from sklearn.model_selection import KFold

from tensorflow import keras
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Concatenate, Lambda, Input, LSTM, Embedding, Dense, TimeDistributed, Dropout, Bidirectional, InputLayer, Activation, Flatten, Masking
from tensorflow.keras.optimizers import Adam, schedules

from tf2crf import CRF as crf6
from mwrapper import ModelWithCRFLoss, ModelWithCRFLossDSCLoss
from utils import build_matrix_embeddings as bme, plot_model_performance, logits_to_tokens, report_to_df2

from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import TensorBoard

from IPython.core.display import display, HTML

import datetime, os
import random

In [2]:
# Single seed shared by every random number generator used in this notebook.
SEED = 42

# Environment switches. NOTE(review): PYTHONHASHSEED is read at interpreter
# start-up, so setting it here is expected to affect only processes launched
# afterwards — confirm if hash determinism of this kernel matters.
os.environ.update({
    'PYTHONHASHSEED': str(SEED),
    'TF_CUDNN_DETERMINISTIC': '1',  # TF 2.1+
})

# Seed Python's `random`, NumPy and TensorFlow, in that order.
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(SEED)

# Name of the trained model; it is also the sub-folder name used for its
# logs and for its SavedModel export.
MODEL = 'new-clinial-base-model-new-labels-name'

# Base folders and the per-model folders derived from them.
logs_base_dir = "./logs"
save_base_dir = './saved_model'
log_dir  = "/".join((logs_base_dir, MODEL))
save_dir = "/".join((save_base_dir, MODEL))

# Make sure every folder exists (no error when it is already there).
for required_dir in (logs_base_dir, log_dir, save_base_dir, save_dir):
    os.makedirs(required_dir, exist_ok=True)

#%load_ext tensorboard

# Enable on-demand GPU memory allocation on every visible GPU and report
# how many physical / logical GPUs TensorFlow sees.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Memory growth currently has to be configured identically on all GPUs.
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)

        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        # Raised when memory growth is set after the GPUs were initialized;
        # the message is printed and execution continues.
        print(err)

2 Physical GPUs, 2 Logical GPUs


In [3]:
# ****** PARAMETER DEFINITIONS ******
# Vocabulary size; the "+ 2" presumably accounts for the -PAD- and -OOV-
# entries (TODO confirm against word2idx). Previous value: 13705 + 2.
NUM_WORDS = 14485 + 2
# Fixed sentence length used when padding the encoded samples.
LEN_SENTS = 306
# 37 entity labels plus the -PAD- and -OOV- entries of the tag vocabulary.
NUM_TAGS = 37 + 2


# ****** HYPERPARAMETER DEFINITIONS ******
# Not referenced by the inference cells visible in this notebook.
_EPOCHS     = 50
EMBED_DIM   = 300
_DROPOUT    = 0.5
_BACH_SIZE  = 512 + 256 + 128
_LEARN_RATE = 5e-3
NUM_FOLDS   = 5
VAL_SPLIT   = 0.2

# Two independent, initially empty accumulators.
prime_data_fold, total_data_fold = [], []

## Loading Vectors

In [5]:
# Load the three lookup dictionaries (word -> index, tag -> index, index -> tag).
# Each .npy file stores a single pickled object, unwrapped with .item().
# NOTE(review): allow_pickle=True can execute arbitrary code while loading;
# only use these files when they come from a trusted source.
word2idx, tag2idx, idx2tag = (
    np.load(vector_path, allow_pickle=True).item()
    for vector_path in (
        './vectors/word2index.npy',
        './vectors/tag2index.npy',
        './vectors/index2tag.npy',
    )
)

In [6]:
# Show the tag -> index mapping, including the special -PAD- and -OOV- entries.
print(tag2idx)

{'B-BIOMARKER_STATUS': 2, 'B-CHEMOTHERAPY': 3, 'I-STAGE': 4, 'I-FREQ': 5, 'B-FAMILY': 6, 'I-IMPLICIT_DATE': 7, 'B-DRUG': 8, 'B-OCURRENCE_EVENT': 9, 'B-STAGE': 10, 'B-COMORBIDITY': 11, 'I-SURGERY': 12, 'I-TNM': 13, 'B-METRIC': 14, 'I-FAMILY': 15, 'I-OCURRENCE_EVENT': 16, 'B-EXPLICIT_DATE': 17, 'I-BIOMARKER': 18, 'B-CANCER_CONCEPT': 19, 'O': 20, 'B-IMPLICIT_DATE': 21, 'B-TNM': 22, 'B-RADIOTHERAPY': 23, 'I-TOXIC_HABIT': 24, 'B-BIOMARKER': 25, 'B-QUANTITY': 26, 'I-DRUG': 27, 'I-BIOMARKER_STATUS': 28, 'I-CANCER_CONCEPT': 29, 'B-TOXIC_HABIT': 30, 'I-CLINICAL_SERVICE': 31, 'B-SURGERY': 32, 'I-COMORBIDITY': 33, 'I-METRIC': 34, 'I-QUANTITY': 35, 'I-EXPLICIT_DATE': 36, 'B-CLINICAL_SERVICE': 37, 'B-FREQ': 38, '-PAD-': 0, '-OOV-': 1}


In [7]:
# Restore the exported NER model from this model's SavedModel folder.
print("\nLoading the model NER ...")
model_path = save_dir + "/"
model = tf.saved_model.load(model_path)
print("\nModel loaded ...")


Loading the model NER ...

Model loaded ...


In [8]:
# NER = Named Entity Recognition.
# The model receives a tokenized sentence and returns one entity tag per token.
# Sentences should be tokenized beforehand; NLTK or spaCy tokenizers for
# Spanish are recommended.
# The samples below are only split on whitespace, so punctuation that is
# attached to a word (e.g. "Gy,") remains part of that token.
NER_samples = [
    sentence.split()
    for sentence in (
        "CICLO 2 CARBOPLATINO / PACLITAXEL . ",
        "En Agosto de 2015 ha recibido 3 ciclos de CISPLATINO / VINORELBINA buena tolerancia clinica .",
        "QT : CISPLATINO 75 mg / m2 DIA 1 IV + VINORELBINA 25 mg / m2 IV DIAS 1,8 - Adenocarcinoma pulmon lobulo superior derecho ",
        "El dia 27 de junio iniciamos tratamiento con quimioterapia segun esquema CARBOPLATINO / PEMETREXED .",
        "CICLO 1 CARBOPLATINO AUC 5 - PEMETREXED 500 mg/m2 IV cada 21 dias .",
        "RT con dosis 50 Gy, se encuentra bien . ",
        "Carcinoma escamoso de pulmón cT3 cN2 cM0 (al menos estadio IIIB de TNM 8ª ed .",
        "Diagnosticado en marzo de 2016 de Adenoca de pulmón cT2cN2cM1a .",
        "Ha sido diagnosticada de cancer de pulmon en marzo de 2019 .",
        "Inicia tratamiento con Cisplatino + Pemetrexed + Bevacizumab (5 ciclos administrados, ultimo en enero de 2014).",
        "Carcinoma escamoso de pulmón intervenido en marzo 2017 .",
        "En 2014, intervenido de carcinoma de pulmón pT2bN1cM0 realizandose nefrectomia derecha .",
        "Carcinoma microcitico de pulmon t4n2m0 en tto quimioterapico: carboplatino / etoposido .",
        "Paciente con Carcinoma de pulmón,  ROS1 negativo, ALK postivo, EFGR negativo. ",
        "Biopsia hepatica 07/05/2019 : bajo control ecografico se localiza masa homogenea tenuemente hipoecogenica de 4,8 x 2,9 x 2,3 cm en segmento ii y se procede a realizar bag ecoguiada de la misma . ",
        "Paciente fumador de 4 cigarrillos al dia",
        "Se visualizan numerosas adenopatias mediastinicas",
    )
]
    

In [9]:
# Encode every tokenized sample as a list of vocabulary ids.
# Tokens are lower-cased before the lookup; words missing from the
# vocabulary fall back to the id of the special '-OOV-' entry.
oov_idx = word2idx['-OOV-']
NER_samples_ids = [
    [word2idx.get(token.lower(), oov_idx) for token in sample]
    for sample in NER_samples
]

# Pad (with 0) or cut every sentence to exactly LEN_SENTS ids.
# truncating='post' keeps the FIRST LEN_SENTS tokens of an over-long sentence.
# The Keras default ('pre') would drop the leading tokens instead, which would
# misalign the predicted tags with the tokens when they are later paired from
# the start of each sentence.
NER_samples_X = pad_sequences(
    NER_samples_ids,
    maxlen=LEN_SENTS,
    padding='post',
    truncating='post',
)

# The "Negation" label text is kept unchanged; the matrix holds the encoded NER samples.
print("Negation: \n", NER_samples_X)

Negation: 
 [[ 9543  7740  9516 ...     0     0     0]
 [12211 11767   328 ...     0     0     0]
 [14296 12691 14451 ...     0     0     0]
 ...
 [ 6052  1325 14168 ...     0     0     0]
 [ 8920  8125   328 ...     0     0     0]
 [ 8965  9493    77 ...     0     0     0]]


In [10]:
# Run the loaded model on the padded id matrix and convert its output to a NumPy array.
# NOTE(review): presumably one row of per-token predictions per sample — confirm the shape.
predictions1 = model(NER_samples_X).numpy()

In [11]:
# Map the raw model predictions back to tag names using idx2tag.
log_tokens1 = logits_to_tokens(predictions1, idx2tag)

# Show the first encoded sentence next to its decoded tag sequence.
first_ids, first_tags = NER_samples_X[0], log_tokens1[0]
print("Sentence ", first_ids)
print("Negation: \n", first_tags)

Sentence  [ 9543  7740  9516  5858  2343 11138     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     

In [12]:
# Render one horizontally scrollable HTML table per sample:
# the tokens are the column headers and the single row holds the predicted tags.
for sample_idx, tokens in enumerate(NER_samples):
    # Keep only as many predicted tags as the sentence has tokens (drops the padding).
    predicted_tags = log_tokens1[sample_idx][:len(tokens)]
    table_html = tabulate([predicted_tags], headers=tokens, tablefmt="html")
    display(HTML(
        "<div style='overflow-x: auto; white-space: nowrap;'>" + table_html + "</div>"
    ))

CICLO,2,CARBOPLATINO,/,PACLITAXEL,.
B-METRIC,B-QUANTITY,B-DRUG,O,B-DRUG,O


En,Agosto,de,2015,ha,recibido,3,ciclos,de.1,CISPLATINO,/,VINORELBINA,buena,tolerancia,clinica,.
O,B-EXPLICIT_DATE,I-EXPLICIT_DATE,I-EXPLICIT_DATE,O,O,B-QUANTITY,B-METRIC,O,B-DRUG,O,B-DRUG,O,O,O,O


QT,:,CISPLATINO,75,mg,/,m2,DIA,1,IV,+,VINORELBINA,25,mg.1,/.1,m2.1,IV.1,DIAS,"1,8",-,Adenocarcinoma,pulmon,lobulo,superior,derecho
O,O,B-DRUG,B-QUANTITY,B-METRIC,I-METRIC,I-METRIC,B-METRIC,B-QUANTITY,O,O,B-DRUG,B-QUANTITY,B-METRIC,I-METRIC,I-METRIC,O,O,B-QUANTITY,O,B-CANCER_CONCEPT,I-CANCER_CONCEPT,I-CANCER_CONCEPT,I-CANCER_CONCEPT,I-CANCER_CONCEPT


El,dia,27,de,junio,iniciamos,tratamiento,con,quimioterapia,segun,esquema,CARBOPLATINO,/,PEMETREXED,.
O,O,B-EXPLICIT_DATE,I-EXPLICIT_DATE,I-EXPLICIT_DATE,B-OCURRENCE_EVENT,I-OCURRENCE_EVENT,O,B-CHEMOTHERAPY,O,O,B-DRUG,O,B-DRUG,O


CICLO,1,CARBOPLATINO,AUC,5,-,PEMETREXED,500,mg/m2,IV,cada,21,dias,.
B-METRIC,B-QUANTITY,B-DRUG,B-METRIC,B-QUANTITY,O,B-DRUG,B-QUANTITY,B-METRIC,O,B-FREQ,I-FREQ,I-FREQ,O


RT,con,dosis,50,"Gy,",se,encuentra,bien,.
O,O,O,B-QUANTITY,B-METRIC,O,O,O,O


Carcinoma,escamoso,de,pulmón,cT3,cN2,cM0,(al,menos,estadio,IIIB,de.1,TNM,8ª,ed,.
B-CANCER_CONCEPT,I-CANCER_CONCEPT,I-CANCER_CONCEPT,I-CANCER_CONCEPT,B-TNM,I-TNM,I-TNM,I-TNM,O,B-STAGE,I-STAGE,O,O,B-QUANTITY,B-METRIC,O


Diagnosticado,en,marzo,de,2016,de.1,Adenoca,de.2,pulmón,cT2cN2cM1a,.
B-OCURRENCE_EVENT,O,B-EXPLICIT_DATE,I-EXPLICIT_DATE,I-EXPLICIT_DATE,O,B-CANCER_CONCEPT,I-CANCER_CONCEPT,I-CANCER_CONCEPT,B-TNM,O


Ha,sido,diagnosticada,de,cancer,de.1,pulmon,en,marzo,de.2,2019,.
O,O,B-OCURRENCE_EVENT,O,B-CANCER_CONCEPT,I-CANCER_CONCEPT,I-CANCER_CONCEPT,O,B-EXPLICIT_DATE,I-EXPLICIT_DATE,I-EXPLICIT_DATE,O


Inicia,tratamiento,con,Cisplatino,+,Pemetrexed,+.1,Bevacizumab,(5,ciclos,"administrados,",ultimo,en,enero,de,2014).
B-OCURRENCE_EVENT,I-OCURRENCE_EVENT,O,B-DRUG,O,B-DRUG,O,B-DRUG,B-QUANTITY,B-METRIC,O,B-OCURRENCE_EVENT,O,B-EXPLICIT_DATE,I-EXPLICIT_DATE,-PAD-


Carcinoma,escamoso,de,pulmón,intervenido,en,marzo,2017,.
B-CANCER_CONCEPT,I-CANCER_CONCEPT,I-CANCER_CONCEPT,I-CANCER_CONCEPT,B-OCURRENCE_EVENT,O,B-EXPLICIT_DATE,I-EXPLICIT_DATE,O


En,"2014,",intervenido,de,carcinoma,de.1,pulmón,pT2bN1cM0,realizandose,nefrectomia,derecha,.
O,B-EXPLICIT_DATE,B-OCURRENCE_EVENT,O,B-CANCER_CONCEPT,I-CANCER_CONCEPT,I-CANCER_CONCEPT,B-TNM,O,B-SURGERY,I-SURGERY,O


Carcinoma,microcitico,de,pulmon,t4n2m0,en,tto,quimioterapico:,carboplatino,/,etoposido,.
B-CANCER_CONCEPT,I-CANCER_CONCEPT,I-CANCER_CONCEPT,I-CANCER_CONCEPT,B-TNM,O,O,O,B-DRUG,O,B-DRUG,O


Paciente,con,Carcinoma,de,"pulmón,",ROS1,"negativo,",ALK,"postivo,",EFGR,negativo.
O,O,B-CANCER_CONCEPT,I-CANCER_CONCEPT,I-CANCER_CONCEPT,B-TNM,I-TNM,B-BIOMARKER,-PAD-,-PAD-,-PAD-


Biopsia,hepatica,07/05/2019,:,bajo,control,ecografico,se,localiza,masa,homogenea,tenuemente,hipoecogenica,de,"4,8",x,"2,9",x.1,"2,3",cm,en,segmento,ii,y,se.1,procede,a,realizar,bag,ecoguiada,de.1,la,misma,.
O,O,O,O,O,O,O,O,O,O,O,O,O,O,B-QUANTITY,I-QUANTITY,I-QUANTITY,I-QUANTITY,I-QUANTITY,B-METRIC,O,O,O,O,O,O,O,O,O,O,O,O,O,O


Paciente,fumador,de,4,cigarrillos,al,dia
O,B-TOXIC_HABIT,O,B-QUANTITY,B-METRIC,O,B-FREQ


Se,visualizan,numerosas,adenopatias,mediastinicas
O,O,O,B-COMORBIDITY,O
