# Library

In [2]:
import pandas as pd
import numpy as np
import os
import re
import collections
#from gensim.parsing.preprocessing import remove_stopwords
import nltk
from nltk.corpus import stopwords
import itertools 
from nltk.tokenize import word_tokenize
from string import punctuation

from nltk import ngrams

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

# Functions

In [3]:
def read_texts(path):
    data = []
    file_name = os.listdir(path)

    for name in file_name:
        if name.endswith('.txt'):
            with open(path + name,encoding="utf8") as f:
                text = f.read()
                data.append({'nombre':name.replace('.txt',''), 'texto':text})

    df = pd.DataFrame(data)
    return df

# Load Data

### Testing

220 clinical cases.

In [3]:
testing_abbr = pd.read_csv("../datasets/testing_set/clinical_cases.abbreviations.testing_set.tsv", sep = '\t')
testing_met = pd.read_csv("../datasets/testing_set/clinical_cases.metadata.testing_set.tsv", sep = '\t')
testing_rel = pd.read_csv("../datasets/testing_set/clinical_cases.relations.testing_set.tsv", sep = '\t')

In [4]:
testing_met.head()

Unnamed: 0,# Document_ID,Case_ID,ISSN,Date,Source,Full_Text_Link
0,S1130-01082006000700014-1.txt,1.txt,1130-0108,2006-07-01,Revista Española de Enfermedades Digestivas v...,http://scielo.isciii.es/scielo.php?script=sci_...
1,S1130-01082007000300006-7.txt,7.txt,1130-0108,2007-03-01,Revista Española de Enfermedades Digestivas v...,http://scielo.isciii.es/scielo.php?script=sci_...
2,S1134-80462009000100005-1.txt,1.txt,1134-8046,2009-02-01,Revista de la Sociedad Española del Dolor v.1...,http://scielo.isciii.es/scielo.php?script=sci_...
3,S1137-66272014000300015-1.txt,1.txt,1137-6627,2014-12-01,Anales del Sistema Sanitario de Navarra v.37 ...,http://scielo.isciii.es/scielo.php?script=sci_...
4,S0365-66912004001200011-1.txt,1.txt,0365-6691,2004-12-01,Archivos de la Sociedad Española de Oftalmolog...,http://scielo.isciii.es/scielo.php?script=sci_...


In [5]:
testing_rel = testing_rel.reset_index()

In [6]:
testing_rel.columns = ['# Document_ID', 'Mention_A_type', 'Mention_A_StartOffset',
      'Mention_A', 'Relation_type', 'Mention_B_type',
       'Mention_B_StartOffset', 'Mention_B_EndOffset', 'Mention_B']

In [7]:
testing_rel = testing_rel.rename(columns = {'# Document_ID': 'doc_id'})

In [8]:
testing_rel.head()

Unnamed: 0,doc_id,Mention_A_type,Mention_A_StartOffset,Mention_A,Relation_type,Mention_B_type,Mention_B_StartOffset,Mention_B_EndOffset,Mention_B
0,S0211-69952013000500019-1,SHORT_FORM,3739,ARA II,SHORT-LONG,LONG_FORM,3695,3737,antagonista del receptor de angiotesina II
1,S0211-69952013000500019-1,SHORT_FORM,2793,PCR,SHORT-LONG,LONG_FORM,2798,2837,reacción en cadena de enzima polimerasa
2,S0365-66912004000600008-1,SHORT_FORM,406,AV,SHORT-LONG,LONG_FORM,390,404,agudeza visual
3,S0211-69952012000500025-1,SHORT_FORM,945,angio-TAC,SHORT-LONG,LONG_FORM,908,943,angiotomografía computarizada axial
4,S1130-05582017000200122-1,SHORT_FORM,940,RMN,SHORT-LONG,LONG_FORM,910,938,resonancia magnética nuclear


In [9]:
testing_rel.Relation_type.unique()

array(['SHORT-LONG', 'SHORT-NESTED', 'NESTED-LONG'], dtype=object)

In [10]:
testing_abbr = testing_abbr.rename(columns = {'# Document_ID': 'doc_id'})

In [11]:
testing_abbr.head()

Unnamed: 0,doc_id,StartOffset,EndOffset,Abbreviation,Definition,Definition_lemmatized
0,S0004-06142010000500014-1,2037,2044,16SrRNA,16s ribosomal rna,16s ribosomal rno
1,S0004-06142010000500014-1,1349,1351,M.,mycobacterium,mycobacterium
2,S0004-06142010000500014-1,1339,1342,PCR,polymerase chain reaction,polymerase chain reaction
3,S0004-06142010000500014-1,611,615,BHCG,beta-human chorionic gonadotropin,beta-humar chorionic gonadotropin
4,S0004-06142010000500014-1,594,597,CEA,carcinoembrionary antigen,carcinoembrionary antiger


In [12]:
testing_raw = read_texts("../datasets/testing_set/testing_set.raw_text/")

In [13]:
testing_raw.head()

Unnamed: 0,nombre,texto
0,S1139-76322009000700016-1,Paciente de sexo femenino de 13 años y 7 meses...
1,S0210-48062007000700015-1,Varón de 72 años con antecedentes personales d...
2,S0212-71992005000500009-1,"Varón de 81 años, con antecedentes de fibrilac..."
3,S0365-66912004001200011-1,Paciente varón de 52 años que acudió a urgenci...
4,S1130-01082009000900015-1,Varón de 54 años con episodios de pancreatitis...


### Backgound

2879 clinical cases. 220 will be use for clinical evaluation

In [14]:
back_met = pd.read_csv("../datasets/background_test/clinical_cases.metadata.background_set.tsv", sep = '\t')

In [15]:
back_met = back_met.rename(columns = {'# Document_ID': 'doc_id'})

In [16]:
back_met.head()

Unnamed: 0,doc_id,Case_ID,ISSN,Date,Source,Full_Text_Link
0,S0325-00752010000100014.txt,1.txt,0325-0075,02/2010,Archivos argentinos de pediatría,http://www.scielo.org.ar/scielo.php?script=sci...
1,S0325-00752013000200014.txt,1.txt,0325-0075,04/2013,Archivos argentinos de pediatría,http://www.scielo.org.ar/scielo.php?script=sci...
2,S0325-00752011000400017.txt,1.txt,0325-0075,08/2011,Archivos argentinos de pediatría,http://www.scielo.org.ar/scielo.php?script=sci...
3,S0325-00752013000600022.txt,1.txt,0325-0075,12/2013,Archivos argentinos de pediatría,http://www.scielo.org.ar/scielo.php?script=sci...
4,S0325-00752008000500013.txt,1.txt,0325-0075,10/2008,Archivos argentinos de pediatría,http://www.scielo.org.ar/scielo.php?script=sci...


In [17]:
back_raw = read_texts("../datasets/background_test/background_test_set/")

In [18]:
back_raw.head()

Unnamed: 0,nombre,texto
0,S0212-71992004000800006-1,Mujer de 57 años de edad con antecedentes pers...
1,S0716-10182014000100007-1,Se recibió en el Hospital Veterinario Puente A...
2,S1130-01082009000200016-1,"Mujer de 47 años de edad, sin antecedentes de ..."
3,S0716-10182015000400016-1,"Escolar de 11 años de edad, género femenino, s..."
4,S1130-01082004001200010-2,"Varón de 25 años, remitido a nuestro Servicio ..."


### Development

146 clinical cases

In [19]:
dev_abbr = pd.read_csv("../datasets/development_set/clinical_cases.abbreviations.development_set.tsv", sep = '\t')
dev_met = pd.read_csv("../datasets/development_set/clinical_cases.metadata.development_set.tsv", sep = '\t')
dev_rel = pd.read_csv("../datasets/development_set/clinical_cases.relations.development_set.tsv", sep = '\t')

In [20]:
dev_met = dev_met.rename(columns = {'# Document_ID': 'doc_id'})

In [21]:
dev_met.head()

Unnamed: 0,doc_id,Case_ID,ISSN,Date,Source,Full_Text_Link
0,S1130-14732005000300004-1.txt,1.txt,1130-1473,2005-06-01,Neurocirugía v.16 n.3 2005,http://scielo.isciii.es/scielo.php?script=sci_...
1,S1130-01082008000200009-1.txt,1.txt,1130-0108,2008-02-01,Revista Española de Enfermedades Digestivas v...,http://scielo.isciii.es/scielo.php?script=sci_...
2,S1137-66272012000300021-1.txt,1.txt,1137-6627,2012-12-01,Anales del Sistema Sanitario de Navarra v.35 ...,http://scielo.isciii.es/scielo.php?script=sci_...
3,S1699-695X2016000200010-1.txt,1.txt,1699-695X,2016-06-01,Revista Clínica de Medicina de Familia v.9 n....,http://scielo.isciii.es/scielo.php?script=sci_...
4,S1130-01082007001100009-1.txt,1.txt,1130-0108,2007-11-01,Revista Española de Enfermedades Digestivas v...,http://scielo.isciii.es/scielo.php?script=sci_...


In [22]:
dev_rel = dev_rel.reset_index()

In [23]:
dev_rel.columns = ['# Document_ID', 'Mention_A_type', 'Mention_A_StartOffset',
      'Mention_A', 'Relation_type', 'Mention_B_type',
       'Mention_B_StartOffset', 'Mention_B_EndOffset', 'Mention_B']

In [24]:
dev_rel = dev_rel.rename(columns = {'# Document_ID': 'doc_id'})

In [25]:
dev_rel.head()

Unnamed: 0,doc_id,Mention_A_type,Mention_A_StartOffset,Mention_A,Relation_type,Mention_B_type,Mention_B_StartOffset,Mention_B_EndOffset,Mention_B
0,S1888-75462015000400006-1,SHORT_FORM,1436,AP,SHORT-LONG,LONG_FORM,1419,1434,anteroposterior
1,S0210-56912009000700006-2,SHORT_FORM,876,angio-RMN,SHORT-LONG,LONG_FORM,858,874,angiorresonancia
2,S0210-56912009000700006-2,SHORT_FORM,819,RMN,SHORT-LONG,LONG_FORM,789,817,resonancia magnética nuclear
3,S1698-44472005000300015-1,SHORT_FORM,209,ATM,SHORT-LONG,LONG_FORM,177,207,articulación témporomandibular
4,S1698-44472005000300015-1,SHORT_FORM,1125,TC,SHORT-LONG,LONG_FORM,1099,1123,tomografía computerizada


In [26]:
dev_abbr = dev_abbr.rename(columns = {'# Document_ID': 'doc_id'})

In [27]:
dev_abbr.head()

Unnamed: 0,doc_id,StartOffset,EndOffset,Abbreviation,Definition,Definition_lemmatized
0,S1130-14732005000300004-1,1216,1218,C3,tercera vértebra cervical,tercero vértebra cervical
1,S1130-14732005000300004-1,717,719,C2,segunda vértebra cervical,segundo vértebra cervical
2,S1130-14732005000300004-1,3191,3193,C3,tercera vértebra cervical,tercero vértebra cervical
3,S1130-14732005000300004-1,2867,2869,C3,tercera vértebra cervical,tercero vértebra cervical
4,S1130-14732005000300004-1,2862,2864,C2,segunda vértebra cervical,segundo vértebra cervical


In [28]:
dev_raw = read_texts("../datasets/development_set/development_set.raw_text/")

In [29]:
dev_raw.head()

Unnamed: 0,nombre,texto
0,S1139-76322017000200010-1,Niña de dos años y diez meses con antecedentes...
1,S0365-66912005001100008-1,Se presenta el caso de un varón de 45 años que...
2,S1130-01082006001000017-1,Mujer de 42 años de edad con antecedentes pers...
3,S0212-71992005001200008-1,Paciente de 57 años con ingresos hospitalarios...
4,S0365-66912007000300010-1,Paciente de 33 años que el 20-08-05 es traslad...


### Sample test

15 clinical cases

In [30]:
sample_abbr = pd.read_csv("../datasets/sample_set/clinical_cases.abbreviations.sample_set.tsv", sep = '\t')
sample_met = pd.read_csv("../datasets/sample_set/clinical_cases.metadata.sample_set.tsv", sep = '\t')
sample_rel = pd.read_csv("../datasets/sample_set/clinical_cases.relations.sample_set.tsv", sep = '\t')

In [31]:
sample_met = sample_met.rename(columns = {'# Document_ID': 'doc_id'})

In [32]:
sample_met.head()

Unnamed: 0,doc_id,Case_ID,ISSN,Date,Source,Full_Text_Link
0,S0004-06142006000900015-1,1,0004-0614,2006-11-01,Archivos Españoles de Urología (Ed. impresa) ...,http://scielo.isciii.es/scielo.php?script=sci_...
1,S0004-06142006000600015-1,1,0004-0614,2006-08-01,Archivos Españoles de Urología (Ed. impresa) ...,http://scielo.isciii.es/scielo.php?script=sci_...
2,S0004-06142007000700014-1,1,0004-0614,2007-09-01,Archivos Españoles de Urología (Ed. impresa) ...,http://scielo.isciii.es/scielo.php?script=sci_...
3,S0004-06142007000900013-1,1,0004-0614,2007-11-01,Archivos Españoles de Urología (Ed. impresa) ...,http://scielo.isciii.es/scielo.php?script=sci_...
4,S0004-06142006000200014-1,1,0004-0614,2006-03-01,Archivos Españoles de Urología (Ed. impresa) ...,http://scielo.isciii.es/scielo.php?script=sci_...


In [33]:
sample_rel = sample_rel.rename(columns = {'# Document_ID': 'doc_id'})

In [34]:
sample_rel.head()

Unnamed: 0,doc_id,Mention_A_type,Mention_A_StartOffset,Mention_A_EndOffset,Mention_A,Relation_type,Mention_B_type,Mention_B_StartOffset,Mention_B_EndOffset,Mention_B
0,S0004-06142006000700014-1,SHORT_FORM,926,929,CEA,SHORT-LONG,LONG_FORM,896,924,Antígeno Carcino Embrionario
1,S0004-06142005001000011-1,SHORT_FORM,1626,1629,ROT,SHORT-LONG,LONG_FORM,1600,1624,reflejos osteotendinosos
2,S0004-06142005001000011-1,SHORT_FORM,1715,1718,RMN,SHORT-LONG,LONG_FORM,1685,1713,resonancia magnética nuclear
3,S0004-06142005001000011-1,SHORT_FORM,1663,1666,RCP,SHORT-LONG,LONG_FORM,1639,1661,reflejo cutaneoplantar
4,S0004-06142005001000011-1,SHORT_FORM,1808,1811,LCR,SHORT-LONG,LONG_FORM,1783,1806,líquido cefalorraquídeo


In [35]:
sample_abbr = sample_abbr.rename(columns = {'# Document_ID': 'doc_id'})

In [36]:
sample_abbr.head()

Unnamed: 0,doc_id,StartOffset,EndOffset,Abbreviation,Definition,Definition_lemmatized
0,S0004-06142005001000011-1,1034,1036,Kg,kilogramo,kilogramo
1,S0004-06142005001000011-1,1031,1033,mg,miligramo,miligramo
2,S0004-06142005001000011-1,196,199,IgA,inmunoglobulina a,inmunoglobulina a
3,S0004-06142005001000011-1,2057,2060,LCR,líquido cefalorraquídeo,líquido cefalorraquídeo
4,S0004-06142005001000011-1,1594,1598,EEII,extremidades inferiores,extremidad inferior


In [37]:
sample_raw = read_texts("../datasets/sample_set/sample_set.raw_text/")

In [38]:
sample_raw.head()

Unnamed: 0,nombre,texto
0,S0004-06142006000600014-1,"Paciente varón, de 40 años de edad, con antece..."
1,S0004-06142006000300015-1,Paciente de 50 años con antecedente de litiasi...
2,S0004-06142007000900013-1,Presentamos el caso de un recién nacido de tre...
3,S0004-06142005001000011-1,Varón de 58 años de edad en el momento del tra...
4,S0004-06142006000700013-1,"Paciente varón, de 63 años de edad, mestizo, d..."


### Trainning

318 clinical cases

In [9]:
train_abbr = pd.read_csv("../datasets/trainning_set/clinical_cases.abbreviations.training_set.tsv", sep = '\t')
train_met = pd.read_csv("../datasets/trainning_set/clinical_cases.metadata.training_set.tsv", sep = '\t')
train_rel = pd.read_csv("../datasets/trainning_set/clinical_cases.relations.training_set.tsv", sep = '\t')

In [40]:
train_met = train_met.rename(columns = {'# Document_ID': 'doc_id'})

In [41]:
train_met.head()

Unnamed: 0,doc_id,Case_ID,ISSN,Date,Source,Full_Text_Link
0,S1139-76322015000500009-1.txt,1.txt,1139-7632,2015-12-01,Pediatría Atención Primaria v.17 n.68 2015,http://scielo.isciii.es/scielo.php?script=sci_...
1,S1130-05582008000400007-2.txt,2.txt,1130-0558,2008-08-01,Revista Española de Cirugía Oral y Maxilofacia...,http://scielo.isciii.es/scielo.php?script=sci_...
2,S0210-48062006000100012-1.txt,1.txt,0210-4806,2006-01-01,Actas Urológicas Españolas v.30 n.1 2006,http://scielo.isciii.es/scielo.php?script=sci_...
3,S0213-12852003000500002-1.txt,1.txt,0213-1285,2003-10-01,Avances en Odontoestomatología v.19 n.5 2003,http://scielo.isciii.es/scielo.php?script=sci_...
4,S0212-71992005000400007-1.txt,1.txt,0212-7199,2005-04-01,Anales de Medicina Interna v.22 n.4 2005,http://scielo.isciii.es/scielo.php?script=sci_...


In [42]:
train_rel = train_rel.reset_index()

In [43]:
train_rel.columns = ['# Document_ID', 'Mention_A_type', 'Mention_A_StartOffset',
      'Mention_A', 'Relation_type', 'Mention_B_type',
       'Mention_B_StartOffset', 'Mention_B_EndOffset', 'Mention_B']

In [44]:
train_rel = train_rel.rename(columns = {'# Document_ID': 'doc_id'})

In [45]:
train_rel.head()

Unnamed: 0,doc_id,Mention_A_type,Mention_A_StartOffset,Mention_A,Relation_type,Mention_B_type,Mention_B_StartOffset,Mention_B_EndOffset,Mention_B
0,S1130-01082009000400014-1,SHORT_FORM,476,NPT,SHORT-LONG,LONG_FORM,454.0,474.0,nutrición parenteral
1,S1130-63432016000100009-1,SHORT_FORM,614,NIHSS,SHORT-LONG,LONG_FORM,621.0,662.0,National Institute of Health Stroke Scale
2,S1139-76322017000200007-1,SHORT_FORM,1145,CMV,SHORT-LONG,LONG_FORM,1128.0,1143.0,citomegalovirus
3,S1139-76322017000200007-1,SHORT_FORM,1243,VSG,SHORT-LONG,LONG_FORM,1206.0,1241.0,velocidad de sedimentación globular
4,S1139-76322017000200007-1,SHORT_FORM,1300,IGRA,SHORT-LONG,LONG_FORM,1267.0,1298.0,interferon-gamma release assays


In [46]:
train_abbr = train_abbr.rename(columns = {'# Document_ID': 'doc_id'})

In [47]:
train_abbr.Definition.nunique()

908

In [4]:
train_raw = read_texts("../datasets/trainning_set/training_set.raw_text/")

In [5]:
train_raw = train_raw.rename(columns = {'nombre': 'doc_id'})

In [6]:
train_raw.head()

Unnamed: 0,doc_id,texto
0,S1130-05582012000300005-1,Acude a nuestras consultas a un paciente que p...
1,S0212-71992005000400009-1,"Se trataba de un varón de 27 años de edad, que..."
2,S0004-06142008000700015-2,Varón de 33 años fumador de un paquete de ciga...
3,S0210-56912006000800008-1,"Hombre de 42 años, bebedor de más de 100 g de ..."
4,S0376-78922009000300010-1,Paciente de 18 años de edad que 5 meses antes ...


## Clean text

In [7]:
import itertools
import pandas as pd
import numpy as np
import re
import os
from tqdm import tqdm

# Drawing the embeddings
import matplotlib.pyplot as plt

# Deep learning:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input,Dense

from scipy import sparse

Pasamos los textos a diccionario o los almacenamos en una lsita

In [8]:
# data = dict(zip(train_raw.doc_id, train_raw.texto))
data  = train_raw['texto'].tolist()

In [9]:
len(data)

318

In [9]:
#nltk.download('stopwords')
swords = list(set(stopwords.words('spanish')))

In [10]:
def clean_text(string):
    """
    A method to clean text 
    """
    
    # Removing the punctuations
    for x in string.lower(): 
        if x in punctuation: 
            string = string.replace(x, "") 

#     # Converting the text to lower
#     string = string.lower()

    # Removing stop words
    string = ' '.join([word for word in string.split() if word not in swords])

    # Cleaning the whitespaces
    string = re.sub(r'\s+', ' ', string).strip()

    return string 

In [11]:
# Defining the window for context
window = 3

# Creating a placeholder for the scanning of the word list
word_lists = []
all_text = []

for text in data:

    # Cleaning the text
    text = clean_text(text)

    # Appending to the all text list
    all_text.append(text) 

    # Creating a context dictionary
    for i, word in enumerate(word_tokenize(text)):
        for w in range(window):
            # Getting the context that is ahead by *window* words
            if i + 1 + w < len(word_tokenize(text)): 
                word_lists.append([word] + [word_tokenize(text)[(i + 1 + w)]])
            # Getting the context that is behind by *window* words    
            if i - w - 1 >= 0:
                word_lists.append([word] + [word_tokenize(text)[(i - w - 1)]])

In [12]:
def create_unique_word_dict(text):
    """
    A method that creates a dictionary where the keys are unique words
    and key values are indices
    """
    # Getting all the unique words from our text and sorting them alphabetically
    words = list(set(word_tokenize(text)))
    words.sort()

    # Creating the dictionary for the unique words
    unique_word_dict = {}
    for i, word in enumerate(words):
        unique_word_dict.update({
            word: i
        })

    return unique_word_dict 

In [13]:
unique_word_dict = create_unique_word_dict(text)

Ahora lo hacemos con redes neuronales

In [16]:
from scipy import sparse
import numpy as np

# Defining the number of features (unique words)
n_words = len(unique_word_dict)

# Getting all the unique words 
words = list(unique_word_dict.keys())

# Creating the X and Y matrices using one hot encoding
X = []
Y = []

for i, word_list in tqdm(enumerate(word_lists)):
    # Getting the indices
    main_word_index = unique_word_dict.get(word_list[0])
    context_word_index = unique_word_dict.get(word_list[1])

    # Creating the placeholders   
    X_row = np.zeros(n_words)
    Y_row = np.zeros(n_words)

    # One hot encoding the main word
    X_row[main_word_index] = 1

    # One hot encoding the Y matrix words 
    Y_row[context_word_index] = 1

    # Appending to the main matrices
    X.append(X_row)
    Y.append(Y_row)

# Converting the matrices into an array
X = np.asarray(X)
Y = np.asarray(Y)

412356it [00:02, 180306.06it/s]


In [None]:
# Defining the size of the embedding
embed_size = 2

# Defining the neural network
inp = Input(shape=(X.shape[1],))
x = Dense(units=embed_size, activation='linear')(inp)
x = Dense(units=Y.shape[1], activation='softmax')(x)
model = Model(inputs=inp, outputs=x)
model.compile(loss = 'categorical_crossentropy', optimizer = 'adam')

# Optimizing the network weights
model.fit(
    x=X, 
    y=Y, 
    batch_size=256,
    epochs=1000
    )

# Obtaining the weights from the neural network. 
# These are the so called word embeddings

# The input layer 
weights = model.get_weights()[0]

# Creating a dictionary to store the embeddings in. The key is a unique word and 
# the value is the numeric vector
embedding_dict = {}
for word in words: 
    embedding_dict.update({
        word: weights[unique_word_dict.get(word)]
        })

In [141]:
embedding_dict

{'13800': array([ 20.779772, -21.137363], dtype=float32),
 '18': array([ 20.80568 , -20.965282], dtype=float32),
 '2': array([ 20.898937, -21.040941], dtype=float32),
 '3': array([ 20.945713, -21.048672], dtype=float32),
 '30': array([ 20.963013, -20.828737], dtype=float32),
 '4': array([ 20.963993, -20.911514], dtype=float32),
 '5': array([ 20.817879, -20.929903], dtype=float32),
 '7': array([ 21.00971 , -20.880281], dtype=float32),
 'A': array([ 21.103846, -21.04492 ], dtype=float32),
 'En': array([ 20.899458, -20.949558], dtype=float32),
 'Gran': array([ 20.834793, -20.856009], dtype=float32),
 'Había': array([ 20.946804, -20.923315], dtype=float32),
 'La': array([ 20.92101, -21.03038], dtype=float32),
 'Paciente': array([ 20.90998 , -20.898355], dtype=float32),
 'Presentaba': array([ 20.782146, -20.863832], dtype=float32),
 'Se': array([ 21.025097, -20.94296 ], dtype=float32),
 'Servicio': array([ 20.799217, -20.971178], dtype=float32),
 'Tsur': array([ 21.017084, -20.803314], dtyp

In [None]:
import matplotlib.pyplot as plt
plt.figure(figsize=(15, 15))
for word in list(unique_word_dict.keys()):
  coord = embedding_dict.get(word)
  plt.scatter(coord[0], coord[1])
  plt.annotate(word, (coord[0], coord[1]))