In [1]:
import nltk

In [2]:
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag

In [3]:
ex = 'European authorities fined Google a record $5.1 billion on Wednesday for abusing its power in the mobile phone market and ordered the company to alter its practices'

In [4]:
def preprocess(sent):
    sent = nltk.word_tokenize(sent)
    sent = nltk.pos_tag(sent)
    return sent

In [5]:
sent = preprocess(ex)

In [6]:
sent

[('European', 'JJ'),
 ('authorities', 'NNS'),
 ('fined', 'VBD'),
 ('Google', 'NNP'),
 ('a', 'DT'),
 ('record', 'NN'),
 ('$', '$'),
 ('5.1', 'CD'),
 ('billion', 'CD'),
 ('on', 'IN'),
 ('Wednesday', 'NNP'),
 ('for', 'IN'),
 ('abusing', 'VBG'),
 ('its', 'PRP$'),
 ('power', 'NN'),
 ('in', 'IN'),
 ('the', 'DT'),
 ('mobile', 'JJ'),
 ('phone', 'NN'),
 ('market', 'NN'),
 ('and', 'CC'),
 ('ordered', 'VBD'),
 ('the', 'DT'),
 ('company', 'NN'),
 ('to', 'TO'),
 ('alter', 'VB'),
 ('its', 'PRP$'),
 ('practices', 'NNS')]

In [7]:
pattern = 'NP: {<DT>?<JJ>*<NN>}'

In [8]:
cp = nltk.RegexpParser(pattern)
cs = cp.parse(sent)
print(cs)

(S
  European/JJ
  authorities/NNS
  fined/VBD
  Google/NNP
  (NP a/DT record/NN)
  $/$
  5.1/CD
  billion/CD
  on/IN
  Wednesday/NNP
  for/IN
  abusing/VBG
  its/PRP$
  (NP power/NN)
  in/IN
  (NP the/DT mobile/JJ phone/NN)
  (NP market/NN)
  and/CC
  ordered/VBD
  (NP the/DT company/NN)
  to/TO
  alter/VB
  its/PRP$
  practices/NNS)


In [9]:
cs.draw()


In [10]:
from nltk.chunk import conlltags2tree, tree2conlltags
from pprint import pprint
iob_tagged = tree2conlltags(cs)
pprint(iob_tagged)


[('European', 'JJ', 'O'),
 ('authorities', 'NNS', 'O'),
 ('fined', 'VBD', 'O'),
 ('Google', 'NNP', 'O'),
 ('a', 'DT', 'B-NP'),
 ('record', 'NN', 'I-NP'),
 ('$', '$', 'O'),
 ('5.1', 'CD', 'O'),
 ('billion', 'CD', 'O'),
 ('on', 'IN', 'O'),
 ('Wednesday', 'NNP', 'O'),
 ('for', 'IN', 'O'),
 ('abusing', 'VBG', 'O'),
 ('its', 'PRP$', 'O'),
 ('power', 'NN', 'B-NP'),
 ('in', 'IN', 'O'),
 ('the', 'DT', 'B-NP'),
 ('mobile', 'JJ', 'I-NP'),
 ('phone', 'NN', 'I-NP'),
 ('market', 'NN', 'B-NP'),
 ('and', 'CC', 'O'),
 ('ordered', 'VBD', 'O'),
 ('the', 'DT', 'B-NP'),
 ('company', 'NN', 'I-NP'),
 ('to', 'TO', 'O'),
 ('alter', 'VB', 'O'),
 ('its', 'PRP$', 'O'),
 ('practices', 'NNS', 'O')]


In [12]:
ne_tree = nltk.ne_chunk(pos_tag(word_tokenize(ex)))
print(ne_tree)

(S
  (GPE European/JJ)
  authorities/NNS
  fined/VBD
  (PERSON Google/NNP)
  a/DT
  record/NN
  $/$
  5.1/CD
  billion/CD
  on/IN
  Wednesday/NNP
  for/IN
  abusing/VBG
  its/PRP$
  power/NN
  in/IN
  the/DT
  mobile/JJ
  phone/NN
  market/NN
  and/CC
  ordered/VBD
  the/DT
  company/NN
  to/TO
  alter/VB
  its/PRP$
  practices/NNS)


In [13]:
import spacy


ModuleNotFoundError: No module named 'spacy'

In [14]:
!pip install spacy

Collecting spacy
  Downloading spacy-3.0.3-cp38-cp38-win_amd64.whl (11.8 MB)
Collecting preshed<3.1.0,>=3.0.2
  Downloading preshed-3.0.5-cp38-cp38-win_amd64.whl (112 kB)
Collecting thinc<8.1.0,>=8.0.0
  Downloading thinc-8.0.1-cp38-cp38-win_amd64.whl (1.0 MB)
Collecting wasabi<1.1.0,>=0.8.1
  Using cached wasabi-0.8.2-py3-none-any.whl (23 kB)
Collecting srsly<3.0.0,>=2.4.0
  Downloading srsly-2.4.0-cp38-cp38-win_amd64.whl (451 kB)
Collecting pydantic<1.8.0,>=1.7.1
  Downloading pydantic-1.7.3-cp38-cp38-win_amd64.whl (1.8 MB)
Collecting typer<0.4.0,>=0.3.0
  Downloading typer-0.3.2-py3-none-any.whl (21 kB)
Collecting murmurhash<1.1.0,>=0.28.0
  Downloading murmurhash-1.0.5-cp38-cp38-win_amd64.whl (21 kB)
Collecting blis<0.8.0,>=0.4.0
  Downloading blis-0.7.4-cp38-cp38-win_amd64.whl (6.5 MB)
Collecting catalogue<2.1.0,>=2.0.1
  Downloading catalogue-2.0.1-py3-none-any.whl (9.6 kB)
Collecting pathy
  Downloading pathy-0.4.0-py3-none-any.whl (36 kB)
Collecting spacy-legacy<3.1.0,>=3.0.0
 

In [15]:
import spacy
from spacy import displacy
from collections import Counter
import en_core_web_sm
nlp = en_core_web_sm.load()

ModuleNotFoundError: No module named 'en_core_web_sm'

In [16]:
!pip install en_core_web_sm


ERROR: Could not find a version that satisfies the requirement en_core_web_sm (from versions: none)
ERROR: No matching distribution found for en_core_web_sm


In [17]:
!pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_md-2.0.0/en_core_web_md-2.0.0.tar.gz
    

Collecting https://github.com/explosion/spacy-models/releases/download/en_core_web_md-2.0.0/en_core_web_md-2.0.0.tar.gz
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-2.0.0/en_core_web_md-2.0.0.tar.gz (120.8 MB)
Building wheels for collected packages: en-core-web-md
  Building wheel for en-core-web-md (setup.py): started
  Building wheel for en-core-web-md (setup.py): finished with status 'done'
  Created wheel for en-core-web-md: filename=en_core_web_md-2.0.0-py3-none-any.whl size=122523225 sha256=f9b141b51a2f279b6afd16b8e63be2197784f28ffcc59cf508e9d8de0b6e13f4
  Stored in directory: c:\users\niraj\appdata\local\pip\cache\wheels\f2\1d\62\9eb147a5e9a1de1a8275822d6253f6f90fe33a48f3718e772b
Successfully built en-core-web-md
Installing collected packages: en-core-web-md
Successfully installed en-core-web-md-2.0.0


In [20]:
import spacy
from spacy import displacy
from collections import Counter
import en_core_web_sm

ModuleNotFoundError: No module named 'en_core_web_sm'

In [21]:
!pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.0/en_core_web_sm-2.2.0.tar.gz

Collecting https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.0/en_core_web_sm-2.2.0.tar.gz
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.0/en_core_web_sm-2.2.0.tar.gz (12.0 MB)
Building wheels for collected packages: en-core-web-sm
  Building wheel for en-core-web-sm (setup.py): started
  Building wheel for en-core-web-sm (setup.py): finished with status 'done'
  Created wheel for en-core-web-sm: filename=en_core_web_sm-2.2.0-py3-none-any.whl size=12019125 sha256=84b928d2e364324a718fb2ccf693d35740da09311e86c910b491b4a5d7cc1424
  Stored in directory: c:\users\niraj\appdata\local\pip\cache\wheels\fc\31\e9\092e6f05b2817c9cb45804a3d1bf2b9bf6575742c01819337c
Successfully built en-core-web-sm
Installing collected packages: en-core-web-sm
Successfully installed en-core-web-sm-2.2.0


In [22]:
import spacy
from spacy import displacy
from collections import Counter
import en_core_web_sm
nlp = en_core_web_sm.load()



OSError: [E053] Could not read config.cfg from C:\ProgramData\Anaconda3\lib\site-packages\en_core_web_sm\en_core_web_sm-2.2.0\config.cfg

In [23]:
import json

In [24]:
from urllib.request import urlopen
from urllib.parse import quote
import json


In [28]:
baseUrl = 'https://browser.ihtsdotools.org/snowstorm/snomed-ct'
edition = 'MAIN'
version = '2019-07-31'

#Prints fsn of a concept
def getConceptById(id):
    url = baseUrl + '/browser/' + edition + '/' + version + '/concepts/' + id
    response = urlopen(url).read()
    data = json.loads(response.decode('utf-8'))

    print (data['fsn']['term'])

#Prints description by id
def getDescriptionById(id):
    url = baseUrl + '/' + edition + '/' + version + '/descriptions/' + id
    response = urlopen(url).read()
    data = json.loads(response.decode('utf-8'))

    print (data['term'])

#Prints number of concepts with descriptions containing the search term
def getConceptsByString(searchTerm):
    url = baseUrl + '/browser/' + edition + '/' + version + '/concepts?term=' + quote(searchTerm) + '&activeFilter=true&offset=0&limit=50'
    response = urlopen(url).read()
    data = json.loads(response.decode('utf-8'))

    print (data['total'])

#Prints number of descriptions containing the search term with a specific semantic tag
def getDescriptionsByStringFromProcedure(searchTerm, semanticTag):
    url = baseUrl + '/browser/' + edition + '/' + version + '/descriptions?term=' + quote(searchTerm) + '&conceptActive=true&semanticTag=' + quote(semanticTag) + '&groupByConcept=false&searchMode=STANDARD&offset=0&limit=50'
    response = urlopen(url).read()
    data = json.loads(response.decode('utf-8'))

    print (data['totalElements'])

getConceptById('109152007')
getDescriptionById('679406011')
getConceptsByString('heart attack')
getDescriptionsByStringFromProcedure('heart', 'procedure')

Bilirubin test kit (physical object)
Methylphenyltetrahydropyridine (substance)
471023
864


In [26]:
baseUrl = 'https://browser.ihtsdotools.org/snowstorm/snomed-ct'
edition = 'MAIN'
version = '2019-07-31'

#Prints fsn of a concept
def getConceptById(id):
    url = baseUrl + '/browser/' + edition + '/' + version + '/concepts/' + id
    response = urlopen(url).read()
    data = json.loads(response.decode('utf-8'))
    print(data)
getConceptById('109152007')

{'conceptId': '109152007', 'fsn': {'term': 'Bilirubin test kit (physical object)', 'lang': 'en'}, 'pt': {'term': 'Bilirubin test kit', 'lang': 'en'}, 'active': True, 'effectiveTime': '20020131', 'released': True, 'releasedEffectiveTime': 20020131, 'moduleId': '900000000000207008', 'definitionStatus': 'PRIMITIVE', 'descriptions': [{'active': True, 'moduleId': '900000000000207008', 'released': True, 'releasedEffectiveTime': 20170731, 'descriptionId': '173687013', 'term': 'Bilirubin test kit', 'conceptId': '109152007', 'typeId': '900000000000013009', 'acceptabilityMap': {'900000000000509007': 'PREFERRED', '900000000000508004': 'PREFERRED'}, 'type': 'SYNONYM', 'lang': 'en', 'caseSignificance': 'CASE_INSENSITIVE', 'effectiveTime': '20170731'}, {'active': True, 'moduleId': '900000000000207008', 'released': True, 'releasedEffectiveTime': 20170731, 'descriptionId': '3324354018', 'term': 'Bilirubin test kit (physical object)', 'conceptId': '109152007', 'typeId': '900000000000003001', 'acceptabi

In [29]:
https://browser.ihtsdotools.org/snowstorm/snomed-ct/browser/MAIN/2019-07-31/concepts/109152007

SyntaxError: invalid syntax (<ipython-input-29-05ba2fd7a670>, line 1)

In [30]:
url='https://www.mtsamples.com/site/pages/browse.asp?type=89-Discharge%20Summary'
response = urlopen(url).read()
data = json.loads(response.decode('utf-8'))

HTTPError: HTTP Error 403: Forbidden

In [50]:
************************* NER MODEL *************************

SyntaxError: invalid syntax (<ipython-input-50-7f19940deee0>, line 1)

In [32]:
from spacy.lang.en import English

In [33]:
nlp = English()

In [42]:
text = """He determined to drop his litigation with the monastry, and relinguish his claims to the wood-cuting and fishery rights at once. He was the more ready to do this becuase the rights had become much less valuable, and he had 
indeed the vaguest idea where the wood and river in question were."""

In [45]:
my_doc = nlp(text)

In [46]:
token_list = []
for token in my_doc:
    token_list.append(token.text)

In [47]:
from spacy.lang.en.stop_words import STOP_WORDS

# Create list of word tokens after removing stopwords
filtered_sentence =[] 

for word in token_list:
    lexeme = nlp.vocab[word]
    if lexeme.is_stop == False:
        filtered_sentence.append(word) 
#print(token_list)
print(filtered_sentence)

['determined', 'drop', 'litigation', 'monastry', ',', 'relinguish', 'claims', 'wood', '-', 'cuting', 'fishery', 'rights', '.', 'ready', 'becuase', 'rights', 'valuable', ',', '\n', 'vaguest', 'idea', 'wood', 'river', 'question', '.']


In [None]:
************* NLTK *******************

In [54]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize 
import nltk
from nltk.stem import WordNetLemmatizer
set(stopwords.words('english'))

text = """He determined to drop his litigation with the monastry, and relinguish his claims to the wood-cuting and 
fishery rights at once. He was the more ready to do this becuase the rights had become much lesser valuable, and he had 
indeed the vaguest idea where the wood and river in question were."""

stop_words = set(stopwords.words('english')) 
  
word_tokens = word_tokenize(text) 
    
filtered_sentence = [] 
  
for w in word_tokens: 
    if w not in stop_words: 
        filtered_sentence.append(w) 
print(filtered_sentence) 

lemma_word = []
import nltk
from nltk.stem import WordNetLemmatizer
wordnet_lemmatizer = WordNetLemmatizer()
for w in filtered_sentence:
    word1 = wordnet_lemmatizer.lemmatize(w, pos = "n")
    word2 = wordnet_lemmatizer.lemmatize(word1, pos = "v")
    word3 = wordnet_lemmatizer.lemmatize(word2, pos = ("a"))
    lemma_word.append(word3)
print(lemma_word)

['He', 'determined', 'drop', 'litigation', 'monastry', ',', 'relinguish', 'claims', 'wood-cuting', 'fishery', 'rights', '.', 'He', 'ready', 'becuase', 'rights', 'become', 'much', 'lesser', 'valuable', ',', 'indeed', 'vaguest', 'idea', 'wood', 'river', 'question', '.']
['He', 'determine', 'drop', 'litigation', 'monastry', ',', 'relinguish', 'claim', 'wood-cuting', 'fishery', 'right', '.', 'He', 'ready', 'becuase', 'right', 'become', 'much', 'less', 'valuable', ',', 'indeed', 'vague', 'idea', 'wood', 'river', 'question', '.']


In [55]:
**************  Word2Vec ******************

SyntaxError: invalid syntax (<ipython-input-55-d9b1c18804c9>, line 1)

In [196]:
text = "hello hello hello hello hello hello hello hello hello hgllo"

# Note the .lower() as upper and lowercase does not matter in our implementation
# [['natural', 'language', 'processing', 'and', 'machine', 'learning', 'is', 'fun', 'and', 'exciting']]
corpus = [[word.lower() for word in text.split()]]
corpus

[['hello',
  'hello',
  'hello',
  'hello',
  'hello',
  'hello',
  'hello',
  'hello',
  'hello',
  'hgllo']]

In [197]:
settings = {'window_size':2,'n':10,'epochs': 50,'learning_rate':0.01}

In [198]:
from collections import defaultdict
import numpy as np

In [199]:
class word2vec():
  def __init__(self):
    self.n = settings['n']
    self.lr = settings['learning_rate']
    self.epochs = settings['epochs']
    self.window = settings['window_size']

  def generate_training_data(self, settings, corpus):
    # Find unique word counts using dictonary
    word_counts = defaultdict(int)
    for row in corpus:
      for word in row:
        word_counts[word] += 1
    ## How many unique words in vocab? 9
    self.v_count = len(word_counts.keys())
    # Generate Lookup Dictionaries (vocab)
    self.words_list = list(word_counts.keys())
    # Generate word:index
    self.word_index = dict((word, i) for i, word in enumerate(self.words_list))
    # Generate index:word
    self.index_word = dict((i, word) for i, word in enumerate(self.words_list))

    training_data = []
    # Cycle through each sentence in corpus
    for sentence in corpus:
      sent_len = len(sentence)
      # Cycle through each word in sentence
      for i, word in enumerate(sentence):
        # Convert target word to one-hot
        w_target = self.word2onehot(sentence[i])
        # Cycle through context window
        w_context = []
        # Note: window_size 2 will have range of 5 values
        for j in range(i - self.window, i + self.window+1):
          # Criteria for context word 
          # 1. Target word cannot be context word (j != i)
          # 2. Index must be greater or equal than 0 (j >= 0) - if not list index out of range
          # 3. Index must be less or equal than length of sentence (j <= sent_len-1) - if not list index out of range 
          if j != i and j <= sent_len-1 and j >= 0:
            # Append the one-hot representation of word to w_context
            w_context.append(self.word2onehot(sentence[j]))
            # print(sentence[i], sentence[j]) 
            # training_data contains a one-hot representation of the target word and context words
        training_data.append([w_target, w_context])
    return np.array(training_data)
  
  def word2onehot(self, word):
    # word_vec - initialise a blank vector
    word_vec = [0 for i in range(0, self.v_count)] # Alternative - np.zeros(self.v_count)
    # Get ID of word from word_index
    word_index = self.word_index[word]
    # Change value from 0 to 1 according to ID of the word
    word_vec[word_index] = 1
    return word_vec
  
  def train(self, training_data):
    self.w1 = np.random.uniform(-1, 1, (self.v_count, self.n))
    self.w2 = np.random.uniform(-1, 1, (self.n, self.v_count))
    
  def forward_pass(self, x):
    # x is one-hot vector for target word, shape - 9x1
    # Run through first matrix (w1) to get hidden layer - 10x9 dot 9x1 gives us 10x1
    h = np.dot(self.w1.T, x)
    # Dot product hidden layer with second matrix (w2) - 9x10 dot 10x1 gives us 9x1
    u = np.dot(self.w2.T, h)
    # Run 1x9 through softmax to force each element to range of [0, 1] - 1x8
    y_c = self.softmax(u)
    return y_c, h, u
  
  def softmax(self, x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)


  # 1. For a target word, calculate difference between y_pred and each of the context words
  # 2. Sum up the differences using np.sum to give us the error for this particular target word
  #EI = np.sum([np.subtract(y_pred, word) for word in w_c], axis=0)

  # Backpropagation
  # We use SGD to backpropagate errors - calculate loss on the output layer 
  #self.backprop(EI, h, w_t)

  # Calculate loss
  # There are 2 parts to the loss function
  # Part 1: -ve sum of all the output +
  # Part 2: length of context words * log of sum for all elements (exponential-ed) in the output layer before softmax (u)
  # Note: word.index(1) returns the index in the context word vector with value 1
  # Note: u[word.index(1)] returns the value of the output layer before softmax
  #self.loss += -np.sum([u[word.index(1)] for word in w_c]) + len(w_c) * np.log(np.sum(np.exp(u)))
  #print('Epoch:', i, "Loss:", self.loss)
  
  def backprop(self, e, h, x):
    # https://docs.scipy.org/doc/numpy-1.15.1/reference/generated/numpy.outer.html
    # Column vector EI represents row-wise sum of prediction errors across each context word for the current center word
    # Going backwards, we need to take derivative of E with respect of w2
    # h - shape 10x1, e - shape 9x1, dl_dw2 - shape 10x9
    dl_dw2 = np.outer(h, e)
    # x - shape 1x8, w2 - 5x8, e.T - 8x1
    # x - 1x8, np.dot() - 5x1, dl_dw1 - 8x5
    dl_dw1 = np.outer(x, np.dot(self.w2, e.T))
    # Update weights
    self.w1 = self.w1 - (self.lr * dl_dw1)
    self.w2 = self.w2 - (self.lr * dl_dw2)
      
  def word_vec(self, word):
    w_index = self.word_index[word]
    v_w = self.w1[w_index]
    return v_w
  
  def vec_sim(self, word, top_n):
    v_w1 = self.word_vec(word)
    word_sim = {}

    for i in range(self.v_count):
      # Find the similary score for each word in vocab
      v_w2 = self.w1[i]
      theta_sum = np.dot(v_w1, v_w2)
      theta_den = np.linalg.norm(v_w1) * np.linalg.norm(v_w2)
      theta = theta_sum / theta_den

      word = self.index_word[i]
      word_sim[word] = theta

    words_sorted = sorted(word_sim.items(), key=lambda kv: kv[1], reverse=True)

    for word, sim in words_sorted[:top_n]:
      print(word, sim)

In [200]:
w2v = word2vec()
# Numpy ndarray with one-hot representation for [target_word, context_words]
training_data = w2v.generate_training_data(settings, corpus)

In [201]:
training_data


array([[list([1, 0]), list([[1, 0], [1, 0]])],
       [list([1, 0]), list([[1, 0], [1, 0], [1, 0]])],
       [list([1, 0]), list([[1, 0], [1, 0], [1, 0], [1, 0]])],
       [list([1, 0]), list([[1, 0], [1, 0], [1, 0], [1, 0]])],
       [list([1, 0]), list([[1, 0], [1, 0], [1, 0], [1, 0]])],
       [list([1, 0]), list([[1, 0], [1, 0], [1, 0], [1, 0]])],
       [list([1, 0]), list([[1, 0], [1, 0], [1, 0], [1, 0]])],
       [list([1, 0]), list([[1, 0], [1, 0], [1, 0], [0, 1]])],
       [list([1, 0]), list([[1, 0], [1, 0], [0, 1]])],
       [list([0, 1]), list([[1, 0], [1, 0]])]], dtype=object)

In [202]:
for i in range(w2v.epochs):
    w2v.train(training_data)


In [203]:
vec = w2v.word_vec("hello")

In [204]:
vec

array([ 0.42449835, -0.77414975,  0.08171911, -0.28020617, -0.68854559,
       -0.85839747, -0.33396541,  0.88135877, -0.59625071,  0.98745925])

In [205]:
w2v.vec_sim("hello", 3)

hello 1.0
hgllo -0.11794247225366143


In [1]:
**************** BERT ************************************************************************************

SyntaxError: invalid syntax (<ipython-input-1-0f93dcc62c54>, line 1)

In [4]:
!pip install sentence-transformers



In [11]:
import numpy as np

In [5]:
from sentence_transformers import SentenceTransformer
sbert_model = SentenceTransformer('bert-base-nli-mean-tokens')

100%|███████████████████████████████████████████████████████████████████████████████| 405M/405M [04:29<00:00, 1.50MB/s]


In [66]:
sentences = ["A 74 year old gentleman, known case of Type 2 diabetes mellitus, hypertension, acute inferior wall MI with complete heart block, presented with complaints of chest pain since 10 days.", 
       "Physical examination showed that patient is conscious and oriented. Pulse-94/min; BP-120/80 mmHg; CVS-S1S2 normal; RS-NVBS; ABD-Soft .", 
       "Brad came to dinner with us.",
       "On admission ECG showed sinus rhythm, QS with T inversion in III, AVF . Echo showed RWMA involving inferior wall with adequate LV systolic function .",
       "TIMI III flow achieved with good end result .",
       "i dont have have cancer"]

In [67]:
def cosine(u, v):
    return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))

In [68]:
sentence_embeddings = sbert_model.encode(sentences)

In [71]:
query = "i dont have heart ache in evening"
query_vec = sbert_model.encode([query])[0]

In [72]:
for sent in sentences:
  sim = cosine(query_vec, sbert_model.encode([sent])[0])
  print("Sentence = ", sent, "; similarity = ", sim)

Sentence =  A 74 year old gentleman, known case of Type 2 diabetes mellitus, hypertension, acute inferior wall MI with complete heart block, presented with complaints of chest pain since 10 days. ; similarity =  -0.009621797
Sentence =  Physical examination showed that patient is conscious and oriented. Pulse-94/min; BP-120/80 mmHg; CVS-S1S2 normal; RS-NVBS; ABD-Soft . ; similarity =  0.34176022
Sentence =  Brad came to dinner with us. ; similarity =  0.18913847
Sentence =  On admission ECG showed sinus rhythm, QS with T inversion in III, AVF . Echo showed RWMA involving inferior wall with adequate LV systolic function . ; similarity =  0.29188272
Sentence =  TIMI III flow achieved with good end result . ; similarity =  0.4672234
Sentence =  i dont have have cancer ; similarity =  0.7527088
