# Med7

In [1]:
!pip install -U spacy

Requirement already up-to-date: spacy in /Users/sdeshpande/opt/anaconda3/lib/python3.8/site-packages (2.3.2)


In [2]:
!pip install -U https://med7.s3.eu-west-2.amazonaws.com/en_core_med7_lg.tar.gz

Collecting https://med7.s3.eu-west-2.amazonaws.com/en_core_med7_lg.tar.gz
  Downloading https://med7.s3.eu-west-2.amazonaws.com/en_core_med7_lg.tar.gz (892.8 MB)
     |████████████████████████████████| 892.8 MB 38 kB/s
Building wheels for collected packages: en-core-med7-lg
  Building wheel for en-core-med7-lg (setup.py) ... done
  Created wheel for en-core-med7-lg: filename=en_core_med7_lg-0.0.3-py3-none-any.whl size=893633388 sha256=835ae931c44ed5c26288ff2885e08c728db1a3fd3d5c8d3104ecf09564b78dd8
  Stored in directory: /private/var/folders/z4/zq1sx9z918l822zf41yhflvr0000gn/T/pip-ephem-wheel-cache-w_mplqza/wheels/88/c7/ea/e3ee3fdfff04db8decf451ccac11cc16333868c059048a4661
Successfully built en-core-med7-lg
Installing collected packages: en-core-med7-lg
Successfully installed en-core-med7-lg-0.0.3


In [3]:
import spacy
import en_core_med7_lg

In [4]:
# Load the Med7 model package imported above; `med7` is the pipeline object
# that the following cells call on text and query for its NER labels.
med7 = en_core_med7_lg.load()

In [5]:
# Give every NER label of the Med7 pipeline its own highlight colour.
seven_colours = ['#e6194B', '#3cb44b', '#ffe119', '#ffd8b1', '#f58231', '#f032e6', '#42d4f4']

# zip() pairs labels and colours by position and stops at the shorter of the
# two sequences, so at most seven labels receive a colour.
col_dict = dict(zip(med7.pipe_labels['ner'], seven_colours))

# displaCy rendering options: which entity labels to show and how to colour them.
options = {'ents': med7.pipe_labels['ner'], 'colors': col_dict}

In [6]:
# Example prescription sentence; per the entity output further down it contains
# drug, dosage, form, route, frequency and duration mentions.
text = 'A patient was prescribed Magnesium hydroxide 400mg/5ml suspension PO of total 30ml  bid for the next 5 days.'
# Run the Med7 pipeline on the sentence; `doc` carries the annotations
# (e.g. `doc.ents`) used by the next cells.
doc = med7(text)

In [7]:
# Render the recognised entities inline in the notebook, restricted to and
# coloured by the labels configured in `options`.
spacy.displacy.render(doc, style='ent', jupyter=True, options=options)

In [8]:
# List every identified entity as a (text, label) pair; the bare expression is
# displayed as the cell's output rather than printed.
[(ent.text, ent.label_) for ent in doc.ents]

[('Magnesium hydroxide', 'DRUG'),
 ('400mg/5ml', 'DOSAGE'),
 ('suspension', 'FORM'),
 ('PO', 'ROUTE'),
 ('30ml', 'DOSAGE'),
 ('bid', 'FREQUENCY'),
 ('for the next 5 days', 'DURATION')]

In [9]:
# Plot the dependency parse of `doc` inline in the notebook
# (style='dep' selects displaCy's dependency visualiser).
spacy.displacy.render(doc, style='dep', jupyter=True)

# Stanza

In [10]:
!pip install stanza

Collecting stanza
  Downloading stanza-1.1.1-py3-none-any.whl (227 kB)
     |████████████████████████████████| 227 kB 3.7 MB/s
Installing collected packages: stanza
Successfully installed stanza-1.1.1


In [12]:
import stanza

# Fetch the default English models, then build a pipeline that only tokenizes.
stanza.download('en')
nlp = stanza.Pipeline(lang='en', processors='tokenize')

# Tokenize a two-sentence sample and show the tokens sentence by sentence.
doc = nlp('This is a test sentence for stanza. This is another sentence.')
for sent_no, sent in enumerate(doc.sentences, start=1):
    print(f'====== Sentence {sent_no} tokens =======')
    # One "id / text" line per token, emitted with a single print call.
    print('\n'.join(f'id: {tok.id}\ttext: {tok.text}' for tok in sent.tokens))

Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/master/resources_1.1.0.json: 122kB [00:00, 28.5MB/s]                    
2020-10-04 17:16:35 INFO: Downloading default packages for language: en (English)...
Downloading http://nlp.stanford.edu/software/stanza/1.1.0/en/default.zip: 100%|██████████| 428M/428M [00:44<00:00, 9.55MB/s] 
2020-10-04 17:17:26 INFO: Finished downloading models and saved to /Users/sdeshpande/stanza_resources.
2020-10-04 17:17:26 INFO: Loading these models for language: en (English):
| Processor | Package |
-----------------------
| tokenize  | ewt     |

2020-10-04 17:17:26 INFO: Use device: cpu
2020-10-04 17:17:26 INFO: Loading: tokenize
2020-10-04 17:17:26 INFO: Done loading processors!


id: (1,)	text: This
id: (2,)	text: is
id: (3,)	text: a
id: (4,)	text: test
id: (5,)	text: sentence
id: (6,)	text: for
id: (7,)	text: stanza
id: (8,)	text: .
id: (1,)	text: This
id: (2,)	text: is
id: (3,)	text: another
id: (4,)	text: sentence
id: (5,)	text: .


In [17]:
import stanza
# Download and initialize the English CRAFT pipeline. Per the log below,
# model files that already exist locally are not fetched again.
stanza.download('en', package='craft')
nlp = stanza.Pipeline('en', package='craft')
# Annotate an example biomedical sentence.
# Note: this rebinds `nlp` and `doc` from the earlier cells.
doc = nlp('A single-cell transcriptomic atlas characterizes ageing tissues in the mouse.')
# Print the dependency tree of the first sentence; the output below shows one
# (word, head index, relation) tuple per token.
doc.sentences[0].print_dependencies()

Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/master/resources_1.1.0.json: 122kB [00:00, 29.9MB/s]                    
2020-10-04 17:42:28 INFO: Downloading these customized packages for language: en (English)...
| Processor | Package |
-----------------------
| tokenize  | craft   |
| pos       | craft   |
| lemma     | craft   |
| depparse  | craft   |
| pretrain  | craft   |

2020-10-04 17:42:28 INFO: File exists: /Users/sdeshpande/stanza_resources/en/tokenize/craft.pt.
2020-10-04 17:42:28 INFO: File exists: /Users/sdeshpande/stanza_resources/en/pos/craft.pt.
2020-10-04 17:42:28 INFO: File exists: /Users/sdeshpande/stanza_resources/en/lemma/craft.pt.
2020-10-04 17:42:28 INFO: File exists: /Users/sdeshpande/stanza_resources/en/depparse/craft.pt.
2020-10-04 17:42:29 INFO: File exists: /Users/sdeshpande/stanza_resources/en/pretrain/craft.pt.
2020-10-04 17:42:29 INFO: Finished downloading models and saved to /Users/sdeshpande/stanza_resources.
2020-10-04 17:

('A', 6, 'det')
('single', 4, 'amod')
('-', 4, 'punct')
('cell', 6, 'compound')
('transcriptomic', 6, 'amod')
('atlas', 7, 'nsubj')
('characterizes', 0, 'root')
('ageing', 9, 'compound')
('tissues', 7, 'obj')
('in', 12, 'case')
('the', 12, 'det')
('mouse', 7, 'obl')
('.', 7, 'punct')


In [18]:
# Download and initialize a mimic pipeline whose NER processor uses the i2b2 model.
stanza.download('en', package='mimic', processors={'ner': 'i2b2'})
nlp = stanza.Pipeline('en', package='mimic', processors={'ner': 'i2b2'})
# Annotate a clinical example sentence (rebinds `nlp` and `doc`).
doc = nlp('The patient had a sore throat and was treated with Cepacol lozenges.')
# Print every recognised entity as "<text><TAB><type>", one per line.
for ent in doc.entities:
    print(f'{ent.text}\t{ent.type}')


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/master/resources_1.1.0.json: 122kB [00:00, 24.7MB/s]                    
2020-10-04 17:42:52 INFO: Downloading these customized packages for language: en (English)...
| Processor       | Package |
-----------------------------
| tokenize        | mimic   |
| pos             | mimic   |
| lemma           | mimic   |
| depparse        | mimic   |
| ner             | i2b2    |
| forward_charlm  | mimic   |
| pretrain        | mimic   |
| backward_charlm | mimic   |

Downloading http://nlp.stanford.edu/software/stanza/1.1.0/en/tokenize/mimic.pt: 100%|██████████| 631k/631k [00:01<00:00, 390kB/s]
Downloading http://nlp.stanford.edu/software/stanza/1.1.0/en/pos/mimic.pt: 100%|██████████| 20.8M/20.8M [00:06<00:00, 3.46MB/s]
Downloading http://nlp.stanford.edu/software/stanza/1.1.0/en/lemma/mimic.pt: 100%|██████████| 4.19M/4.19M [00:02<00:00, 1.63MB/s]
Downloading http://nlp.stanford.edu/software/stanza/1.1.0/en/depparse

a sore throat	PROBLEM
Cepacol lozenges	TREATMENT


In [19]:
# Download and initialize a mimic pipeline whose NER processor uses the
# Radiology model (not i2b2, unlike the previous cell).
stanza.download('en', package='mimic', processors={'ner': 'Radiology'})
nlp = stanza.Pipeline('en', package='mimic', processors={'ner': 'Radiology'})
# Annotate the same clinical sentence as the i2b2 cell so the two NER models
# can be compared on identical input (rebinds `nlp` and `doc`).
doc = nlp('The patient had a sore throat and was treated with Cepacol lozenges.')
# Print every recognised entity as "<text><TAB><type>", one per line.
for ent in doc.entities:
    print(f'{ent.text}\t{ent.type}')

Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/master/resources_1.1.0.json: 122kB [00:00, 25.2MB/s]                    
2020-10-04 17:52:32 INFO: Downloading these customized packages for language: en (English)...
| Processor       | Package   |
-------------------------------
| tokenize        | mimic     |
| pos             | mimic     |
| lemma           | mimic     |
| depparse        | mimic     |
| ner             | radiology |
| forward_charlm  | mimic     |
| pretrain        | mimic     |
| backward_charlm | mimic     |

2020-10-04 17:52:32 INFO: File exists: /Users/sdeshpande/stanza_resources/en/tokenize/mimic.pt.
2020-10-04 17:52:32 INFO: File exists: /Users/sdeshpande/stanza_resources/en/pos/mimic.pt.
2020-10-04 17:52:32 INFO: File exists: /Users/sdeshpande/stanza_resources/en/lemma/mimic.pt.
2020-10-04 17:52:32 INFO: File exists: /Users/sdeshpande/stanza_resources/en/depparse/mimic.pt.
Downloading http://nlp.stanford.edu/software/stanza/1.1.0/en/

sore throat	ANATOMY
