In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn

from spacy.cli.train import train
import spacy

In [40]:
# Train the pipeline described by config/config.cfg and save the resulting
# checkpoints under ./sig_ner_model. The overrides replace the config's data
# paths and cap training at 68 epochs.
# NOTE(review): "paths.dev" points at test_docs.spacy, so the scores logged
# during training are presumably computed on the test split — confirm that no
# separate dev split was intended.
train(
    "config/config.cfg",
    output_path="sig_ner_model",
    overrides={
        "paths.train": "train_docs.spacy",
        "paths.dev": "test_docs.spacy",
        "training.max_epochs": 68,
    },
)

[38;5;4mℹ Saving to output directory: sig_ner_model[0m
[38;5;4mℹ Using CPU[0m
[1m
[38;5;2m✔ Initialized pipeline[0m
[1m
[38;5;4mℹ Pipeline: ['tok2vec', 'ner'][0m
[38;5;4mℹ Initial learn rate: 0.0[0m
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00    876.38    0.00    0.00    0.00    0.00
  3     200        232.85  253498.43    0.00    0.00    0.00    0.00
  6     400       2538.98  47127.05    0.33   85.71    0.17    0.00
 10     600       6289.14  27416.00   18.73   48.11   11.63    0.19
 13     800       1955.74  18480.40   35.70   53.23   26.85    0.36
 16    1000       9137.81  15990.31   47.05   62.16   37.85    0.47
 20    1200       2047.95  12642.24   58.47   69.13   50.66    0.58
 23    1400       1129.24  10456.20   61.88   69.44   55.80    0.62
 26    1600       2516.88   9280.43   60.27   69.08   53.45    0.60
 30    1800        943.15   8733.76   65

In [45]:
# Load the `model-best` checkpoint from the training output directory; every
# inference cell below runs its input through this pipeline.
trained_nlp = spacy.load("./sig_ner_model/model-best")

##### first outcome! 

In [9]:
# Lower-case the sig before tagging. The text mixes a dosing instruction, a
# patient-id aside and a second instruction.
inp = 'TAKE 1 TABLET  (150 MCG TOTAL) BY MOUTH two times DAILY for one week by the way patient id is 303023 and take 2 aderol every day'.lower()
# Show each recognised entity span next to its label.
[
    (ent, ent.label_)
    for ent in trained_nlp(inp).ents
]

[(1, 'Dosage'),
 (tablet, 'Form'),
 (150 mcg, 'Strength'),
 (two times daily, 'Frequency'),
 (aderol, 'Drug'),
 (every day, 'Frequency')]

In [47]:
# Leftover sample sig, previously a bare string expression with no effect:
#   "INHALE 2 PUFFS INTO THE LUNGS EVERY DAY"
# Lower-case the sig actually under test, then list (entity span, label) pairs.
inp = 'Take two tablets 3 times every week'.lower()
[(ent, ent.label_) for ent in trained_nlp(inp).ents]

[(two, 'Dosage'), (tablets, 'Form'), (3 times every week, 'Dosage')]

In [151]:
# Already lower-case sig with a trailing patient-id aside.
inp = 'inhale 3 puffs of albuterol 2 times a day for one week patient id is 20202'
# Pair every entity span found by the model with its label.
[
    (ent, ent.label_)
    for ent in trained_nlp(inp).ents
]

[(3, 'Dosage'), (puffs, 'Form'), (albuterol, 'Drug'), (2 times a day, 'Frequency'), (for one week, 'Duration')]


 We can even parse multiple dosing instructions

In [171]:
# Two consecutive dosing instructions joined by "then"; the adjacent string
# literals are concatenated into a single sig.
inp = (
    'inhale 3 puffs of albuterol 2 times a day for one week'
    ' then 1 puff of albuterol every day for 2 months'
)

# Print the (entity span, label) pairs on a single line.
print([(ent, ent.label_) for ent in trained_nlp(inp).ents])

[(3, 'Dosage'), (puffs, 'Form'), (albuterol, 'Drug'), (2 times a day, 'Frequency'), (for one week, 'Duration'), (1, 'Dosage'), (puff, 'Form'), (albuterol, 'Drug'), (every day, 'Frequency'), (for 2 months, 'Duration')]


In [10]:
# Sig with a parenthesised strength and an "every N months" frequency,
# lower-cased before tagging.
inp = 'TAKE 1 TABLET (20 MG) BY MOUTH every 2 months'.lower()
# Print the (entity span, label) pairs on a single line.
print([
    (ent, ent.label_)
    for ent in trained_nlp(inp).ents
])

[(1, 'Dosage'), (tablet, 'Form'), (20 mg, 'Strength'), (every 2 months, 'Frequency')]


 There are many things to improve. One example is the frequency, which can and should be parsed into an interval type and amount (e.g. type=day, amount=2).

Also, the model only recognizes features it has already encountered, so a new, unseen sentence given to the model will not be parsed correctly. 
Once we use the pre-trained BERT model, this issue should be resolved.

In [11]:
# NOTE(review): unlike the other mixed-case examples, this sig is passed to the
# model without .lower() — confirm whether that is intentional.
inp = 'Take 1 tablet of ibuprofen 3 times a day'

# Pair every entity span found by the model with its label.
[
    (ent, ent.label_)
    for ent in trained_nlp(inp).ents
]

[(1, 'Dosage'),
 (tablet, 'Form'),
 (ibuprofen, 'Drug'),
 (3 times a day, 'Dosage')]

In [179]:
# Drug-first sig with a leading space and a fractional dose, lower-cased
# before tagging.
inp = ' ASPIRIN 100 mg Initiate Medication IMMEDIATE RELEASE TABLET 1.5 daily'.lower()

# Pair every entity span found by the model with its label.
[
    (ent, ent.label_)
    for ent in trained_nlp(inp).ents
]

[(aspirin, 'Drug'),
 (100 mg, 'Strength'),
 (tablet, 'Form'),
 (1.5, 'Dosage'),
 (daily, 'Frequency')]