## Install all libraries
- Spacy
- GLiNER
- Download the trained model for English en_core_web_sm

- Author: Alva Rani James, PHD

In [None]:
!pip install -U spacy 
!python -m spacy download en_core_web_sm
!pip install gliner

## import all libraries

In [2]:
from gliner import GLiNER
# import the base gliner model
model = GLiNER.from_pretrained("urchade/gliner_base")
import spacy
from spacy import displacy
from spacy.matcher import PhraseMatcher
import timeit



## Define the text for the NER task and get the number of words within the text

In [7]:
text = """
Nutrition labs at the next visit
Follow nutrition recommendations.
Encouraged intake of 3 meals per day of low protein foods.
Encouraged intake of water instead of calorie drinks.
Aim for <15gm per day of protein from diet
Discussed importance of daily diet log to aid with protein counting and weight loss
Begin vitamin D supplementation, 1000 IUs per day 
We will contact the PCPs office and request assistance in placing referral to a local therapist/psychologist
We will continue to work on obtaining insurance coverage for her formula. 
If this continues to be deferred, we will consider starting tyrosine supplementation in the interim.
Followup in the metabolic clinic in 1 months  
"""
print("The number of words in the text:",len(text))

The number of words in the text: 730


## Step 1: GLiNER

In [38]:
# define the labels
labels = ['breast milk','protein', 'supplementation'
    'liquid predominant diet',
    'formula', 
    'supplementation',
    'meals',
    'water']
entities = model.predict_entities(text, labels, threshold=0.5)
for entity in entities: print(entity["text"], "=>", entity["label"])

water => breast milk
calorie drinks => formula
vitamin D => supplementation
formula => formula
tyrosine supplementation => supplementation


#### If you change the threshold
- Between 0.5 to 0.9

In [22]:
thres =[0.5,0.8,0.9]
for thre in thres:
    entities = model.predict_entities(text, labels, threshold=thre)
    for entity in entities: 
        print("for the threshold:",thre, entity["text"], "=>", entity["label"])

for the threshold: 0.5 water => breast milk
for the threshold: 0.5 calorie drinks => formula
for the threshold: 0.5 vitamin D => supplementation
for the threshold: 0.5 formula => formula
for the threshold: 0.5 tyrosine supplementation => supplementation
for the threshold: 0.8 water => water
for the threshold: 0.8 vitamin D supplementation => supplementation
for the threshold: 0.8 tyrosine supplementation => supplementation
for the threshold: 0.9 water => water


In [24]:
%timeit -o 1 + 2

7.77 ns ± 0.201 ns per loop (mean ± std. dev. of 7 runs, 100,000,000 loops each)


<TimeitResult : 7.77 ns ± 0.201 ns per loop (mean ± std. dev. of 7 runs, 100,000,000 loops each)>

In [30]:
def oct(x):
   return x*x
timeit.Timer("for x in range(100): oct(x)", "gc.enable()").timeit()

7.643843499994546

## Step 2: Example with spacy
- Customized labels using Pharsematcher function

In [15]:
nlp = spacy.load("en_core_web_sm")
phrase_matcher = PhraseMatcher(nlp.vocab)
food_list = [nlp.make_doc(text) for text in [
  'breast milk','protein', 'supplementation',
    'liquid predominant diet',
    'formula', 
    'supplementation',
    'meals',
    'water']]

phrase_matcher.add("DIET",None, *food_list)
doc    = nlp(text)
matches = phrase_matcher(doc)

# Assign labels and update Doc object with entities
for match_id, start, end in matches:
    rule_id = nlp.vocab.strings[match_id]  # get the label
    span = doc[start : end]  # get the matched slice of the doc
    print(rule_id,"=>", span.text)

DIET => meals
DIET => protein
DIET => water
DIET => protein
DIET => protein
DIET => supplementation
DIET => formula
DIET => supplementation


## Get the time for SPACY and GLiNER

- GLiNER

In [43]:
code_to_measure = '''
for entity in entities: print(entity["text"], "=>", entity["label"])
'''
time_taken = timeit.timeit(stmt=code_to_measure, globals=globals(), number=1)
print("Time taken for GLiNER:", time_taken, "seconds")

water => breast milk
calorie drinks => formula
vitamin D => supplementation
formula => formula
tyrosine supplementation => supplementation
Time taken for GLiNER: 0.00013079999916953966 seconds


- SPACY

In [44]:
code_to_measure= '''
for match_id, start, end in matches:
    rule_id = nlp.vocab.strings[match_id]  # get the label
    span = doc[start : end]  # get the matched slice of the doc
    print(rule_id,"=>", span.text)
'''
time_taken = timeit.timeit(stmt=code_to_measure, globals=globals(), number=1)
print("Time taken for SPACY:", time_taken, "seconds")

DIET => meals
DIET => protein
DIET => water
DIET => protein
DIET => protein
DIET => supplementation
DIET => formula
DIET => supplementation
Time taken for SPACY: 0.0003290999957243912 seconds
