In [1]:
import random
import json
import spacy
from spacy.training.example import Example
from spacy.util import minibatch, compounding

### Sample training data for Medical Named Entity Recognition (NER)

In [2]:
# Each item is (sentence, {"entities": [(start_char, end_char, label), ...]});
# the offsets are character positions into the sentence, end-exclusive.
TRAIN_DATA = [
    ("The patient's heart rate is elevated.", {"entities": [(14, 19, "ORGAN")]}),
    ("She broke her leg during the soccer match.", {"entities": [(14, 17, "BODY_PART")]}),
    ("He has been experiencing severe stomach pain.", {"entities": [(32, 39, "ORGAN")]}),
    ("The X-ray showed a fracture in her spine.", {"entities": [(35, 40, "BODY_PART")]}),
    ("His kidneys are functioning at a lower level than normal.", {"entities": [(4, 11, "ORGAN")]}),
    ("The patient has a history of liver disease.", {"entities": [(29, 34, "ORGAN")]}),
    ("The MRI revealed a problem with his brain.", {"entities": [(36, 41, "ORGAN")]}),
    ("She has been diagnosed with lung cancer.", {"entities": [(28, 32, "ORGAN")]}),
    ("The patient has a broken arm.", {"entities": [(25, 28, "BODY_PART")]}),
    ("She has a high fever and sore throat.", {"entities": [(30, 36, "BODY_PART")]}),
    ("The patient's blood pressure is too high.", {"entities": [(14, 19, "BODY_PART")]}),
    ("He has a rash on his skin.", {"entities": [(21, 25, "BODY_PART")]}),
    ("The patient has a history of eye disorders.", {"entities": [(29, 32, "ORGAN")]}),
    ("She has been experiencing chest pain.", {"entities": [(26, 31, "BODY_PART")]}),
    ("The patient has a fractured skull.", {"entities": [(28, 33, "BODY_PART")]}),
]

# Sanity check: slice every annotated span back out of its sentence so a
# wrong offset is immediately visible in the printed output.
for sentence, annotation in TRAIN_DATA:
    for span_start, span_end, label in annotation["entities"]:
        extracted = sentence[span_start:span_end]
        print(f"Label: {label}, Extracted: '{extracted}'")

Label: ORGAN, Extracted: 'heart'
Label: BODY_PART, Extracted: 'leg'
Label: ORGAN, Extracted: 'stomach'
Label: BODY_PART, Extracted: 'spine'
Label: ORGAN, Extracted: 'kidneys'
Label: ORGAN, Extracted: 'liver'
Label: ORGAN, Extracted: 'brain'
Label: ORGAN, Extracted: 'lung'
Label: BODY_PART, Extracted: 'arm'
Label: BODY_PART, Extracted: 'throat'
Label: BODY_PART, Extracted: 'blood'
Label: BODY_PART, Extracted: 'skin'
Label: ORGAN, Extracted: 'eye'
Label: BODY_PART, Extracted: 'chest'
Label: BODY_PART, Extracted: 'skull'


### Training a Medical NER model with spaCy and evaluating on test docs

In [3]:
# Start from the pretrained English pipeline and fine-tune its NER component.
nlp = spacy.load('en_core_web_md')

# Register the two custom entity labels used in TRAIN_DATA.
ner = nlp.get_pipe('ner')
ner.add_label('ORGAN')
ner.add_label('BODY_PART')

N_ITER = 20
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]

# Seed the RNGs so shuffling and dropout are repeatable across
# Restart & Run All.
spacy.util.fix_random_seed(0)

# Only the NER component is updated; every other pipe is disabled for the
# duration of the block (select_pipes is the spaCy v3 name for disable_pipes).
with nlp.select_pipes(disable=other_pipes):
    optimizer = nlp.create_optimizer()
    for _ in range(N_ITER):
        # Shuffle a copy each epoch: TRAIN_DATA itself is left in its original
        # order so re-running earlier cells shows the same thing.
        epoch_data = random.sample(TRAIN_DATA, k=len(TRAIN_DATA))
        for text, annotations in epoch_data:
            example = Example.from_dict(nlp.make_doc(text), annotations)
            nlp.update([example], drop=0.5, sgd=optimizer)

# Sentences not present in TRAIN_DATA, used as a quick qualitative check.
test_texts = [
    "The patient complained of severe heart pain and a broken leg.",
    "He has a problem with his spleen.",
    "She has a cut on her finger.",
    "The patient has a dislocated shoulder.",
]

for text in test_texts:
    doc = nlp(text)
    print(f"Text: {text}\nEntities: {[(e.label_, e.text) for e in doc.ents]}\n")

Text: The patient complained of severe heart pain and a broken leg.
Entities: [('ORGAN', 'heart'), ('BODY_PART', 'leg')]

Text: He has a problem with his spleen.
Entities: [('ORGAN', 'spleen')]

Text: She has a cut on her finger.
Entities: [('BODY_PART', 'finger')]

Text: The patient has a dislocated shoulder.
Entities: [('BODY_PART', 'shoulder')]



### Loading and Preparing Training Data for Intent Classification

In [4]:
def load_training_data(file_path):
    """Collect (utterance, intent) pairs from the USER turns of a dialogue file.

    The file is expected to hold a JSON list of dialogues. Each dialogue has a
    'turns' list; each turn has 'speaker' and 'utterance' and may have a
    'frames' list; a frame may carry a 'state' dict with an 'active_intent'.

    Args:
        file_path: Path to the JSON file.

    Returns:
        A list of (utterance, active_intent) tuples, one per USER frame that
        has an active intent, in file order. A turn with several such frames
        contributes one tuple per frame.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If a dialogue lacks 'turns' or a turn lacks 'speaker'
            (or a matching turn lacks 'utterance').
    """
    # Read as UTF-8 explicitly instead of relying on the platform default.
    with open(file_path, 'r', encoding='utf-8') as file:
        content = json.load(file)

    data = []
    for dialogue in content:
        for turn in dialogue['turns']:
            if turn['speaker'] != 'USER':
                continue
            for frame in turn.get('frames') or []:
                # A frame without a 'state' simply has no intent to record;
                # skip it rather than failing the whole load.
                state = frame.get('state') or {}
                if 'active_intent' in state:
                    data.append((turn['utterance'], state['active_intent']))
    return data

def prepare_training_data(data):
    """Convert (text, intent) pairs into (text, {"cats": {...}}) training tuples.

    Every returned "cats" dict contains all intents observed in `data`, with
    1.0 for the example's own intent and 0.0 for the others. Writing the
    negatives explicitly means training does not depend on how missing
    categories are treated downstream.

    Args:
        data: Iterable of (text, intent) pairs.

    Returns:
        A list of (text, {"cats": {intent: float}}) tuples in input order.
        Category keys appear in order of first occurrence in `data`.
        Empty input yields an empty list.
    """
    pairs = list(data)  # materialize: the input is traversed twice
    # dict.fromkeys de-duplicates while preserving first-seen order.
    all_intents = list(dict.fromkeys(intent for _, intent in pairs))
    return [
        (text, {"cats": {label: 1.0 if label == intent else 0.0 for label in all_intents}})
        for text, intent in pairs
    ]

# Read the (utterance, intent) pairs from disk, then wrap each intent in the
# {"cats": ...} annotation format consumed by the text classifier below.
data = load_training_data(file_path='data/services.json')
training_data = prepare_training_data(data=data)

### Training text classification model and testing on new texts

In [5]:
# Fresh English pipeline containing only a text categorizer.
nlp = spacy.blank("en")
textcat = nlp.add_pipe("textcat", last=True)

# Register every intent that occurs in the training annotations
# (add_label is a no-op for a label that is already present).
for _, annotations in training_data:
    for cat in annotations['cats']:
        textcat.add_label(cat) # FindProvider, BookAppointment, NONE

N_ITER = 15

# Seed the RNGs so weight initialization and shuffling are repeatable.
spacy.util.fix_random_seed(0)

# Tokenize once up front instead of rebuilding every Example on every epoch.
train_examples = [
    Example.from_dict(nlp.make_doc(text), annotations)
    for text, annotations in training_data
]

# initialize() replaces the deprecated begin_training(); passing the examples
# lets the component set itself up from real data. It returns the optimizer.
optimizer = nlp.initialize(lambda: train_examples)

for _ in range(N_ITER):
    # Shuffle the local example list; `training_data` itself is left untouched.
    random.shuffle(train_examples)
    for batch in minibatch(train_examples, size=compounding(4.0, 32.0, 1.001)):
        # One optimizer step per minibatch. Updating example-by-example inside
        # the batch loop would make the batching (and compounding size) moot.
        nlp.update(batch, sgd=optimizer)

# Unseen utterances grouped by the intent they are expected to trigger:
# find-provider, book-appointment, and no intent.
texts = [
    "Please find me a dentist in Cupertino.",
    "I need a plumber in San Francisco.",
    "Can you locate a nearby gym?",

    "Book an appointment with Dr. Smith for tomorrow.",
    "Schedule a meeting with my lawyer at 3 PM.",
    "Can you arrange a dentist appointment for next week?",

    "Nothing specific.",
    "No need for assistance.",
    "I'm okay for now.",
]

for text in texts:
    doc = nlp(text)
    print(f"Text: {text}\nIntents: {[(k, round(v, 2)) for k, v in doc.cats.items()]}\n")

Text: Please find me a dentist in Cupertino.
Intents: [('FindProvider', 1.0), ('BookAppointment', 0.0), ('NONE', 0.0)]

Text: I need a plumber in San Francisco.
Intents: [('FindProvider', 1.0), ('BookAppointment', 0.0), ('NONE', 0.0)]

Text: Can you locate a nearby gym?
Intents: [('FindProvider', 1.0), ('BookAppointment', 0.0), ('NONE', 0.0)]

Text: Book an appointment with Dr. Smith for tomorrow.
Intents: [('FindProvider', 0.0), ('BookAppointment', 1.0), ('NONE', 0.0)]

Text: Schedule a meeting with my lawyer at 3 PM.
Intents: [('FindProvider', 0.0), ('BookAppointment', 1.0), ('NONE', 0.0)]

Text: Can you arrange a dentist appointment for next week?
Intents: [('FindProvider', 0.0), ('BookAppointment', 1.0), ('NONE', 0.0)]

Text: Nothing specific.
Intents: [('FindProvider', 0.05), ('BookAppointment', 0.0), ('NONE', 0.95)]

Text: No need for assistance.
Intents: [('FindProvider', 0.0), ('BookAppointment', 0.0), ('NONE', 1.0)]

Text: I'm okay for now.
Intents: [('FindProvider', 0.26), ('