In [1]:
%pip install -q "openvino>=2023.1.0"

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Fetch the `notebook_utils` helper module from the openvino_notebooks repository
# and save it as `notebook_utils.py` in the current working directory.
import urllib.request
urllib.request.urlretrieve(
    url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py',
    filename='notebook_utils.py'
)

# This import has to come after the download: the module is only available
# locally once the file above has been written.
from notebook_utils import download_file

In [3]:
import operator
import time
from urllib import parse
from pathlib import Path

import numpy as np
import openvino as ov


# Download the HTML reader helper, then import it. It is used later through
# `reader.get_paragraphs(...)` to pull paragraphs from URLs.
# The import must follow the download because the file is fetched at run time.
download_file(
    url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/213-question-answering/html_reader.py',
    filename='html_reader.py'
)
import html_reader as reader

# Same download-then-import pattern for the tokenization helper, used later
# through `tokens.load_vocab_file(...)` and `tokens.text_to_tokens(...)`.
download_file(
    url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/213-question-answering/tokens_bert.py',
    filename='tokens_bert.py'
)
import tokens_bert as tokens

html_reader.py:   0%|          | 0.00/635 [00:00<?, ?B/s]

tokens_bert.py:   0%|          | 0.00/929 [00:00<?, ?B/s]

In [4]:
# Directory that receives the downloaded model files.
MODEL_DIR = Path("../../python/python_backend/trained_models/question_answering/")
# `parents=True` also creates missing intermediate directories. Without it,
# `mkdir` raises FileNotFoundError on a fresh checkout where e.g.
# `trained_models/` does not exist yet.
MODEL_DIR.mkdir(parents=True, exist_ok=True)

model_xml_url = "https://storage.openvinotoolkit.org/repositories/open_model_zoo/2023.0/models_bin/1/bert-small-uncased-whole-word-masking-squad-int8-0002/FP16-INT8/bert-small-uncased-whole-word-masking-squad-int8-0002.xml"
model_bin_url = "https://storage.openvinotoolkit.org/repositories/open_model_zoo/2023.0/models_bin/1/bert-small-uncased-whole-word-masking-squad-int8-0002/FP16-INT8/bert-small-uncased-whole-word-masking-squad-int8-0002.bin"

# Store each file in MODEL_DIR under the last path component of its URL.
download_file(model_xml_url, model_xml_url.split("/")[-1], MODEL_DIR)
download_file(model_bin_url, model_bin_url.split("/")[-1], MODEL_DIR)

# Local path of the downloaded .xml file; the .bin file sits in the same directory.
model_path = MODEL_DIR / model_xml_url.split("/")[-1]

../../python/python_backend/trained_models/question_answering/bert-small-uncased-whole-word-masking-squad-int8…

../../python/python_backend/trained_models/question_answering/bert-small-uncased-whole-word-masking-squad-int8…

In [5]:
# Display the local path of the downloaded model .xml file.
model_path

PosixPath('../../python/python_backend/trained_models/question_answering/bert-small-uncased-whole-word-masking-squad-int8-0002.xml')

In [6]:
# Create the OpenVINO Runtime `Core` object used to read the model.
core = ov.Core()
# Read the network from the downloaded .xml path. Only the .xml path is passed here.
# NOTE(review): presumably the matching .bin weights are located automatically
# in the same directory — confirm against the OpenVINO `read_model` docs.
model = core.read_model(model_path)

In [7]:
import ipywidgets as widgets

# Reuse the `core` object created when the model was read instead of
# instantiating a second OpenVINO Core: one runtime object serves device
# discovery, model reading and compilation.

# Let the user pick an inference device: every device reported by the runtime
# plus "AUTO", which is preselected.
device = widgets.Dropdown(
    options=core.available_devices + ["AUTO"],
    value='AUTO',
    description='Device:',
    disabled=False,
)

# Render the dropdown as the cell output.
device

Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO')

In [8]:
# Compile the model for the device currently selected in the dropdown.
compiled_model = core.compile_model(model=model, device_name=device.value)

# Collect the compiled model's input and output ports. These are port objects
# (their names are read through `.any_name` elsewhere), not plain name strings.
input_keys = list(compiled_model.inputs)
output_keys = list(compiled_model.outputs)

# Second dimension of the first input's shape. The rest of the notebook uses it
# as the fixed sequence length that every tokenized input is padded to.
input_size = compiled_model.input(0).shape[1]

In [9]:
# Show the names of the compiled model's inputs and outputs.
[i.any_name for i in input_keys], [o.any_name for o in output_keys]

(['input_ids', 'attention_mask', 'token_type_ids', 'position_ids'],
 ['output_s', 'output_e'])

In [18]:
# Fetch the vocabulary file from the openvino_notebooks storage into the model directory.
vocab_file_path = download_file(
    "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/text/bert-uncased/vocab.txt",
    directory=MODEL_DIR
)

# Build the word -> index dictionary from the downloaded file.
vocab = tokens.load_vocab_file(str(vocab_file_path))

# Look up the indices of the special tokens in one pass.
cls_token, pad_token, sep_token = (vocab[marker] for marker in ("[CLS]", "[PAD]", "[SEP]"))


# Build the context text from a mix of URLs and literal paragraphs.
def load_context(sources):
    """Combine literal paragraphs and paragraphs read from URLs into one string.

    Each entry of ``sources`` that parses with both a scheme and a network
    location is treated as a URL; every other entry is used verbatim as a
    paragraph. Literal paragraphs come first (in input order), followed by
    whatever ``reader.get_paragraphs`` returns for the collected URLs.

    Args:
        sources: iterable of strings (URLs and/or plain text).

    Returns:
        All paragraphs joined with newline characters.
    """
    urls, texts = [], []
    for entry in sources:
        parts = parse.urlparse(entry)
        looks_like_url = bool(parts.scheme and parts.netloc)
        (urls if looks_like_url else texts).append(entry)

    # Paragraphs fetched from URLs are appended after the literal ones.
    texts.extend(reader.get_paragraphs(urls))
    return "\n".join(texts)

../../python/python_backend/trained_models/question_answering/vocab.txt:   0%|          | 0.00/226k [00:00<?, …

In [19]:
# A generator of a sequence of inputs.
def prepare_input(question_tokens, context_tokens):
    """Yield model inputs for overlapping windows over the context.

    Every input is laid out as
    ``[CLS] question [SEP] context-window [SEP]`` and padded to ``input_size``.
    Consecutive windows overlap by half a window, and the last window always
    reaches the end of ``context_tokens`` so that no context token is skipped.

    Args:
        question_tokens: list of token ids of the question.
        context_tokens: list of token ids of the whole context.

    Yields:
        Tuples ``(input_dict, pad_number, start)``: the int32 arrays to feed the
        model, the value returned by ``pad`` as padding count, and the index of
        the window's first token within ``context_tokens``.

    Raises:
        RuntimeError: if fewer than 16 positions are left for the context.
    """
    question_len = len(question_tokens)
    # Three positions are taken by the special tokens: [CLS] and two [SEP].
    context_len = input_size - question_len - 3

    if context_len < 16:
        raise RuntimeError("Question is too long in comparison to input size. No space for context")

    # Consecutive windows overlap by half of the window length.
    stride = context_len // 2
    # Stopping at `len - context_len` would drop the tail of the context
    # (e.g. 30 tokens with a 20-token window gave only the window at 0).
    # Extending the stop by one stride adds exactly the window needed to reach
    # the last token; `max(1, ...)` still yields one window for short contexts.
    stop = max(1, len(context_tokens) - context_len + stride)

    # Whether the model has a `position_ids` input does not change per window.
    needs_position_ids = any(port.any_name == "position_ids" for port in input_keys)

    for start in range(0, stop, stride):
        # A part of the context.
        part_context_tokens = context_tokens[start:start + context_len]
        # The input: a question and the context separated by special tokens.
        input_ids = [cls_token] + question_tokens + [sep_token] + part_context_tokens + [sep_token]
        # 1 for every real token; padding positions get 0 in `pad`.
        attention_mask = [1] * len(input_ids)
        # 0 for [CLS] + question + [SEP], 1 for the context part + final [SEP].
        token_type_ids = [0] * (question_len + 2) + [1] * (len(part_context_tokens) + 1)

        # Add padding at the end.
        (input_ids, attention_mask, token_type_ids), pad_number = pad(input_ids=input_ids,
                                                                      attention_mask=attention_mask,
                                                                      token_type_ids=token_type_ids)

        # Create an input to feed the model (leading axis of size 1).
        input_dict = {
            "input_ids": np.array([input_ids], dtype=np.int32),
            "attention_mask": np.array([attention_mask], dtype=np.int32),
            "token_type_ids": np.array([token_type_ids], dtype=np.int32),
        }

        # Some models require additional position_ids.
        if needs_position_ids:
            position_ids = np.arange(len(input_ids))
            input_dict["position_ids"] = np.array([position_ids], dtype=np.int32)

        yield input_dict, pad_number, start


# Right-pad the three model inputs up to the model's input length.
def pad(input_ids, attention_mask, token_type_ids):
    """Pad the inputs at the end so that each has ``input_size`` elements.

    ``input_ids`` is filled with ``pad_token``; ``attention_mask`` and
    ``token_type_ids`` are filled with zeros. Inputs that are already long
    enough are returned unchanged.

    Returns:
        ``((input_ids, attention_mask, token_type_ids), diff)`` where ``diff``
        is ``input_size - len(input_ids)`` measured before padding.
    """
    missing = input_size - len(input_ids)

    # Nothing to add: hand back the inputs as they are.
    if missing <= 0:
        return (input_ids, attention_mask, token_type_ids), missing

    fillers = ((input_ids, pad_token), (attention_mask, 0), (token_type_ids, 0))
    padded = tuple(values + [filler] * missing for values, filler in fillers)
    return padded, missing

In [20]:
# Based on https://github.com/openvinotoolkit/open_model_zoo/blob/bf03f505a650bafe8da03d2747a8b55c5cb2ef16/demos/common/python/openvino/model_zoo/model_api/models/bert.py#L163
def postprocess(output_start, output_end, question_tokens, context_tokens_start_end, padding, start_idx):
    """Turn the start/end logits of one window into the best answer span.

    Args:
        output_start: logits for the answer start, one value per input position.
        output_end: logits for the answer end, one value per input position.
        question_tokens: question token ids (only their count is used).
        context_tokens_start_end: per context token, a ``(start, end)`` pair of
            positions in the context text.
        padding: number of padding positions at the end of the input.
        start_idx: index of the window's first token within the whole context.

    Returns:
        ``(max_score, max_start, max_end)``: the best start*end score and the
        start/end positions in the context text.
    """

    def get_score(logits):
        # Softmax over the last axis. Subtracting the maximum first leaves the
        # result mathematically unchanged but keeps `exp` from overflowing to
        # inf (and the division from producing nan) for large logits.
        logits = np.asarray(logits)
        out = np.exp(logits - np.max(logits, axis=-1, keepdims=True))
        return out / out.sum(axis=-1, keepdims=True)

    # Get start-end scores for the context.
    score_start = get_score(output_start)
    score_end = get_score(output_end)

    # An index of the first context token in a tensor ([CLS] + question + [SEP] come first).
    context_start_idx = len(question_tokens) + 2
    # An index of the last+1 context token in a tensor (skips the final [SEP] and the padding).
    context_end_idx = input_size - padding - 1

    # Find product of all start-end combinations to find the best one.
    max_score, max_start, max_end = find_best_answer_window(start_score=score_start,
                                                            end_score=score_end,
                                                            context_start_idx=context_start_idx,
                                                            context_end_idx=context_end_idx)

    # The window-relative token indices become whole-context token indices by
    # adding `start_idx`; then convert to context text start-end index.
    max_start = context_tokens_start_end[max_start + start_idx][0]
    max_end = context_tokens_start_end[max_end + start_idx][1]

    return max_score, max_start, max_end


# Based on https://github.com/openvinotoolkit/open_model_zoo/blob/bf03f505a650bafe8da03d2747a8b55c5cb2ef16/demos/common/python/openvino/model_zoo/model_api/models/bert.py#L188
def find_best_answer_window(start_score, end_score, context_start_idx, context_end_idx, max_end_offset=16):
    """Find the start/end token pair with the highest combined score.

    The score of a candidate span is ``start_score[s] * end_score[e]``. Only
    positions inside ``[context_start_idx, context_end_idx)`` are considered,
    the end may not precede the start, and the end may be at most
    ``max_end_offset`` tokens after the start.

    Args:
        start_score: 1-D array of start scores, one per input position.
        end_score: 1-D array of end scores, one per input position.
        context_start_idx: index of the first context position.
        context_end_idx: index one past the last context position.
        max_end_offset: largest allowed ``end - start`` distance in tokens.
            Defaults to 16, the value that used to be hard-coded.

    Returns:
        ``(max_score, max_s, max_e)`` where ``max_s`` and ``max_e`` are counted
        from ``context_start_idx`` (0 is the first context position).
    """
    context_len = context_end_idx - context_start_idx
    # Outer product: score_mat[s, e] = start_score[s] * end_score[e].
    score_mat = np.matmul(
        start_score[context_start_idx:context_end_idx].reshape((context_len, 1)),
        end_score[context_start_idx:context_end_idx].reshape((1, context_len)),
    )
    # Reset candidates with end before start.
    score_mat = np.triu(score_mat)
    # Reset candidates whose end is more than `max_end_offset` tokens after the start.
    score_mat = np.tril(score_mat, max_end_offset)
    # Find the best start-end pair; `argmax` without an axis indexes the
    # flattened matrix, so `divmod` recovers (row, column).
    max_s, max_e = divmod(score_mat.argmax(), score_mat.shape[1])
    max_score = score_mat[max_s, max_e]

    return max_score, max_s, max_e

In [21]:
def get_best_answer(question, context):
    """Run the model over every context window and return the best answer.

    Args:
        question: the question text.
        context: the context text to search for the answer.

    Returns:
        ``(answer_text, score)``: the slice of ``context`` selected by the
        highest-scoring window result, and that score.
    """
    # Convert the context string to tokens. Tokenization runs on the lowercased
    # text, while the returned positions are later applied to the original
    # `context` (assumes lowercasing keeps character positions — TODO confirm
    # for non-ASCII input).
    context_tokens, context_tokens_start_end = tokens.text_to_tokens(text=context.lower(),
                                                                     vocab=vocab)
    # Convert the question string to tokens.
    question_tokens, _ = tokens.text_to_tokens(text=question.lower(), vocab=vocab)

    # The output ports do not change between windows, so look them up once
    # instead of on every loop iteration.
    output_start_key = compiled_model.output("output_s")
    output_end_key = compiled_model.output("output_e")

    results = []
    # Iterate through different parts of the context.
    for network_input, padding, start_idx in prepare_input(question_tokens=question_tokens,
                                                           context_tokens=context_tokens):
        # OpenVINO inference.
        result = compiled_model(network_input)
        # Postprocess the result, getting the score and context range for the answer.
        score_start_end = postprocess(output_start=result[output_start_key][0],
                                      output_end=result[output_end_key][0],
                                      question_tokens=question_tokens,
                                      context_tokens_start_end=context_tokens_start_end,
                                      padding=padding,
                                      start_idx=start_idx)
        results.append(score_start_end)

    # Find the highest score.
    best_score, answer_start, answer_end = max(results, key=operator.itemgetter(0))
    # Return the part of the context, which is already an answer.
    return context[answer_start:answer_end], best_score

In [22]:
def run_question_answering(sources, example_question=None):
    """Load the context and answer one question or an interactive series.

    Args:
        sources: list of URLs and/or plain-text paragraphs forming the context.
        example_question: if given, answer only this question; otherwise read
            questions from standard input until an empty line is entered.

    Prints the question, answer, score and elapsed time for every question.
    Returns ``None``.
    """
    print(f"Context: {sources}", flush=True)
    context = load_context(sources)

    if not context:
        print("Error: Empty context or outside paragraphs")
        return

    def answer_and_report(asked):
        # Time only the answer search, then print the report lines.
        started = time.perf_counter()
        answer, score = get_best_answer(question=asked, context=context)
        elapsed = time.perf_counter() - started

        print(f"Question: {asked}")
        print(f"Answer: {answer}")
        print(f"Score: {score:.2f}")
        print(f"Time: {elapsed:.2f}s")

    if example_question is not None:
        answer_and_report(example_question)
        return

    # Interactive mode: keep reading questions until an empty line ends the loop.
    for asked in iter(input, ""):
        answer_and_report(asked)

In [23]:
# Demo context: a single passage of two paragraphs. A double-quoted string
# literal cannot contain a raw line break, so the break between the paragraphs
# is written as an explicit "\n" and the two adjacent literals are concatenated
# by Python into one list element.
sources = [
    "A variety of medications serve diverse purposes in addressing health concerns. Aspirin, functioning as both a pain reliever and a blood thinner, is commonly employed to alleviate pain and inflammation. Ibuprofen, a nonsteroidal anti-inflammatory drug (NSAID), is utilized for pain relief, inflammation reduction, and fever reduction. Acetaminophen is favored for mild to moderate pain relief and fever reduction. Loratadine and cetirizine are antihistamines employed to manage allergy symptoms such as sneezing and itching. Diphenhydramine, another antihistamine, serves dual purposes by alleviating allergies and acting as a sleep aid. Medications like ranitidine, omeprazole, and lansoprazole are used to reduce stomach acid, treating conditions like heartburn and ulcers. Statins such as simvastatin and atorvastatin aim to lower cholesterol levels, decreasing the risk of heart disease. Metformin controls blood sugar levels in type 2 diabetes. Medications like levothyroxine address hypothyroidism by providing thyroid hormone replacement. Prednisone, a corticosteroid, suppresses inflammation and the immune system. Albuterol, a bronchodilator, is employed for asthma and chronic obstructive pulmonary disease (COPD). Sertraline, lisinopril, and amlodipine are medications for depression, hypertension, and high blood pressure, respectively. Anticoagulants like warfarin and antiplatelets like clopidogrel prevent blood clot formation. Hydrochlorothiazide and furosemide are diuretics used for hypertension and fluid retention. Duloxetine, venlafaxine, escitalopram, and citalopram are medications addressing depression and anxiety disorders. Aripiprazole, quetiapine, and olanzapine are atypical antipsychotics for schizophrenia and bipolar disorder. Benzodiazepines like alprazolam and lorazepam are used to manage anxiety and panic disorders. Zolpidem is a sedative-hypnotic for insomnia. Stimulants like methylphenidate treat attention deficit hyperactivity disorder (ADHD). \n"
    "Inhaled corticosteroids such as fluticasone and leukotriene receptor antagonists like montelukast address asthma and allergies. Antidiabetic medications like sitagliptin are used for type 2 diabetes. Mirtazapine, gabapentin, and pregabalin serve purposes ranging from managing depression to treating neuropathic pain. Opioid analgesics like hydrocodone/acetaminophen, oxycodone/acetaminophen, morphine, and tramadol are employed for varying degrees of pain relief. Dextromethorphan is a cough suppressant, guaifenesin is an expectorant, and miconazole is an antifungal medication for coughs, congestion, and fungal infections, respectively. Each medication addresses specific health needs, and their usage should align with professional guidance for optimal effectiveness and safety."
]

# Example question answered against the context above.
question = "ranitidine is used for?"

run_question_answering(sources, example_question=question)

Context: ['A variety of medications serve diverse purposes in addressing health concerns. Aspirin, functioning as both a pain reliever and a blood thinner, is commonly employed to alleviate pain and inflammation. Ibuprofen, a nonsteroidal anti-inflammatory drug (NSAID), is utilized for pain relief, inflammation reduction, and fever reduction. Acetaminophen is favored for mild to moderate pain relief and fever reduction. Loratadine and cetirizine are antihistamines employed to manage allergy symptoms such as sneezing and itching. Diphenhydramine, another antihistamine, serves dual purposes by alleviating allergies and acting as a sleep aid. Medications like ranitidine, omeprazole, and lansoprazole are used to reduce stomach acid, treating conditions like heartburn and ulcers. Statins such as simvastatin and atorvastatin aim to lower cholesterol levels, decreasing the risk of heart disease. Metformin controls blood sugar levels in type 2 diabetes. Medications like levothyroxine address h