In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Microsoft's BioGPT-Large, fine-tuned on the ChatDoctor dataset for medical question answering.

In [3]:
# Hub identifier of the fine-tuned checkpoint whose weights are loaded below.
model_id = "Narrativaai/BioGPT-Large-finetuned-chatdoctor"

# The tokenizer is loaded from the base "microsoft/BioGPT-Large" repository,
# not from model_id.
# NOTE(review): presumably the fine-tune reuses the base vocabulary — confirm.
tokenizer = AutoTokenizer.from_pretrained("microsoft/BioGPT-Large")

# Load the fine-tuned causal-LM weights. Later cells read `model` and
# `tokenizer` as module-level globals.
model = AutoModelForCausalLM.from_pretrained(model_id)

In [4]:
def answer_question(
    model,
    tokenizer,
    prompt,
    temperature=0.1,
    top_p=0.75,
    top_k=40,
    num_beams=2,
    **kwargs,
):
    """Generate a completion for ``prompt`` and return the answer portion.

    The prompt is tokenized, moved to CUDA when available (CPU otherwise),
    and passed to ``model.generate`` with at most 512 new tokens. The first
    returned sequence is decoded and the text following the first
    ``" Response:"`` marker is returned.

    Args:
        model: Object exposing ``to(device)`` and ``generate(...)``; it is
            moved to the selected device on every call.
        tokenizer: Callable returning a mapping with ``"input_ids"`` and
            ``"attention_mask"`` tensors; must also provide ``decode`` and
            ``eos_token_id``.
        prompt: Full prompt text, expected to contain a ``Response:`` marker.
        temperature: Forwarded to ``GenerationConfig``.
        top_p: Forwarded to ``GenerationConfig``.
        top_k: Forwarded to ``GenerationConfig``.
        num_beams: Forwarded to ``GenerationConfig``.
        **kwargs: Extra fields forwarded to ``GenerationConfig``.

    Returns:
        str: The decoded text between the first ``" Response:"`` marker and
        the next one (or the end of the text). If the marker does not appear
        in the decoded text, the whole decoded text is returned instead of
        raising ``IndexError``.

    NOTE(review): temperature/top_p/top_k are sampling parameters; per the
    Hugging Face generation docs they are presumably ignored unless sampling
    is enabled (e.g. ``do_sample=True`` passed through ``kwargs``) — confirm
    whether pure beam search is the intended decoding strategy.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].to(device)
    attention_mask = inputs["attention_mask"].to(device)

    generation_config = GenerationConfig(
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        num_beams=num_beams,
        **kwargs,
    )

    model.to(device)
    with torch.no_grad():
        # Only `.sequences` is consumed below, so per-step scores are not
        # requested; keeping them would hold extra tensors for no benefit.
        generation_output = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            generation_config=generation_config,
            return_dict_in_generate=True,
            max_new_tokens=512,
            eos_token_id=tokenizer.eos_token_id,
        )

    first_sequence = generation_output.sequences[0]
    output = tokenizer.decode(first_sequence, skip_special_tokens=True)

    # Decoding may drop or alter the marker; fall back to the full text
    # rather than crashing the caller with an IndexError.
    parts = output.split(" Response:")
    if len(parts) < 2:
        return output
    return parts[1]


In [5]:
def generate_response(question):
    """Wrap ``question`` in the instruction prompt and return the model's answer.

    The prompt is assembled line by line so that its whitespace is explicit:
    it starts with a newline, every line carries a four-space indent, and it
    ends with a newline followed by four spaces. The finished prompt is passed
    to ``answer_question`` together with the module-level ``model`` and
    ``tokenizer``.

    Args:
        question: The patient's question, inserted under ``### Input:``.

    Returns:
        Whatever ``answer_question`` returns for the assembled prompt.
    """
    prompt_lines = [
        "",
        "    Below is an instruction that describes a task, paired with an input that provides further context.Write a response that appropriately completes the request.",
        "    ",
        "    ### Instruction:",
        "    If you are a doctor, please answer the medical questions based on the patient's description.",
        "    ",
        "    ### Input:",
        f"    {question}",
        "    ",
        "    ### Response:",
        "    ",
    ]
    return answer_question(model, tokenizer, "\n".join(prompt_lines))

In [6]:
import gradio as gr

In [7]:
# Placeholder prompts offered as clickable examples in the UI.
example_questions = [
    ["Patient describes symptoms..."],
    ["What are the treatment options for..."],
]

# Plain text in, plain text out; each submission is routed through
# generate_response.
interface_options = dict(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="Medical Question Responder",
    description="Enter your medical question and get a response.",
    examples=example_questions,
)
iface = gr.Interface(**interface_options)

# share=True serves the app on a public URL in addition to the local one,
# so anyone holding the link can send prompts to this kernel.
iface.launch(share=True)

Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://e143bba126fd828bee.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


