In [1]:
import os
from huggingface_hub import hf_hub_download

# Hub access token taken from the environment; this is None when the
# variable is not set.
HUGGING_FACE_API_KEY = os.getenv("HUGGING_FACE_API_KEY")

# Repository to pull from and the individual files requested from it.
model_id = "lmsys/fastchat-t5-3b-v1.0"
filenames = [
    "pytorch_model.bin",
    "added_tokens.json",
    "config.json",
    "generation_config.json",
    "special_tokens_map.json",
    "spiece.model",
    "tokenizer_config.json",
]

# Request every file in turn. `downloaded_model_path` is rebound on each
# pass, so after the loop it holds the value returned for the last file.
for filename in filenames:
    downloaded_model_path = hf_hub_download(
        repo_id=model_id, filename=filename, token=HUGGING_FACE_API_KEY
    )

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForSeq2SeqLM

# tokenizer = AutoTokenizer.from_pretrained(model_id, legacy=False)
from transformers import T5Tokenizer

# Locate the directory that holds the files fetched by the download loop.
# It is derived from the path returned by hf_hub_download rather than a
# hard-coded, user-specific absolute path, so the notebook also runs on
# machines with a different home directory, cache location or snapshot
# revision. All requested files come from the same repo and revision, so the
# parent directory of the last returned path is expected to contain them all.
local_model_dir = os.path.dirname(downloaded_model_path)

# Build the SentencePiece-based T5 tokenizer from the downloaded files,
# opting out of the legacy tokenization behaviour.
tokenizer = T5Tokenizer.from_pretrained(local_model_dir, legacy=False)

# Load the seq2seq model weights by repo id.
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

# NOTE(review): this rebinds the imported `pipeline` factory to the pipeline
# instance. The name is kept because later cells call `pipeline(prompt)`.
# NOTE(review): device=1 presumably selects the second accelerator; this
# would fail on a single-GPU or CPU-only machine -- confirm before sharing.
pipeline = pipeline(
    "text2text-generation",
    model=model,
    device=1,
    tokenizer=tokenizer,
    max_length=1000,
)

In [2]:
# Experiment: ask the model to pull spine bone labels out of a free-text
# report sentence and to expand any ranges into individual labels.
prompt = '''I have a radiologist report, which includes where we should treat the patient on the Spine. Anatomically, the bones in the spine are labeled by these labels (these labels are in order, where C1 represents the top bone, and S5 represents the bottom bone): {C1, C2, C3, C4, C5, C6, C7, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, L1, L2, L3, L4, L5, S1, S2, S3, S4, S5}. Could you please help me to extract bone labels from the report below within the quotation marks, and keep the answer to only the labels that I provided? It may include a range of bones, please refer to the list that I provided, and find all bones. No explanation is needed, just the labels separated by commas please. List the bones explicitly one by one with out '-'.

"Plan to treat C2-T5, and T7-MidT9 diease."
'''

# Run the prompt and keep only the text of the first returned result.
first_result = pipeline(prompt)[0]
response = first_result['generated_text']
print(response)

C2-T5,T7-MidT9 



In [10]:
# Experiment: a narrower prompt that only asks the model to expand
# already-extracted label ranges into an explicit list of bones.
prompt = '''Human's bones in spine are labeled by these labels 
(these labels are in order, where C1 represents the top bone, and S5 represents the bottom bone): 
{C1, C2, C3, C4, C5, C6, C7, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, L1, L2, L3, L4, L5, S1, S2, S3, S4, S5}.
The '-' in the following sentence represents a range of bones, the letters before it represent the top bone of the range,
and the letters after it represent the bottom bone of the range.
Remove '-' and print every bone in the range one by one explicitly.

"C2-T5,T7-T9"
'''

# Run the prompt and keep only the text of the first returned result.
first_result = pipeline(prompt)[0]
response = first_result['generated_text']
print(response)

C2-T5,T7-T9 



In [11]:
# Sanity check: a plain conversational prompt with no task instructions.
prompt = '''
Well, how are you?
'''

# Run the prompt and keep only the text of the first returned result.
first_result = pipeline(prompt)[0]
response = first_result['generated_text']
print(response)

I am doing well, thank you for asking.
