In [1]:
import os
import time

import torch
import platform
import subprocess

import transformers
from transformers import (
    AutoConfig,
    AutoModel,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    GenerationConfig,
    PretrainedConfig,
    PreTrainedModel,
    PreTrainedTokenizerBase
)
from peft import LoraConfig, PeftModel

import json
from tqdm import tqdm
import re

In [2]:
from torch import cuda, bfloat16
hf_auth = "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"
# hf_auth = os.getenv(key="HUGGINGFACEHUB_API_TOKEN")
device_map = {"": 0}
device = f"cuda:{cuda.current_device()}" if cuda.is_available() else "cpu"

In [3]:
def create_bnb_config():
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    return bnb_config

In [4]:
def load_model(model_name, bnb_config=None):
    n_gpus = torch.cuda.device_count()
    # max_memory = f"{40960}MB"

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        # quantization_config=bnb_config,
        # torch_dtype='auto',
        device_map="auto",  # dispatch efficiently the model on the available ressources
        use_auth_token=hf_auth,
        trust_remote_code=True
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_auth, trust_remote_code=True)
    # Needed for LLaMA tokenizer
    # tokenizer.pad_token = tokenizer.eos_token
    
    # yi
    # tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
    # model = AutoModelForCausalLM.from_pretrained(
    #     model_name,
    #     device_map="auto",
    #     torch_dtype='auto'
    # ).eval()

    return model, tokenizer

### Load model
```
en:
berkeley-nest/Starling-LM-7B-alpha
openchat/openchat_3.5

ch:
hfl/chinese-alpaca-2-13b
01-ai/Yi-34B-Chat
Xwin-LM/Xwin-LM-13B-V0.2
WizardLM/WizardLM-13B-V1.2
yentinglin/Taiwan-LLM-13B-v2.0-chat
MediaTek-Research/Breeze-7B-Instruct-v0.1
```

In [5]:
model_checkpoint = "yentinglin/Taiwan-LLM-13B-v2.0-chat"
model_name = model_checkpoint.split("/")[-1]

# bnb_config = transformers.BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_compute_dtype=bfloat16,
# )

model, tokenizer = load_model(model_checkpoint)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


### fine-tune model

In [4]:
model_name = "hfl/chinese-llama-2-13b"
eval_model_name = "train_2023-12-25"
model_checkpoint = f'./LLaMA-Factory/saves/Custom/lora/{eval_model_name}'

base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    # low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)
model = PeftModel.from_pretrained(base_model, model_checkpoint)
model = model.merge_and_unload()
tokenizer = AutoTokenizer.from_pretrained(model_name)

Loading checkpoint shards: 100%|██████████| 3/3 [00:41<00:00, 13.70s/it]


In [5]:
 tokenizer.pad_token = tokenizer.eos_token

### inference

In [6]:
def generate_response(prompt: str, tokenizer: AutoTokenizer, model):
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    generation_config = GenerationConfig(
        eos_token_id=model.config.eos_token_id,
        pad_token_id=model.config.pad_token_id,
        max_new_tokens=2048,
        # dosample=False,
        temperature=0.3,
        # repetition_penalty=1.1,
        # top_p=0.75,
        # num_beams=5,
        # early_stopping=True,
    )
    outputs = model.generate(
        input_ids=inputs["input_ids"].to("cuda"),
        attention_mask=inputs["attention_mask"],
        generation_config=generation_config,
    )
    
    # yi
    # input_ids = tokenizer.apply_chat_template(conversation=prompt, tokenize=True, add_generation_prompt=True, return_tensors='pt')
    # output_ids = model.generate(
    #                 input_ids=input_ids.to('cuda'),           
    #                 generation_config=generation_config,
    #             )
    # return tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)



    # input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    # outputs = model.generate(
    #     input_ids.cuda(),
    #     max_new_tokens=1024,
    #     pad_token_id=tokenizer.pad_token_id,
    #     eos_token_id=tokenizer.eos_token_id,
    #     temperature=0.3,
    # )
    
    #     Xwin-LM/Xwin-LM-13B-V0.2
    # inputs = tokenizer(prompt, return_tensors="pt")
    # outputs = model.generate(inputs.input_ids.cuda(), generation_config=generation_config)
    return tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)

    # return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [11]:
start_time = time.time()
res = generate_response(prompt, tokenizer, model)
end_time = time.time()
execution_time = end_time - start_time
print('model:', model_name)
print('time:', execution_time)
print(res)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


model: Taiwan-LLM-13B-v2.0-chat
time: 110.29866123199463
{
 "Answer": "陳小姐您的情況下建議可供參考:(1) 飲食應減少兩成，避免食用過量。(2) 避免飲用酒精、咖啡及其他刺激性食品。(3) 維持低鹽狀態，每日鹽份攝取需控制在500-1000mg。(4) 避免吸菸、嚼檳榔。(5) 避免過度運動。(6) 避免過度勞累。(7) 避免過度熬夜。(8) 避免過度曝曬。(9) 避免過度寒冷。(10) 避免過度熱。(11) 避免過度濕潤。(12) 避免過度乾燥。(13) 避免過度寒冷。(14) 避免過度熱。(15) 避免過度濕潤。(16) 避免過度乾燥。(17) 避免過度寒冷。(18) 避免過度熱。(19) 避免過度濕潤。(20) 避免過度乾燥。(21) 避免過度寒冷。(22) 避免過度熱。(23) 避免過度濕潤。(24) 避免過度乾燥。(25) 避免過度寒冷。(26) 避免過度熱。(27) 避免過度濕潤。(28) 避免過度乾燥。(29) 避免過度寒冷。(30) 避免過度熱。(31) 避免過度濕潤。(32) 避免過度乾燥。(33) 避免過度寒冷。(34) 避免過度熱。(35) 避免過度濕潤。(36) 避免過度乾燥。(37) 避免過度寒冷。(38) 避免過度熱。(39) 避免過度濕潤。(40) 避免過度乾燥。(41) 避免過度寒冷。(42) 避免過度熱。(43) 避免過度濕潤。(44) 避免過度乾燥。(45) 避免過度寒冷。(46) 避免過度熱。(47) 避免過度濕潤。(48) 避免過度乾燥。(49) 避免過度寒冷。(50) 避免過度熱。(51) 避免過度濕潤。(52) 避免過度乾燥。(53) 避免過度寒冷。(54) 避免過度熱。(55) 避免過度濕潤。(56) 避免過度乾燥。(57) 避免過度寒冷。(58) 避免過度熱。(59) 避免過度濕潤。(60) 避免過度乾燥。(61) 避免過度寒冷。(62) 避免過度熱。(63) 避免過度濕潤。(64) 避免過度乾燥。(65) �����������度寒冷。(666) ���������度��。(67) �������度�����。(68) �������度�����。(9) �������度��。(10) ��������度�����。(1111) �������度

### prompt

In [9]:
context = '''2. Daily dietary considerations: (1) Take low salt around 1500-2000 mg a day; avoid smoking and alcohol drinking, coffee and stimulated food, etc.  (2) Suggest eat 20% less, don’t eat too mach. (3) Eat more fruits, high fiber food s 20-25gm a day and take vitami n supplements.  (4) Maintain your ideal body weight, and check it every day. (5) Don’t drink water or other bev erages too much (daily water intake according to               what suggested to you when first admitting to the hospital).  (6) Reading a food label for getting know  the “Nutrition Facts” labels on the food package. 3. Notice for daily life: (1) Quit smoking and avoid second -hand smoking. The Nicotine could cause coronary   heart disease, arteriosclerosis  and thrombosis. (2) Keep your mood stable, mood fluctuation could increase the burd en of heart.  (3) Maintain a regular life, go to  bed early and get up early. (4) Increase your exercise loading gradually to your recovery statu s. Avoid heavy lifting and doing stre nuous exercise or', '2. Daily dietary considerations: (1) Take low salt around 1500-2000 mg a day; avoid smoking and alcohol drinking, coffee and stimulated food, etc.  (2) Suggest eat 20% less, don’t eat too mach. (3) Eat more fruits, high fiber food s 20-25gm a day and take vitami n supplements.  (4) Maintain your ideal body weight, and check it every day. (5) Don’t drink water or other bev erages too much (daily water intake according to               what suggested to you when first admitting to the hospital).  (6) Reading a food label for getting know  the “Nutrition Facts” labels on the food package. 3. Notice for daily life: (1) Quit smoking and avoid second -hand smoking. The Nicotine could cause coronary   heart disease, arteriosclerosis  and thrombosis. (2) Keep your mood stable, mood fluctuation could increase the burd en of heart.  (3) Maintain a regular life, go to  bed early and get up early. (4) Increase your exercise loading gradually to your recovery statu s. Avoid heavy lifting and doing stre nuous exercise or', 'thr ough car diac r ehab, including your health car e team, e xercise and nutrition specialists, physical ther apists, and counselors or mental health pr ofessionals.How can I be healthier if I have coronary artery disease?If you have CAD , your health car e team may suggest the follo wing steps to help lo wer your risk for heart attack or worsening heart disease:Lifestyle changes, such as eating a healthier (lo wer sodium, lo wer fat) diet, incr easing physical activity , reaching a healthy weight, and quitting smokingMedicines to tr eat risk factors for CAD , such as high cholester ol, high blood pr essur e, or an irr egular heartbeatSurgical pr ocedur es to help r estor e blood ﬂo w to the heartLast Reviewed: July 19, 2021Source: National Center for Chronic Disease Prevention and Health Promotion , Division for Heart Disease and Stroke Prevention', 'thr ough car diac r ehab, including your health car e team, e xercise and nutrition specialists, physical ther apists, and counselors or mental health pr ofessionals.How can I be healthier if I have coronary artery disease?If you have CAD , your health car e team may suggest the follo wing steps to help lo wer your risk for heart attack or worsening heart disease:Lifestyle changes, such as eating a healthier (lo wer sodium, lo wer fat) diet, incr easing physical activity , reaching a healthy weight, and quitting smokingMedicines to tr eat risk factors for CAD , such as high cholester ol, high blood pr essur e, or an irr egular heartbeatSurgical pr ocedur es to help r estor e blood ﬂo w to the heartLast Reviewed: July 19, 2021Source: National Center for Chronic Disease Prevention and Health Promotion , Division for Heart Disease and Stroke Prevention', 'to quit .Discuss checking your\xa0 cholester ol and triglyceride le vels\xa0 with your health car e provider. Learn mor e about cholester ol.Mak e\xa0healthy food.  Having o verweight or obesity r aises your risk of heart disease. Learn mor e about overweight and obesity .Limit alcohol\xa0 intak e to one drink a day . Learn mor e about alcohol .Lower your str ess le vel\xa0and ﬁnd healthy ways to cope with str ess. Learn mor e about coping with str ess.More InformationCDC’s Public Health E\x00orts Related to Heart DiseaseState and Local Public Health Actions Pr ogramMillion HeartsFor mor e information on men and heart disease, visit the follo wing websites:Centers for Disease Contr ol and Pr eventionAmerican Heart AssociationNational Heart, Lung, and Blood Institute (NHLBI)89®\uf209\uf209\uf2091. National Center for Health Statistics. Multiple Cause of Death 2018–2021 on CDC W ONDER Database . Accessed February 2, 2023.2. Heron M.\xa0 Deaths: leading causes for 2016  [PDF-2.3M].\xa0 Natl Vital Stat Rep . 2018;67(6):1–77. Accessed May 9, 2019.3.'''
user_prompt = '我是王伯伯，80歲，我有高血壓、糖尿病和肺癌，想問平常的飲食跟作息要怎麼調整？'
json_schema='''```
{"properties": {"ANSWER": {"description": "The answer use the context as knowledge to response user question. Keep the answer concise. The full answer of the question including bullet point items. ANSWER must be raw string not list of string.", "title": "Answer", "type": "string"}, "WEBSITES": {"description": "Links or URLs that extracted from the context.", "items": {"type": "string"}, "title": "Websites", "type": "array"}, "NAME": {"description": "The name of the questioner described in user question. If you don't know just leave it blank.", "title": "Name", "type": "string"}, "AGE": {"description": "The age of the questioner described in user question. If you don't know just leave it blank.", "title": "Age", "type": "integer"}, "GENDER": {"description": "The gender of the questioner described in user question. 0 would be female, 1 would be male, 2 would be other. If you don't know just leave it blank.", "title": "Gender", "type": "integer"}, "SYMPTOMS": {"description": "Symptoms that the questioner is currently experiencing described in user question. Each symptom should be classified into separate item in the list.", "items": {"type": "string"}, "title": "Symptoms", "type": "array"}, "MEDICATION": {"description": "Name of the medication the questioner is taking described in user question.", "items": {"type": "string"}, "title": "Medication", "type": "array"}, "INTENTION": {"description": "Use a sentence to describe questioner's intent described in user question.", "title": "Intention", "type": "string"}}, "required": ["ANSWER", "WEBSITES", "NAME", "AGE", "GENDER", "SYMPTOMS", "MEDICATION", "INTENTION"]}
```'''
# json_schema='''```
# {"properties": {"ANSWER": {"description": "The answer use the context as knowledge to response user question. Keep the answer concise. The full answer of the question including bullet point items. ANSWER must be raw string not list of string.", "title": "Answer", "type": "string"}, "NAME": {"description": "The name of the questioner described in user question. If you don't know just leave it blank.", "title": "Name", "type": "string"}, "AGE": {"description": "The age of the questioner described in user question. If you don't know just leave it blank.", "title": "Age", "type": "integer"}, "GENDER": {"description": "The gender of the questioner described in user question. 0 would be female, 1 would be male, 2 would be other. If you don't know just leave it blank.", "title": "Gender", "type": "integer"}, "SYMPTOMS": {"description": "Symptoms that the questioner is currently experiencing described in user question. Each symptom should be classified into separate item in the list.", "items": {"type": "string"}, "title": "Symptoms", "type": "array"}, "MEDICATION": {"description": "Name of the medication the questioner is taking described in user question.", "items": {"type": "string"}, "title": "Medication", "type": "array"}, "INTENTION": {"description": "Use a sentence to describe questioner's intent described in user question.", "title": "Intention", "type": "string"}}, "required": ["ANSWER", "NAME", "AGE", "GENDER", "SYMPTOMS", "MEDICATION", "INTENTION"]}
# ```'''

demo = '''The correct output would be like: {"ANSWER": "...", "NAME": "...", "AGE": 0, "GENDER": 0,"WEBSITES": ["..."], "SYMPTOMS": ["..."], "MEDICATION":["..."], "INTENTION": "..."}'''
format_instruction = f'''1. Output a JSON object only.
2. JSON object must include the filed defined in json schema.
3. The key of the JSON object must be English.
4. The language of string type value in the JSON object must be the same language with the user's question.
5. If you don't know the value of the JSON object just leave it blank.
6. {demo}
'''

In [7]:
sys_prompt = '''Take the provided context past conversation and relevant information as knowledge base to answer user's question from the text below in a JSON format and following the JSON format instruction below:

JSON format instruction: 
{"properties":{"ANSWER":{"description":"The full answer of the question including bullet point items. ANSWER must be raw string not list of string.","title":"Answer","type":"string"},"FAMILY_HISTORY":{"description":"Any known family medical history, such as hypertension or related health issues. Leave blank if unknown.","items":{"type":"string"},"title":"Family History","type":"array"},"SYMPTOMS":{"description":"Any symptoms related to hypertension the patient is experiencing, such as headache, dizziness, chest tightness, etc. Each symptom should be a separate item in the list.","items":{"type":"string"},"title":"Symptoms","type":"array"},"LIFESTYLE":{"description":"Patient's lifestyle factors, including diet habits, exercise routines, smoking, and alcohol consumption, as these can affect blood pressure.","title":"Lifestyle","type":"string"},"MEDICATION_HISTORY":{"description":"Current and past medications, including prescription and over-the-counter drugs that may affect blood pressure.","items":{"type":"string"},"title":"Medication History","type":"array"},"STRESS_AND_MENTAL_HEALTH":{"description":"Information on the patient's stress levels and mental health status, as these can impact blood pressure.","title":"Stress And Mental Health","type":"string"},"REGULAR_CHECKUPS":{"description":"Details of the last blood pressure measurement and any regular checkup habits for monitoring blood pressure.","title":"Regular Checkups","type":"string"}},"required":["ANSWER","FAMILY_HISTORY","SYMPTOMS","LIFESTYLE","MEDICATION_HISTORY","STRESS_AND_MENTAL_HEALTH","REGULAR_CHECKUPS"]}'''

in_context_prompt = '''Context: '2. Daily dietary considerations: (1) Take low salt around 500-1000 mg a day; avoid alcohol drinking, coffee and stimulated food, etc. (2) Suggest eat 20% less, don’t 
Past conversation: The individual has taken blood pressure medication in the morning and reports current blood pressure and blood sugar levels. They are seeking dietary recommendations. AI suggests reducing intake of saturated and trans fats, cholesterol, sodium, and sugar to prevent and manage high cholesterol, lower blood pressure, and control diabetes.王宗柏先生的家族病史顯示母親有糖尿病，這增加了他的遺傳風險。加上他個人有高血壓、血管鈣化和血脂偏高，這些問題可能互相加劇，進一步增加心血管疾病的風險，因此需要密切監控其健康並採取預防措施。
Relevant information: 王宗柏聯絡方式 是 聯絡電話
王宗柏 有 抽菸習慣
王宗柏初診日期 是 2020/05/16
王宗柏BMI 是 27.7
王宗柏 有 吃檳榔習慣
王宗柏家族病史 為 母親患有糖尿病
王宗柏過去病史 為 高血壓、血管鈣化、血脂偏高
王宗柏職業 是 公務人員退休
王宗柏行動電話 是 0913426234
Text: 想問平常的飲食跟作息要怎麼調整。
JSON: {
 "Answer": "王宗柏您的情況下建議可供參考:(1) 維持低鹽狀態，每日鹽份攝取需控制在500-1000mg。請避免飲用酒精、咖啡及其他刺激性食品。(2) 飲食應減少兩成，避免食用過量。",
 "Symptoms": ["高血壓", "血管鈣化", "血脂偏高"],
 "Lifestyle": "抽菸，吃檳榔",
 "Family History": ["糖尿病"], 
 "Medication History": ["高血壓", "血管鈣化", "血脂偏高"],
 "Stress And Mental Health": "無",
 "Regular Checkups": "BMI 27.7，每日上午量測血壓與血糖數值"
}'''

user_prompt = '''Context: '2. Daily dietary considerations: (1) Take low salt around 1500-2000 mg a day; avoid smoking and alcohol drinking, coffee and stimulated food, etc. (2) Suggest eat 20% less, don’t eat too mach. (3) Eat more fruits, high fiber food s 20-25gm a day and take vitami n supplements. (4) Maintain your ideal body weight, and check it every day. (5) Don’t drink water or other bev erages too much (daily water intake according to    what suggested to you when first admitting to the hospital). (6) Reading a food label for getting know the “Nutrition Facts” labels on the food package. 3. Notice for daily life: (1) Quit smoking and avoid second -hand smoking. The Nicotine could cause coronary heart disease, arteriosclerosis and thrombosis. (2) Keep your mood stable, mood fluctuation could increase the burd en of heart. (3) Maintain a regular life, go to bed early and get up early. (4) Increase your exercise loading gradually to your recovery statu s. Avoid heavy lifting and doing stre nuous exercise or', '2. Daily dietary considerations: (1) Take low salt around 1500-2000 mg a day; avoid smoking and alcohol drinking, coffee and stimulated food, etc. (2) Suggest eat 20% less, don’t eat too mach. (3) Eat more fruits, high fiber food s 20-25gm a day and take vitami n supplements. (4) Maintain your ideal body weight, and check it every day. (5) Don’t drink water or other bev erages too much (daily water intake according to    what suggested to you when first admitting to the hospital). (6) Reading a food label for getting know the “Nutrition Facts” labels on the food package. 3. Notice for daily life: (1) Quit smoking and avoid second -hand smoking. The Nicotine could cause coronary heart disease, arteriosclerosis and thrombosis. (2) Keep your mood stable, mood fluctuation could increase the burd en of heart. (3) Maintain a regular life, go to bed early and get up early. (4) Increase your exercise loading gradually to your recovery statu s. Avoid heavy lifting and doing stre nuous exercise or', 'thr ough car diac r ehab, including your health car e team, e xercise and nutrition specialists, physical ther apists, and counselors or mental health pr ofessionals.How can I be healthier if I have coronary artery disease?If you have CAD , your health car e team may suggest the follo wing steps to help lo wer your risk for heart attack or worsening heart disease:Lifestyle changes, such as eating a healthier (lo wer sodium, lo wer fat) diet, incr easing physical activity , reaching a healthy weight, and quittingeat too mach.
Past conversation: 陳小姐的家族病史顯示父親患有大腸癌，這增加了他的遺傳風險。加上他個人有糖尿病，這些問題可能互相加劇，進一步增加心血管疾病的風險，因此需要密切監控其健康並採取預防措施。陳小姐平常有游泳、跳繩習慣。
Relevant information: 陳小姐聯絡方式 是 聯絡電話
陳小姐 無 抽菸習慣
陳小姐初診日期 是 2020/05/16
陳小姐身高 是 156公分
陳小姐體重 是 47公分
陳小姐 無 吃檳榔習慣
陳小姐家族病史 為 父親患有大腸癌
陳小姐家族病史 為 母親患有甲狀線異常
陳小姐過去病史 為 糖尿病
陳小姐職業 是 教師退休
陳小姐行動電話 是 0912345678
Text: 想瞭解如何活的健康。
JSON:'''

### llama-2
#### chinese-alpaca-2
```
[INST] <<SYS>>\nYou are a helpful assistant. 你是一个乐于助人的助手。\n<</SYS>>\n\n请列举5条文明乘车的建议 [/INST]
```

In [9]:
prompt = f'''[INST] <<SYS>>
{sys_prompt}
<</SYS>>

{in_context_prompt}
{user_prompt} [/INST]'''
print(prompt)

[INST] <<SYS>>
Take the provided context past conversation and relevant information as knowledge base to answer user's question from the text below in a JSON format and following the JSON format instruction below:

JSON format instruction: 
{"properties":{"ANSWER":{"description":"The full answer of the question including bullet point items. ANSWER must be raw string not list of string.","title":"Answer","type":"string"},"FAMILY_HISTORY":{"description":"Any known family medical history, such as hypertension or related health issues. Leave blank if unknown.","items":{"type":"string"},"title":"Family History","type":"array"},"SYMPTOMS":{"description":"Any symptoms related to hypertension the patient is experiencing, such as headache, dizziness, chest tightness, etc. Each symptom should be a separate item in the list.","items":{"type":"string"},"title":"Symptoms","type":"array"},"LIFESTYLE":{"description":"Patient's lifestyle factors, including diet habits, exercise routines, smoking, and 

In [8]:
SYSTEM_PROMPT= '''You are an assistant for question-answering tasks.
You are designed to respond a JSON object.'''


# SYSTEM_PROMPT='You are a helpful assistant. 你是一个乐于助人的助手。请你提供专业、有逻辑、内容真实、有价值的详细回复。' # Try this one, if you prefer longer response.
user_prompt = 'Here is user question: 我是王伯伯，80歲，我有高血壓、糖尿病和肺癌，想問平常的飲食跟作息要怎麼調整？'
context = '''Here is data_source: 2. Daily dietary considerations: (1) Take low salt around 1500-2000 mg a day; avoid smoking and alcohol drinking, coffee and stimulated food, etc.  (2) Suggest eat 20% less, don’t eat too mach. (3) Eat more fruits, high fiber food s 20-25gm a day and take vitami n supplements.  (4) Maintain your ideal body weight, and check it every day. (5) Don’t drink water or other bev erages too much (daily water intake according to               what suggested to you when first admitting to the hospital).  (6) Reading a food label for getting know  the “Nutrition Facts” labels on the food package. 3. Notice for daily life: (1) Quit smoking and avoid second -hand smoking. The Nicotine could cause coronary   heart disease, arteriosclerosis  and thrombosis. (2) Keep your mood stable, mood fluctuation could increase the burd en of heart.  (3) Maintain a regular life, go to  bed early and get up early. (4) Increase your exercise loading gradually to your recovery statu s. Avoid heavy lifting and doing stre nuous exercise or', '2. Daily dietary considerations: (1) Take low salt around 1500-2000 mg a day; avoid smoking and alcohol drinking, coffee and stimulated food, etc.  (2) Suggest eat 20% less, don’t eat too mach. (3) Eat more fruits, high fiber food s 20-25gm a day and take vitami n supplements.  (4) Maintain your ideal body weight, and check it every day. (5) Don’t drink water or other bev erages too much (daily water intake according to               what suggested to you when first admitting to the hospital).  (6) Reading a food label for getting know  the “Nutrition Facts” labels on the food package. 3. Notice for daily life: (1) Quit smoking and avoid second -hand smoking. The Nicotine could cause coronary   heart disease, arteriosclerosis  and thrombosis. (2) Keep your mood stable, mood fluctuation could increase the burd en of heart.  (3) Maintain a regular life, go to  bed early and get up early. (4) Increase your exercise loading gradually to your recovery statu s. Avoid heavy lifting and doing stre nuous exercise or', 'thr ough car diac r ehab, including your health car e team, e xercise and nutrition specialists, physical ther apists, and counselors or mental health pr ofessionals.How can I be healthier if I have coronary artery disease?If you have CAD , your health car e team may suggest the follo wing steps to help lo wer your risk for heart attack or worsening heart disease:Lifestyle changes, such as eating a healthier (lo wer sodium, lo wer fat) diet, incr easing physical activity , reaching a healthy weight, and quitting smokingMedicines to tr eat risk factors for CAD , such as high cholester ol, high blood pr essur e, or an irr egular heartbeatSurgical pr ocedur es to help r estor e blood ﬂo w to the heartLast Reviewed: July 19, 2021Source: National Center for Chronic Disease Prevention and Health Promotion , Division for Heart Disease and Stroke Prevention', 'thr ough car diac r ehab, including your health car e team, e xercise and nutrition specialists, physical ther apists, and counselors or mental health pr ofessionals.How can I be healthier if I have coronary artery disease?If you have CAD , your health car e team may suggest the follo wing steps to help lo wer your risk for heart attack or worsening heart disease:Lifestyle changes, such as eating a healthier (lo wer sodium, lo wer fat) diet, incr easing physical activity , reaching a healthy weight, and quitting smokingMedicines to tr eat risk factors for CAD , such as high cholester ol, high blood pr essur e, or an irr egular heartbeatSurgical pr ocedur es to help r estor e blood ﬂo w to the heartLast Reviewed: July 19, 2021Source: National Center for Chronic Disease Prevention and Health Promotion , Division for Heart Disease and Stroke Prevention', 'to quit .Discuss checking your\xa0 cholester ol and triglyceride le vels\xa0 with your health car e provider. Learn mor e about cholester ol.Mak e\xa0healthy food.  Having o verweight or obesity r aises your risk of heart disease. Learn mor e about overweight and obesity .Limit alcohol\xa0 intak e to one drink a day . Learn mor e about alcohol .Lower your str ess le vel\xa0and ﬁnd healthy ways to cope with str ess. Learn mor e about coping with str ess.More InformationCDC’s Public Health E\x00orts Related to Heart DiseaseState and Local Public Health Actions Pr ogramMillion HeartsFor mor e information on men and heart disease, visit the follo wing websites:Centers for Disease Contr ol and Pr eventionAmerican Heart AssociationNational Heart, Lung, and Blood Institute (NHLBI)89®\uf209\uf209\uf2091. National Center for Health Statistics. Multiple Cause of Death 2018–2021 on CDC W ONDER Database . Accessed February 2, 2023.2. Heron M.\xa0 Deaths: leading causes for 2016  [PDF-2.3M].\xa0 Natl Vital Stat Rep . 2018;67(6):1–77. Accessed May 9, 2019.3.'''
format_instruction = '''format_instruction: 返回一個 JSON 格式的物件，其中包含以下幾個字段：
- ANSWER (string)：ANSWER must take data_source as a knowledge to reponse user. must not contain any URLs or links. must be zh-TW.
- NAME（string)：The name extracted from uesr's question. If you don't know just leave it blank.
- AGE (integer)：The questioner's age. If you don't know just leave it blank.
- GENDER (integer)：The user's gender describe in question. Use 0 to represent female, 1 for male, 2 for others. If you don't know just leave it blank.'''

# - WEBSITES (array)：Links or URLs in data_source.
# - SOURCE (string)：source used to answer the user's question, should be documents.

INSTRUCTION = 'Answer in JSON format follow format_instruction.'

ALL_PROMPT= f'''[INST] <<SYS>>
{SYSTEM_PROMPT}
{user_prompt}
{context}
<</SYS>>

{INSTRUCTION}
{format_instruction} [/INST]'''
# ALL_PROMPT = "[INST] <<SYS>>\nYou are a helpful assistant. 你是一个乐于助人的助手。\n<</SYS>>\n\n请列举5条文明乘车的建议 [/INST]"
print(ALL_PROMPT)

[INST] <<SYS>>
You are an assistant for question-answering tasks.
You are designed to respond a JSON object.
Here is user question: 我是王伯伯，80歲，我有高血壓、糖尿病和肺癌，想問平常的飲食跟作息要怎麼調整？
Here is data_source: 2. Daily dietary considerations: (1) Take low salt around 1500-2000 mg a day; avoid smoking and alcohol drinking, coffee and stimulated food, etc.  (2) Suggest eat 20% less, don’t eat too mach. (3) Eat more fruits, high fiber food s 20-25gm a day and take vitami n supplements.  (4) Maintain your ideal body weight, and check it every day. (5) Don’t drink water or other bev erages too much (daily water intake according to               what suggested to you when first admitting to the hospital).  (6) Reading a food label for getting know  the “Nutrition Facts” labels on the food package. 3. Notice for daily life: (1) Quit smoking and avoid second -hand smoking. The Nicotine could cause coronary   heart disease, arteriosclerosis  and thrombosis. (2) Keep your mood stable, mood fluctuation could incr

### Alpaca
```
prompt_input: 
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n

prompt_no_input: 
Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Response:\n
```

In [89]:
instruction = '''你是問答任務的助手。你的答案需依照format_instruction，回傳一個JSON對象。'''

input_prompt = f'''以下是 format_instruction：請以JSON格式回答，其中包含以下幾個字段：
- ANSWER (string)：將context作為知識回答用戶問題，必須是繁體中文。不得包含任何URL或連結。
- NAME（string)：從用戶問題中提取的名稱。如果不知道，請將其留空。
- AGE（integer)：用戶的年齡。如果不知道，請將其留空。
- GENDER (integer)：用戶的性別。用0代表女性，1代表男性，2代表其他。如果不知道，請將其留空。

以下是用戶問題: {user_prompt}

以下是context: {context}

依照format_instruction的指示回答。'''

# prompt = f'''Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Input:\n{input_prompt}\n\n### Response:\n'''
prompt = f'''Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n{input_prompt}\n\n### Response:\n'''

print(prompt)

Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
你是問答任務的助手。你的答案需依照format_instruction，回傳一個JSON對象。
以下是 format_instruction：請以JSON格式回答，其中包含以下幾個字段：
- ANSWER (string)：將context作為知識回答用戶問題，必須是繁體中文。不得包含任何URL或連結。
- NAME（string)：從用戶問題中提取的名稱。如果不知道，請將其留空。
- AGE（integer)：用戶的年齡。如果不知道，請將其留空。
- GENDER (integer)：用戶的性別。用0代表女性，1代表男性，2代表其他。如果不知道，請將其留空。

以下是用戶問題: 我是王伯伯，80歲，我有高血壓、糖尿病和肺癌，想問平常的飲食跟作息要怎麼調整？

以下是context: 2. Daily dietary considerations: (1) Take low salt around 1500-2000 mg a day; avoid smoking and alcohol drinking, coffee and stimulated food, etc.  (2) Suggest eat 20% less, don’t eat too mach. (3) Eat more fruits, high fiber food s 20-25gm a day and take vitami n supplements.  (4) Maintain your ideal body weight, and check it every day. (5) Don’t drink water or other bev erages too much (daily water intake according to               what suggested to you when first admitting to the hospital).  (6) Reading a food label for 

In [90]:
res = generate_response(prompt, tokenizer, model)
print(res[len(prompt):])

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


您好！我叫王伯伯，今年80岁，患有高血压、糖尿病和肺癌。我的饮食建议如下：每天摄入盐量控制在1500~2000毫克之间；戒烟戒酒，避免咖啡因刺激食物；少吃多餐，每顿饭吃20%的食物；多吃水果蔬菜，保证每日膳食纤维摄入量为20~25克；保持理想体重，并每天都进行测量；适量饮水，不要过量饮用水或其他饮料；阅读食品标签了解营养成分表。此外，注意生活规律，早睡早起，增加运动强度，但不可过度劳累，也不可做剧烈运动。


### Vicuna 
#### Xwin-LM/Xwin-LM-13B-V0.2 WizardLM/WizardLM-13B-V1.2
```
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: Hi! ASSISTANT: Hello.</s>USER: Who are you? ASSISTANT: I am Xwin-LM.</s>......
```

In [9]:
prompt = f'''{sys_prompt} 
USER: {in_context_prompt}
{user_prompt} ASSISTANT:'''
print(prompt)

Take the provided context past conversation and relevant information as knowledge base to answer user's question from the text below in a JSON format and following the JSON format instruction below:

JSON format instruction: 
{"properties":{"ANSWER":{"description":"The full answer of the question including bullet point items. ANSWER must be raw string not list of string.","title":"Answer","type":"string"},"FAMILY_HISTORY":{"description":"Any known family medical history, such as hypertension or related health issues. Leave blank if unknown.","items":{"type":"string"},"title":"Family History","type":"array"},"SYMPTOMS":{"description":"Any symptoms related to hypertension the patient is experiencing, such as headache, dizziness, chest tightness, etc. Each symptom should be a separate item in the list.","items":{"type":"string"},"title":"Symptoms","type":"array"},"LIFESTYLE":{"description":"Patient's lifestyle factors, including diet habits, exercise routines, smoking, and alcohol consump

#### Taiwan-LLM-13B-v2.0
```
你是人工智慧助理，以下是用戶和人工智能助理之間的對話。你要對用戶的問題提供有用、安全、詳細和禮貌的回答。USER: {user} ASSISTANT:
```

In [10]:
# sys_prompt = f'''你是問答任務的人工智慧助理。你的回答需依照format_instruction的步驟，回傳一個JSON物件。
# context: {context}
# 以下是format_instruction：
# 步驟一、參考context的內容作為知識，回應使用者問題，回答需放在JSON物件中的ANSWER欄位
# 步驟二、生成JSON物件，包含以下欄位：
# - ANSWER (string)：步驟一產生的答案。需使用繁體中文、不可包含URL或連結。
# - NAME（string)：根據問題描述判斷出的使用者姓名。如果不知道，請留空。
# - AGE (integer)：根據問題描述判斷出的使用者年齡。如果不知道，請留空。
# - GENDER (integer)：根據問題描述判斷出的使用者性別。用0代表女性，1代表男性，2代表其他。如果不知道，請留空。'''


# prompt = f'''{sys_prompt}USER: {user_prompt}依照format_instruction的指示，用JSON格式回答。ASSISTANT:'''


# prompt = f'''A chat between a curious user and an artificial intelligence assistant. The assistant need to follow format_instruction to output response.
# format_instruction:
# {format_instruction}
# json schema:
# {json_schema}
# context: {context}
# user question: {user_prompt}
# USER: Follow format_instruction to output a JSON object only. ASSISTANT:'''

prompt = f'''你是人工智慧助理，以下是用戶和人工智能助理之間的對話。你要對用戶的問題提供有用、安全、詳細和禮貌的回答。
{sys_prompt}
USER: {in_context_prompt}
{user_prompt} ASSISTANT:'''

print(prompt)

你是人工智慧助理，以下是用戶和人工智能助理之間的對話。你要對用戶的問題提供有用、安全、詳細和禮貌的回答。
Take the provided context past conversation and relevant information as knowledge base to answer user's question from the text below in a JSON format and following the JSON format instruction below:

JSON format instruction: 
{"properties":{"ANSWER":{"description":"The full answer of the question including bullet point items. ANSWER must be raw string not list of string.","title":"Answer","type":"string"},"FAMILY_HISTORY":{"description":"Any known family medical history, such as hypertension or related health issues. Leave blank if unknown.","items":{"type":"string"},"title":"Family History","type":"array"},"SYMPTOMS":{"description":"Any symptoms related to hypertension the patient is experiencing, such as headache, dizziness, chest tightness, etc. Each symptom should be a separate item in the list.","items":{"type":"string"},"title":"Symptoms","type":"array"},"LIFESTYLE":{"description":"Patient's lifestyle factors, including diet ha

### Mistral
#### MediaTek-Research/Breeze-7B-Instruct-v0.1
```
<s> SYS_PROMPT   [INST] QUERY1 [/INST] RESPONSE1 [INST] QUERY2 [/INST]
```

In [9]:
# prompt = f'''<s> You are a helpful AI assistant built by MediaTek Research. The user you are helping speaks Traditional Chinese and comes from Taiwan. You need to follow format_instruction to output response.
# format_instruction:
# {format_instruction}
# json schema:
# {json_schema}
# context: {context}
# user question: {user_prompt}
# [INST] Follow format_instruction to output a JSON object only. [/INST]'''
prompt = f'''<s> {sys_prompt}
[INST] {in_context_prompt}
{user_prompt} [/INST]'''
print(prompt)

<s> Take the provided context past conversation and relevant information as knowledge base to answer user's question from the text below in a JSON format and following the JSON format instruction below:

JSON format instruction: 
{"properties":{"ANSWER":{"description":"The full answer of the question including bullet point items. ANSWER must be raw string not list of string.","title":"Answer","type":"string"},"FAMILY_HISTORY":{"description":"Any known family medical history, such as hypertension or related health issues. Leave blank if unknown.","items":{"type":"string"},"title":"Family History","type":"array"},"SYMPTOMS":{"description":"Any symptoms related to hypertension the patient is experiencing, such as headache, dizziness, chest tightness, etc. Each symptom should be a separate item in the list.","items":{"type":"string"},"title":"Symptoms","type":"array"},"LIFESTYLE":{"description":"Patient's lifestyle factors, including diet habits, exercise routines, smoking, and alcohol con

### (GPT4 Correct) template
### openchat/openchat_3.5 Starling-RM-7B-alpha
```
GPT4 Correct User: Hello<|end_of_turn|>GPT4 Correct Assistant: Hi<|end_of_turn|>GPT4 Correct User: How are you today?<|end_of_turn|>GPT4 Correct Assistant:
```

In [10]:
prompt = f'''GPT4 Correct User: {sys_prompt} {in_context_prompt} {user_prompt} <|end_of_turn|>GPT4 Correct Assistant:'''
print(prompt)
# Single-turn conversation
# prompt = "Hello, how are you?"
# single_turn_prompt = f"GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:"
# single_turn_prompt

GPT4 Correct User: Take the provided context past conversation and relevant information as knowledge base to answer user's question from the text below in a JSON format and following the JSON format instruction below:

JSON format instruction: 
{"properties":{"ANSWER":{"description":"The full answer of the question including bullet point items. ANSWER must be raw string not list of string.","title":"Answer","type":"string"},"FAMILY_HISTORY":{"description":"Any known family medical history, such as hypertension or related health issues. Leave blank if unknown.","items":{"type":"string"},"title":"Family History","type":"array"},"SYMPTOMS":{"description":"Any symptoms related to hypertension the patient is experiencing, such as headache, dizziness, chest tightness, etc. Each symptom should be a separate item in the list.","items":{"type":"string"},"title":"Symptoms","type":"array"},"LIFESTYLE":{"description":"Patient's lifestyle factors, including diet habits, exercise routines, smoking, 

### ChatML template
#### Yi, Qwen-14B-Chat, internlm/internlm2-chat-20b
```
<|im_start|>system
{system_message}<|im_end|>
<|im_start|>user
{prompt}<|im_end|>
<|im_start|>assistant
```

In [10]:
# prompt = f'''<|im_start|>system
# You are a helpful assistant. You need to follow format_instruction to output your response.
# format_instruction:
# 1. Output a JSON object only.
# 2. JSON object must include the filed defined in json schema.
# 3. The key of the JSON object must be English.
# 4. The language of string type value in the JSON object must be zh-TW.
# 5. {demo}
# json schema:
# {json_schema}
# context: {context}
# user question: {user_prompt}
# <|im_start|>user
# Please follow format_instruction to output a JSON object only.<|im_end|>
# <|im_start|>assistant'''

# prompt = f'''<|im_start|>system
# 你是一个乐于助人的助手。请你提供专业、有逻辑、内容真实、有价值的详细回复。<|im_end|>
# <|im_start|>user
# 我是王伯伯，80歲，我有高血壓、糖尿病和肺癌，想問平常的飲食跟作息要怎麼調整？<|im_end|>
# <|im_start|>assistant'''

prompt = f'''<|im_start|>system
{sys_prompt}<|im_end|>
<|im_start|>user
{in_context_prompt}
{user_prompt} <|im_end|>
<|im_start|>assistant'''

print(prompt)

<|im_start|>system
Take the provided context past conversation and relevant information as knowledge base to answer user's question from the text below in a JSON format and following the JSON format instruction below:

JSON format instruction: 
{"properties":{"ANSWER":{"description":"The full answer of the question including bullet point items. ANSWER must be raw string not list of string.","title":"Answer","type":"string"},"FAMILY_HISTORY":{"description":"Any known family medical history, such as hypertension or related health issues. Leave blank if unknown.","items":{"type":"string"},"title":"Family History","type":"array"},"SYMPTOMS":{"description":"Any symptoms related to hypertension the patient is experiencing, such as headache, dizziness, chest tightness, etc. Each symptom should be a separate item in the list.","items":{"type":"string"},"title":"Symptoms","type":"array"},"LIFESTYLE":{"description":"Patient's lifestyle factors, including diet habits, exercise routines, smoking, 

In [19]:
res = generate_response(prompt, tokenizer, model)
# print(res[len(prompt):])
print(res)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



JSON: {
 "Answer": "陳小姐您的情況下建議可供參考:(1) 維持健康的生活方式，包括均衡的飲食、適量的運動、保持理想的體重和定期健康檢查。(2) 由於您有糖尿病，需要嚴格控制血糖水平，遵循醫生建議的飲食計劃和藥物治療。(3) 如果有家族性的大腸癌病史，建議定期進行大腸癌篩檢。(4) 保持心態平和，避免過度壓力，因為壓力可能會影響血糖控制和心血管健康。(5) 如果您有游泳和跳繩的習慣，請在醫生指導下進行，並逐漸增加運動量以適應您的康復狀態。(6) 避免二手煙暴露，因為尼古丁可能會導致心臟病、動脈硬化和血栓形成。(7) 保持規律的生活作息，早睡早起。(8) 與您的醫療團隊合作，包括醫生、營養師、物理治療師和心理健康專家，以制定適合您的康復計劃。",
 "Symptoms": [],
 "Lifestyle": "游泳、跳繩",
 "Family History": ["大腸癌", "甲狀腺異常"],
 "Medication History": ["糖尿病"],
 "Stress And Mental Health": "無",
 "Regular Checkups": "身高156公分，體重47公分，每日上午量測血糖數值"
}
```

```json
{
 "Answer": "王宗柏您的情況下建議可供參考:(1) 維持低鹽狀態，每日鹽份攝取需控制在500-1000mg。請避免飲用酒精、咖啡及其他刺激性食品。(2) 飲食應減少兩成，避免食用過量。",
 "Symptoms": ["高血壓", "血管鈣化", "血脂偏高"],
 "Lifestyle": "抽菸，吃檳榔",
 "Family History": ["糖尿病"],
 "Medication History": ["高血壓", "血管鈣化", "血脂偏高"],
 "Stress And Mental Health": "無",
 "Regular Checkups": "BMI 27.7，每日上午量測血壓與血糖數值"
}
```
```json
{
 "Answer": "陳小姐您的情況下建議可供參考:(1) 維持健康的生活方式，包括均衡的飲食、適量的運動、保持理想的體重和定期健康檢查。(2) 由於您有糖尿病，需要嚴格控制血糖水平，遵循醫生建議的飲食計劃和藥物治療。(3) 如果有家族性

### Orion-14B
```
single-turn: <s>Human: Hello!\n\nAssistant: </s>
multi-turn:  <s>Human: Hello!\n\nAssistant: </s>Hi!</s>Human: How are you?\n\nAssistant: </s>I'm fine</s>
```

In [28]:
prompt = f'''<s>Human: {sys_prompt}
{in_context_prompt}
{user_prompt}

Assistant:'''
print(prompt)

<s>Human: Take the provided context past conversation and relevant information as knowledge base to answer user's question from the text below in a JSON format and following the JSON format instruction below:

JSON format instruction: 
{"properties":{"ANSWER":{"description":"The full answer of the question including bullet point items. ANSWER must be raw string not list of string.","title":"Answer","type":"string"},"FAMILY_HISTORY":{"description":"Any known family medical history, such as hypertension or related health issues. Leave blank if unknown.","items":{"type":"string"},"title":"Family History","type":"array"},"SYMPTOMS":{"description":"Any symptoms related to hypertension the patient is experiencing, such as headache, dizziness, chest tightness, etc. Each symptom should be a separate item in the list.","items":{"type":"string"},"title":"Symptoms","type":"array"},"LIFESTYLE":{"description":"Patient's lifestyle factors, including diet habits, exercise routines, smoking, and alcoh