In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/llama-3/transformers/8b-chat-hf/1/model.safetensors.index.json
/kaggle/input/llama-3/transformers/8b-chat-hf/1/model-00003-of-00004.safetensors
/kaggle/input/llama-3/transformers/8b-chat-hf/1/config.json
/kaggle/input/llama-3/transformers/8b-chat-hf/1/LICENSE
/kaggle/input/llama-3/transformers/8b-chat-hf/1/model-00001-of-00004.safetensors
/kaggle/input/llama-3/transformers/8b-chat-hf/1/model.py
/kaggle/input/llama-3/transformers/8b-chat-hf/1/USE_POLICY.md
/kaggle/input/llama-3/transformers/8b-chat-hf/1/tokenizer.json
/kaggle/input/llama-3/transformers/8b-chat-hf/1/tokenizer_config.json
/kaggle/input/llama-3/transformers/8b-chat-hf/1/example_text_completion.py
/kaggle/input/llama-3/transformers/8b-chat-hf/1/test_tokenizer.py
/kaggle/input/llama-3/transformers/8b-chat-hf/1/requirements.txt
/kaggle/input/llama-3/transformers/8b-chat-hf/1/tokenizer.py
/kaggle/input/llama-3/transformers/8b-chat-hf/1/model-00004-of-00004.safetensors
/kaggle/input/llama-3/transformers/8b-chat-hf

In [2]:
%%capture
%pip install -U transformers peft trl accelerate datasets bitsandbytes wandb

In [3]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format

In [4]:
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()

hf_token = user_secrets.get_secret("HUGGINGFACE_TOKEN")
login(token = hf_token)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [5]:
wb_token = user_secrets.get_secret("wandb")

wandb.login(key=wb_token)
run = wandb.init(
    project='Fine-tune Llama 3 8B on Medical Exam Dataset', 
    job_type="training", 
    anonymous="allow"
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mrii_01[0m ([33mrii_01-iiitl[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Tracking run with wandb version 0.18.3
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20241017_172249-99s3cbuc[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mresilient-bush-4[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/rii_01-iiitl/Fine-tune%20Llama%203%208B%20on%20Medical%20Exam%20Dataset[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/rii_01-iiitl/Fine-tune%20Llama%203%208B%20on%20Medical%

In [6]:
base_model = "/kaggle/input/llama-3/transformers/8b-chat-hf/1"
dataset_name = "GBaker/MedQA-USMLE-4-options"
new_model = "llama-3-8b-MedExam-bot"

In [7]:
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install -qqq flash-attn
    torch_dtype = torch.bfloat16
    attn_implementation = "flash_attention_2"
else:
    torch_dtype = torch.float16
    attn_implementation = "eager"

In [8]:
# QLora config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [9]:
# LoRA config
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)
model, tokenizer = setup_chat_format(model, tokenizer)
model = get_peft_model(model, peft_config)

In [10]:
dataset = load_dataset(dataset_name, split="train")
dataset

README.md:   0%|          | 0.00/654 [00:00<?, ?B/s]

phrases_no_exclude_train.jsonl:   0%|          | 0.00/16.2M [00:00<?, ?B/s]

phrases_no_exclude_test.jsonl:   0%|          | 0.00/2.08M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/10178 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1273 [00:00<?, ? examples/s]

Dataset({
    features: ['question', 'answer', 'options', 'meta_info', 'answer_idx', 'metamap_phrases'],
    num_rows: 10178
})

In [11]:
# Function to combine question and options
def combine_question_options(example):
    options_str = "\n".join([f"{key}: {value}" for key, value in example['options'].items()])
    return f"{example['question']}\n{options_str}"

dataset = dataset.map(lambda x: {'combined': combine_question_options(x)})

print(dataset['combined'][0]) 


Map:   0%|          | 0/10178 [00:00<?, ? examples/s]

A 23-year-old pregnant woman at 22 weeks gestation presents with burning upon urination. She states it started 1 day ago and has been worsening despite drinking more water and taking cranberry extract. She otherwise feels well and is followed by a doctor for her pregnancy. Her temperature is 97.7°F (36.5°C), blood pressure is 122/77 mmHg, pulse is 80/min, respirations are 19/min, and oxygen saturation is 98% on room air. Physical exam is notable for an absence of costovertebral angle tenderness and a gravid uterus. Which of the following is the best treatment for this patient?
A: Ampicillin
B: Ceftriaxone
C: Doxycycline
D: Nitrofurantoin


In [12]:
# Function to format the dataset into a chat template for Llama3
def format_chat_template(row):
    row_json = [
        {"role": "user", "content": row["combined"]},  
        {"role": "assistant", "content": row["answer"]}  
    ]
    row["text"] = tokenizer.apply_chat_template(row_json, tokenize=False)
    return row

dataset = dataset.map(format_chat_template, num_proc=4)

  self.pid = os.fork()


Map (num_proc=4):   0%|          | 0/10178 [00:00<?, ? examples/s]

In [13]:
dataset[0]
dataset = dataset.remove_columns([col for col in dataset.column_names if col not in ['text', 'answer']])

print(dataset[0])

{'answer': 'Nitrofurantoin', 'text': '<|im_start|>user\nA 23-year-old pregnant woman at 22 weeks gestation presents with burning upon urination. She states it started 1 day ago and has been worsening despite drinking more water and taking cranberry extract. She otherwise feels well and is followed by a doctor for her pregnancy. Her temperature is 97.7°F (36.5°C), blood pressure is 122/77 mmHg, pulse is 80/min, respirations are 19/min, and oxygen saturation is 98% on room air. Physical exam is notable for an absence of costovertebral angle tenderness and a gravid uterus. Which of the following is the best treatment for this patient?\nA: Ampicillin\nB: Ceftriaxone\nC: Doxycycline\nD: Nitrofurantoin<|im_end|>\n<|im_start|>assistant\nNitrofurantoin<|im_end|>\n'}


In [14]:
dataset['text'][0]

'<|im_start|>user\nA 23-year-old pregnant woman at 22 weeks gestation presents with burning upon urination. She states it started 1 day ago and has been worsening despite drinking more water and taking cranberry extract. She otherwise feels well and is followed by a doctor for her pregnancy. Her temperature is 97.7°F (36.5°C), blood pressure is 122/77 mmHg, pulse is 80/min, respirations are 19/min, and oxygen saturation is 98% on room air. Physical exam is notable for an absence of costovertebral angle tenderness and a gravid uterus. Which of the following is the best treatment for this patient?\nA: Ampicillin\nB: Ceftriaxone\nC: Doxycycline\nD: Nitrofurantoin<|im_end|>\n<|im_start|>assistant\nNitrofurantoin<|im_end|>\n'

In [15]:
dataset = dataset.train_test_split(test_size=0.2)

In [16]:
dataset

DatasetDict({
    train: Dataset({
        features: ['answer', 'text'],
        num_rows: 8142
    })
    test: Dataset({
        features: ['answer', 'text'],
        num_rows: 2036
    })
})

In [17]:
#Hyperparamter
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=4,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    evaluation_strategy="steps",
    eval_steps=0.2,
    logging_steps=25,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=True,
    bf16=False,
    group_by_length=True,
    report_to="wandb"
)



In [18]:
# SFT parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    peft_config=peft_config,
    max_seq_length= 512,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing= False,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/8142 [00:00<?, ? examples/s]

Map:   0%|          | 0/2036 [00:00<?, ? examples/s]

In [19]:
trainer.train()



Step,Training Loss,Validation Loss
407,1.4421,1.344841
814,1.2961,1.308685
1221,1.363,1.280279
1628,1.1995,1.262047
2035,1.3077,1.251884




TrainOutput(global_step=2035, training_loss=1.3135640024845958, metrics={'train_runtime': 10992.0176, 'train_samples_per_second': 0.741, 'train_steps_per_second': 0.185, 'total_flos': 7.973508345729024e+16, 'train_loss': 1.3135640024845958, 'epoch': 0.9997543601080816})

In [20]:
trainer.model.save_pretrained(new_model)
wandb.finish()
model.config.use_cache = True
model.eval()

[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:               eval/loss █▅▃▂▁
[34m[1mwandb[0m:            eval/runtime ▃▅▁▅█
[34m[1mwandb[0m: eval/samples_per_second ▇▅█▅▁
[34m[1mwandb[0m:   eval/steps_per_second ▇▅█▅▁
[34m[1mwandb[0m:             train/epoch ▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇████
[34m[1mwandb[0m:       train/global_step ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇█████
[34m[1mwandb[0m:         train/grad_norm ▂▃▄▂▂▃▃▁▄▃▃▁▁▃▁▁▂▁▃▁▁▄▃▃▂█▂▁▁▃▁▁▁▃▁▃▃▁▂▁
[34m[1mwandb[0m:     train/learning_rate █████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▁▁
[34m[1mwandb[0m:              train/loss █▄▅▄▄▄▄▃▄▂▄▂▃▃▄▃▃▂▃▃▃▂▂▂▄▃▁▂▂▃▁▃▂▃▁▂▃▁▃▃
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:                eval/loss 1.25188
[34m[1mwandb[0m:             eval/runtime 817.3045
[34m[1mwandb[0m:  eval/samples_per_second 2.491
[34m[1m

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128258, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lor

In [21]:
trainer.model.push_to_hub(new_model, use_temp_dir=False)

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/2.27G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/riddhi01/llama-3-8b-MedExam-bot/commit/bcc22a50f4e2c50fca8fd24d392b503e19191836', commit_message='Upload model', commit_description='', oid='bcc22a50f4e2c50fca8fd24d392b503e19191836', pr_url=None, repo_url=RepoUrl('https://huggingface.co/riddhi01/llama-3-8b-MedExam-bot', endpoint='https://huggingface.co', repo_type='model', repo_id='riddhi01/llama-3-8b-MedExam-bot'), pr_revision=None, pr_num=None)

In [22]:
tokenizer.save_pretrained(new_model)
tokenizer.push_to_hub(new_model)

No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/riddhi01/llama-3-8b-MedExam-bot/commit/bcc22a50f4e2c50fca8fd24d392b503e19191836', commit_message='Upload tokenizer', commit_description='', oid='bcc22a50f4e2c50fca8fd24d392b503e19191836', pr_url=None, repo_url=RepoUrl('https://huggingface.co/riddhi01/llama-3-8b-MedExam-bot', endpoint='https://huggingface.co', repo_type='model', repo_id='riddhi01/llama-3-8b-MedExam-bot'), pr_revision=None, pr_num=None)

In [23]:
messages = [{"role": "user", "content": 
"""
A 39-year-old woman is brought to the emergency department because of fevers, chills, and left lower quadrant pain. Her temperature is 39.1°C (102.3°F), pulse is 126/min, respirations are 28/min, and blood pressure is 80/50 mm Hg. There is blood oozing around the site of a peripheral intravenous line. Pelvic examination shows mucopurulent discharge from the cervical os and left adnexal tenderness. Laboratory studies show: Platelet count 14,200/mm3 Fibrinogen 83 mg/mL (N = 200–430 mg/dL) D-dimer 965 ng/mL (N < 500 ng/mL) When phenol is applied to a sample of the patient's blood at 90°C, a phosphorylated N-acetylglucosamine dimer with 6 fatty acids attached to a polysaccharide side chain is identified. A blood culture is most likely to show which of the following? 
A: "Coagulase-positive, gram-positive cocci forming mauve-colored colonies on methicillin-containing agar"
B: "Encapsulated, gram-negative coccobacilli forming grey-colored colonies on charcoal blood agar"
C: "Spore-forming, gram-positive bacilli forming yellow colonies on casein agar"
D: "Lactose-fermenting, gram-negative rods forming pink colonies on MacConkey agar
"""}]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

outputs = model.generate(**inputs, max_length=500, num_return_sequences=1)

text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(text.split("assistant")[1])

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)



"Encapsulated, gram-negative coccobacilli forming grey-colored colonies on charcoal blood agar"  Encapsulated, gram-negative coccobacilli forming grey-colored colonies on charcoal blood agar"  Encapsulated, gram-negative coccobacilli forming grey-colored colonies on charcoal blood agar"  Encapsulated, gram-negative coccobacilli forming grey-colored colonies on charcoal blood agar"  Encapsulated, gram-negative coccobacilli forming grey-colored colonies on charcoal blood agar"  Encapsulated, gram-negative coccobacilli forming grey-colored colonies on charcoal blood agar"  Encapsulated, gram-negative coccobacilli forming grey-colored colonies on charcoal blood agar"  Encapsulated, gram-negative coccobacilli forming grey-colored colonies on charcoal blood agar"  Encapsulated, gram-negative coccobacilli forming grey-colored colonies on charcoal blood agar"  Encaps


In [24]:
messages = [{"role": "user", "content": 
"""A 35-year-old man comes to the physician because of itchy, watery eyes for the past week. He has also been sneezing multiple times a day during this period. He had a similar episode 1 year ago around springtime. He has iron deficiency anemia and ankylosing spondylitis. Current medications include ferrous sulfate, artificial tear drops, and indomethacin. He works as an elementary school teacher. His vital signs are within normal limits. Visual acuity is 20/20 without correction. Physical examination shows bilateral conjunctival injection with watery discharge. The pupils are 3 mm, equal, and reactive to light. Examination of the anterior chamber of the eye is unremarkable. Which of the following is the most appropriate treatment?
"A": "Erythromycin ointment", "B": "Ketotifen eye drops", "C": "Warm compresses", "D": "Fluorometholone eye drops"
"""}]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

outputs = model.generate(**inputs, max_length=500, num_return_sequences=1)

text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(text.split("assistant")[1])


Ketotifen eye drops
"Ketotifen eye drops" is the most appropriate treatment for this patient. It is a non-steroidal anti-inflammatory drug that is used to treat allergic conjunctivitis. It is also used to treat dry eye syndrome. It is a first-line treatment for allergic conjunctivitis. It is also used to treat dry eye syndrome. It is a first-line treatment for allergic conjunctivitis. It is also used to treat dry eye syndrome. It is a first-line treatment for allergic conjunctivitis. It is also used to treat dry eye syndrome. It is a first-line treatment for allergic conjunctivitis. It is also used to treat dry eye syndrome. It is a first-line treatment for allergic conjunctivitis. It is also used to treat dry eye syndrome. It is a first-line treatment for allergic conjunctivitis. It is also used to treat dry eye syndrome. It is a first-line treatment for allergic conjunctivitis. It is also used to treat dry eye syndrome. It is a first-line treatment for allergic conjunctivitis. It is

In [25]:
messages = [{"role": "user", "content": 
"""
A 24-year-old G2P1 woman at 39 weeks’ gestation presents to the emergency department complaining of painful contractions occurring every 10 minutes for the past 2 hours, consistent with latent labor. She says she has not experienced vaginal discharge, bleeding, or fluid leakage, and is currently taking no medications. On physical examination, her blood pressure is 110/70 mm Hg, heart rate is 86/min, and temperature is 37.6°C (99.7°F). She has had little prenatal care and uses condoms inconsistently. Her sexually transmitted infections status is unknown. As part of the patient’s workup, she undergoes a series of rapid screening tests that result in the administration of zidovudine during delivery. The infant is also given zidovudine to reduce the risk of transmission. A confirmatory test is then performed in the mother to confirm the diagnosis of HIV. Which of the following is most true about the confirmatory test?
"A": "It is a Southwestern blot, identifying the presence of DNA-binding proteins", "B": "It is a Northern blot, identifying the presence of RNA", "C": "It is a Northern blot, identifying the presence of DNA", "D": "It is an HIV-1/HIV2 antibody differentiation immunoassay"
"""}]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

outputs = model.generate(**inputs, max_length=500, num_return_sequences=1)

text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(text.split("assistant")[1])


"It is an HIV-1/HIV2 antibody differentiation immunoassay"


In [26]:
messages = [{"role": "user", "content": 
"""
A 20-year-old man comes to the physician because of worsening gait unsteadiness and bilateral hearing loss for 1 month. He has had intermittent tingling sensations on both cheeks over this time period. He has no history of serious medical illness and takes no medications. Audiometry shows bilateral sensorineural hearing loss. Genetic evaluation shows a mutation of a tumor suppressor gene on chromosome 22 that encodes merlin. This patient is at increased risk for which of the following conditions?
"A": "Renal cell carcinoma", "B": "Meningioma", "C": "Astrocytoma", "D": "Vascular malformations"
"""}]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

outputs = model.generate(**inputs, max_length=500, num_return_sequences=1)

text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(text.split("assistant")[1])


Meningioma
A: "Renal cell carcinoma", "B": "Meningioma", "C": "Astrocytoma", "D": "Vascular malformations"  B: "Meningioma"  C: "Astrocytoma"  D: "Vascular malformations" 


In [27]:
messages = [{"role": "user", "content": 
"""
A 59-year-old overweight woman presents to the urgent care clinic with the complaint of severe abdominal pain for the past 2 hours. She also complains of a dull pain in her back with nausea and vomiting several times. Her pain has no relation with food. Her past medical history is significant for recurrent abdominal pain due to cholelithiasis. Her father died at the age of 60 with some form of abdominal cancer. Her temperature is 37°C (98.6°F), respirations are 15/min, pulse is 67/min, and blood pressure is 122/98 mm Hg. Physical exam is unremarkable. However, a CT scan of the abdomen shows a calcified mass near her gallbladder. Which of the following diagnoses should be excluded first in this patient?
A": "Acute cholecystitis", "B": "Gallbladder cancer", "C": "Choledocholithiasis", "D": "Pancreatitis"
"""}]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

outputs = model.generate(**inputs, max_length=500, num_return_sequences=1)

text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(text.split("assistant")[1])


"Acute cholecystitis" and "Choledocholithiasis" are the most likely causes of this patient’s symptoms. However, gallbladder cancer should be excluded first. Gallbladder cancer is a more serious condition that requires immediate attention. Pancreatitis is a less likely cause of this patient’s symptoms. The patient’s age and family history of abdominal cancer are risk factors for gallbladder cancer. The patient’s symptoms are not related to food, which is a common symptom of acute cholecystitis. The patient’s symptoms are not related to the passage of a stone, which is a common symptom of choledocholithiasis. The patient’s symptoms are not related to the passage of a stone, which is a common symptom of choledocholithiasis. The patient’s symptoms are not related to the passage of a stone, which is a common symptom of choledocholithiasis. The patient’s symptoms are not related to the passage of a stone, which is a common symptom of choledocholithiasis. The patient’s symptoms are not relat

In [28]:
messages = [{"role": "user", "content": 
"""
A 17-year-old football player with no significant past medical history, social history, or family history presents to his pediatrician with itching in his groin. He says this started during summer workouts leading up to this season. He denies having any rash anywhere else on his body. The blood pressure is 123/78 mm Hg, pulse is 67/min, respiratory rate is 15/min, and temperature is 38.1°C (98.7°F). Physical examination reveals an erythematous, well-demarcated patch with peripheral scale on the left thigh, pubic region, and perineum. There is no apparent scrotal involvement with the rash. How can you confirm the suspected diagnosis?
"A": "KOH examination of lesion scrapings", "B": "Nikolsky's sign on physical exam", "C": "Gram stain of skin scrapings", "D": "AFB stain of skin scrapings"
"""}]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

outputs = model.generate(**inputs, max_length=500, num_return_sequences=1)

text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(text.split("assistant")[1])


"KOH examination of lesion scrapings"  "B": "Nikolsky's sign on physical exam"  "C": "Gram stain of skin scrapings"  "D": "AFB stain of skin scrapings"  "


In [29]:
messages = [{"role": "user", "content": 
"""
A 24-year-old man is brought to the emergency department 15 minutes after he sustained a stab wound to the left chest just below the clavicle. On arrival, he has rapid, shallow breathing and appears anxious. His pulse is 135/min, respirations are 30/min and shallow, and palpable systolic blood pressure is 80 mm Hg. He is intubated and mechanically ventilated. Infusion of 0.9% saline is begun. Five minutes later, his pulse is 133/min and blood pressure is 82/45 mm Hg. Examination shows no active external bleeding. There is a 2.5-cm single stab wound to the left chest at the 4th intercostal space at the midclavicular line. Cardiovascular examination shows muffled heart sounds and jugular venous distention. Breath sounds are normal. Further evaluation of this patient is most likely to show which of the following findings?
"A": "Tracheal deviation toward the right side", "B": "Hemoptysis", "C": "A drop in systolic blood pressure of 14 mmHg during inspiration", "D": "Paradoxical motion of part of the chest with breathing\n\""
"""}]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

outputs = model.generate(**inputs, max_length=500, num_return_sequences=1)

text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(text.split("assistant")[1])


A drop in systolic blood pressure of 14 mmHg during inspiration" and "Paradoxical motion of part of the chest with breathing" and "Tracheal deviation toward the right side" and "Hemoptysis" and "Jugular venous distention" and "Muffled heart sounds" and "Normal breath sounds" and "Palpable systolic blood pressure of 80 mm Hg" and "Rapid, shallow breathing" and "Respirations of 30/min" and "Systolic blood pressure of 82/45 mm Hg" and "Tachycardia of 135/min" and "Tachypnea of 30/min" and "Ventilation of 30/min" and "Ventilation of 30/min" and "Ventilation of 30/min" and "Ventilation of 30/min" and "Ventilation of 30/min" and "Ventilation of 30/min" and "Ventilation of 30/min" and "Ventilation of 30/min" and "Ventilation of 30/min" and "Ventilation of 30/min" and "Ventilation


In [30]:
messages = [{"role": "user", "content": 
"""
A 3-month-old girl is brought to a pediatrician by her parents. She has central cyanosis without signs of respiratory distress or signs of heart failure. An echocardiogram reveals severe pulmonary outflow obstruction, right ventricular hypertrophy, a ventricular septal defect, and an overriding of the aorta. An elective primary surgical repair is planned at 4 months of age. Which of the following statements is true about this girl’s condition?
"A": "The tricuspid valve is the most common valve affected by bacterial endocarditis in uncorrected tetralogy of Fallot.", "B": "Normal hemoglobin in patients with tetralogy of Fallot does not rule out iron deficiency anemia.", "C": "Cerebral arterial thrombosis is more common than cerebral venous thrombosis.", "D": "Refractory heart failure is a common complication of tetralogy of Fallot."
"""}]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

outputs = model.generate(**inputs, max_length=500, num_return_sequences=1)

text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(text.split("assistant")[1])


Normal hemoglobin in patients with tetralogy of Fallot does not rule out iron deficiency anemia.


In [31]:
messages = [{"role": "user", "content": 
"""
A 44-year-old African-American woman comes to the physician for a routine examination. She is concerned about cancer because her uncle died of metastatic melanoma 1 year ago. She has no history of serious illness and does not take any medication. She has been working in a law firm for the past 20 years and travels to the Carribean regularly with her husband. Examination of her skin shows no abnormal moles or warts. This woman is at greatest risk of which of the following types of melanoma?
"A": "Desmoplastic", "B": "Nodular", "C": "Acral lentiginous", "D": "Superficial spreading"
"""}]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

outputs = model.generate(**inputs, max_length=500, num_return_sequences=1)

text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(text.split("assistant")[1])


"Desmoplastic"  "Nodular"  "Acral lentiginous"  "Superficial spreading"  "Desmoplastic"  "Nodular"  "Acral lentiginous"  "Superficial spreading"  "Desmoplastic"  "Nodular"  "Acral lentiginous"  "Superficial spreading"  "Desmoplastic"  "Nodular"  "Acral lentiginous"  "Superficial spreading"  "Desmoplastic"  "Nodular"  "Acral lentiginous"  "Superficial spreading"  "Desmoplastic"  "Nodular"  "Acral lentiginous"  "Superficial spreading"  "Desmoplastic"  "Nodular"  "Acral lentiginous"  "Superficial spreading"  "Desmoplastic"  "Nodular"  "Acral lentiginous"  "Superficial spreading"  "Desmoplastic"  "Nodular"  "Acral lentiginous"  "Superficial spreading"  "Desmoplastic"  "Nodular"  "Acral lentiginous"  "Superficial spreading"  "Desmoplastic"  "Nodular"  "Acral lentiginous"  "Superficial spreading"  "Desmoplastic"  "Nodular"  "Acral lentiginous"  "Superficial spreading"  "Desmoplastic"  "Nodular"  "Acral


In [32]:
messages = [{"role": "user", "content": 
"""
A 54-year-old male makes an appointment with his primary care physician due to chronic fatigue that has left him unable to hike or do other physically demanding activities with his friends. He has well-controlled hypertension and diabetes but has otherwise been healthy. He does not smoke but drinks socially with his friends. Physical exam reveals enlarged nodules that are not tender to palpation. A biopsy is obtained showing a characteristic change in chromosome 18. The regulation of which of the following proteins will be most abnormal in this patient?
"A": "Caspase-9", "B": "CD15", "C": "Cyclin-dependent kinase 4", "D": "Ras pathway transcription factors"
"""}]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

outputs = model.generate(**inputs, max_length=500, num_return_sequences=1)

text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(text.split("assistant")[1])


"Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-dependent kinase 4"  "Cyclin-depen

In [33]:
model.save_pretrained(new_model)