In [None]:
!pip install --no-cache-dir transformers accelerate datasets peft bitsandbytes trl

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting trl
  Downloading trl-0.17.0-py3-none-any.whl.metadata (12 kB)
Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=

In [None]:
import json
from datasets import Dataset

# Load the records from the local JSON file.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default encoding.
with open("medquad_5000.json", encoding="utf-8") as f:
    data = json.load(f)

# Convert the loaded list of records to a Hugging Face Dataset
dataset = Dataset.from_list(data)

# Preview the first record
dataset[0]

{'instruction': 'What is (are) keratoderma with woolly hair ?',
 'input': '',
 'output': 'Keratoderma with woolly hair is a group of related conditions that affect the skin and hair and in many cases increase the risk of potentially life-threatening heart problems. People with these conditions have hair that is unusually coarse, dry, fine, and tightly curled. In some cases, the hair is also sparse. The woolly hair texture typically affects only scalp hair and is present from birth. Starting early in life, affected individuals also develop palmoplantar keratoderma, a condition that causes skin on the palms of the hands and the soles of the feet to become thick, scaly, and calloused.  Cardiomyopathy, which is a disease of the heart muscle, is a life-threatening health problem that can develop in people with keratoderma with woolly hair. Unlike the other features of this condition, signs and symptoms of cardiomyopathy may not appear until adolescence or later. Complications of cardiomyopa

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Passing load_in_4bit directly to from_pretrained is deprecated; the 4-bit
# setting goes through a BitsAndBytesConfig object instead.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)  # saves memory

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config,
    device_map="auto"
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [None]:
def tokenize(example, max_length=512):
    """Turn one instruction/output record into fixed-length causal-LM features.

    Args:
        example: Mapping with "instruction" and "output" string fields.
        max_length: Length every sequence is truncated or padded to.

    Returns:
        The tokenizer output with an added "labels" list. Labels equal
        ``input_ids`` on real tokens and -100 on padding positions.
    """
    prompt = f"### Instruction:\n{example['instruction']}\n\n### Response:\n{example['output']}"
    # Append EOS so the end of the answer is an explicit, supervised token.
    # Previously the only end-of-answer signal came from padding tokens that
    # were (incorrectly) included in the labels.
    tokens = tokenizer(
        prompt + tokenizer.eos_token,
        truncation=True,
        padding="max_length",
        max_length=max_length,
    )
    # -100 is the index ignored by the loss, so padding no longer contributes
    # to (and dilutes) the training loss. Masking uses attention_mask rather
    # than the pad token id, so a real EOS stays supervised even if the pad
    # token and EOS token share an id.
    tokens["labels"] = [
        token_id if is_real else -100
        for token_id, is_real in zip(tokens["input_ids"], tokens["attention_mask"])
    ]
    return tokens

tokenized_dataset = dataset.map(tokenize, remove_columns=["instruction", "input", "output"])


Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

In [None]:
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model

# Ready the loaded model for k-bit training (rebinds `model`).
model = prepare_model_for_kbit_training(model)

# LoRA hyperparameters, collected first and then handed to LoraConfig.
lora_kwargs = dict(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)
config = LoraConfig(**lora_kwargs)

# Wrap the model with the adapters and report how many parameters will train.
model = get_peft_model(model, config)
model.print_trainable_parameters()


trainable params: 1,126,400 || all params: 1,101,174,784 || trainable%: 0.1023


In [None]:
from transformers import TrainingArguments, Trainer

# Configuration for a short fine-tuning run.
training_args = TrainingArguments(
    output_dir="./llama2-medqa",
    per_device_train_batch_size=2,         # Smaller batch size
    gradient_accumulation_steps=4,         # 2 x 4 = 8 samples per optimizer step per device
    # max_steps takes precedence over num_train_epochs, so only the step cap
    # is specified; the run stops after 100 optimizer steps.
    max_steps=100,
    learning_rate=2e-4,
    fp16=True,                             # Mixed precision (requires GPU)
    logging_steps=10,                      # Log every 10 steps
    save_strategy="no",                    # Don't save checkpoints (faster)
    report_to="none",
    # The PEFT wrapper hides the base model's signature, so Trainer cannot
    # infer the label column on its own; name it explicitly.
    label_names=["labels"],
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    # `tokenizer=` is deprecated on Trainer; `processing_class=` is its replacement.
    processing_class=tokenizer,
)

trainer.train()


  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
  return fn(*args, **kwargs)


Step,Training Loss
10,0.4077
20,0.5441
30,0.4929
40,0.499
50,0.4534
60,0.4576
70,0.506
80,0.5208
90,0.5003
100,0.4797


TrainOutput(global_step=100, training_loss=0.48613436698913576, metrics={'train_runtime': 294.2543, 'train_samples_per_second': 2.719, 'train_steps_per_second': 0.34, 'total_flos': 2545185875558400.0, 'train_loss': 0.48613436698913576, 'epoch': 0.16})

In [None]:
# Write the model and the tokenizer into the same output directory.
save_dir = "llama2-medqa"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

('llama2-medqa/tokenizer_config.json',
 'llama2-medqa/special_tokens_map.json',
 'llama2-medqa/tokenizer.model',
 'llama2-medqa/added_tokens.json',
 'llama2-medqa/tokenizer.json')

In [None]:
# Put the model in eval mode before generating: it may still be in training
# mode after trainer.train(), which would leave LoRA dropout (0.1) active and
# perturb the generated text.
model.eval()

# Same prompt template as used for training, with the response left empty.
prompt = "### Instruction:\nWhat are the symptoms of diabetes?\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=200)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))




### Instruction:
What are the symptoms of diabetes?

### Response:
The symptoms of diabetes can vary widely depending on the type of diabetes and the severity of the condition. The most common symptoms of diabetes include:  - Hypoglycemia (low blood sugar): This is the most common symptom of diabetes. Hypoglycemia can occur when blood sugar levels are too low, especially during sleep. Hypoglycemia can cause confusion, irritability, and difficulty concentrating.  - Numbness and tingling: Numbness and tingling can occur in the fingers, toes, and other parts of the body. These symptoms are often the first signs of diabetes.  - Blurred vision: Blurred vision can occur in people with diabetes.  - Leg cramps: Leg cramps can occur in people with diabetes.  - Frequent urination: Frequent urination can occur in people with diab


In [None]:
# Put the model in eval mode before generating: it may still be in training
# mode after trainer.train(), which would leave LoRA dropout (0.1) active and
# perturb the generated text.
model.eval()

# Same prompt template as used for training, with the response left empty.
prompt = "### Instruction:\nIs keratoderma with woolly hair inherited ?\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=200)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


### Instruction:
Is keratoderma with woolly hair inherited ?

### Response:
This condition is inherited in an autosomal dominant pattern, which means one copy of the altered gene in each cell is sufficient to cause the disorder. The parents of an individual with an autosomal dominant condition each carry one copy of the altered gene, but they typically do not show signs and symptoms of the condition.


In [None]:
# Put the model in eval mode before generating: it may still be in training
# mode after trainer.train(), which would leave LoRA dropout (0.1) active and
# perturb the generated text.
model.eval()

# Same prompt template as used for training, with the response left empty.
prompt = "### Instruction:\nIs 5-alpha reductase deficiency inherited ?\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=200)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


### Instruction:
Is 5-alpha reductase deficiency inherited ?

### Response:
This condition is inherited in an autosomal recessive pattern, which means both copies of the gene in each cell have mutations. The parents of an individual with an autosomal recessive condition each carry one copy of the mutated gene, but they typically do not show signs and symptoms of the condition.
