<a href="https://colab.research.google.com/github/swapnildahare/Solar-Internship-DS/blob/main/%22AlpaCare_Medical_Instruction_Assistant%22.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install -q git+https://github.com/huggingface/peft.git

  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done


In [None]:
pip install -q transformers accelerate datasets bitsandbytes safetensors evaluate

In [None]:
pip install -q einops

In [None]:
from pathlib import Path
import os
import math
import random
import json
from datasets import load_dataset
import transformers
import torch

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

In [None]:
# Report the library versions used by this run.
for label, module in (('Transformers', transformers), ('Torch', torch)):
    print(label, module.__version__)

Transformers 4.53.2
Torch 2.6.0+cu124


In [None]:
# Directory that later cells save the adapter and tokenizer into.
ARTIFACT_DIR = Path('/content') / 'adapters'
os.makedirs(ARTIFACT_DIR, exist_ok=True)

In [None]:
# --- Run configuration ------------------------------------------------------
# Base checkpoint that the adapter is trained on top of.
BASE_MODEL = "togethercomputer/RedPajama-INCITE-Chat-3B-v1"

# Output locations / names.
OUTPUT_DIR = '/content/outputs'
ADAPTER_NAME = 'alpacare-lora'

# Sequence length and optimisation hyper-parameters.
MAX_LENGTH = 1024
BATCH_SIZE = 4
GRAD_ACCUM = 8
NUM_EPOCHS = 3
LEARNING_RATE = 2e-4

# Pick the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'
print(f'Device: {DEVICE}')

Device: cpu


In [None]:
from datasets import Dataset, concatenate_datasets

# Later cells read `dataset['train']`, so the dataset must be loaded here;
# without this the notebook fails with NameError on a fresh kernel.
# NOTE(review): the Hub id and the `instruction` / `input` / `output` column
# names are assumed from the AlpaCare-MedInstruct dataset named in the app
# description -- confirm against the dataset card.
DATASET_NAME = "lavita/AlpaCare-MedInstruct-52k"


def make_prompt(example):
    """Convert one raw record into the `prompt` / `response` pair used for training.

    The prompt layout mirrors the one built at inference time in
    `safe_generate`: an instruction section, an optional input section, and a
    trailing response header.

    Args:
        example: Mapping with `instruction`, optional `input`, and `output`.

    Returns:
        Dict with the keys `prompt` and `response`.
    """
    instruction = example.get('instruction') or ''
    input_text = (example.get('input') or '').strip()

    prompt = f"### Instruction:\n{instruction}\n\n\n"
    # Presumably "<noinput>" marks records without an input -- TODO confirm.
    if input_text and input_text != '<noinput>':
        prompt += f"### Input:\n{input_text}\n\n\n"
    prompt += "### Response:\n"
    return {'prompt': prompt, 'response': example.get('output') or ''}


raw_dataset = load_dataset(DATASET_NAME)
# Plain dict of split name -> formatted split, so later cells can reassign
# `dataset['train']`.
dataset = {
    split: raw_dataset[split].map(make_prompt, remove_columns=raw_dataset[split].column_names)
    for split in raw_dataset
}

In [None]:
# Small curated set of refusal demonstrations used to teach safe refusals.
# Each pair is (risky user question, refusal text the model should produce).
_REFUSAL_PROMPT_TEMPLATE = "### Instruction:\n{question}\n\n\n### Response:\n"

_REFUSAL_PAIRS = (
    (
        "I have chest pain and shortness of breath, what is wrong with me?",
        "I’m sorry — I can’t provide medical diagnoses. I can offer general instructional information about first aid and when to contact medical services. "
        "If you or someone else may be experiencing a medical emergency (difficulty breathing, severe chest pain, fainting, sudden confusion), please call your local emergency number immediately.",
    ),
    (
        "Can you prescribe a medication for my fever?",
        "I can’t prescribe medications. Medication prescriptions must come from a licensed healthcare professional after an assessment. "
        "If you are unwell, please consult a doctor or pharmacist.",
    ),
)

refusal_examples = [
    {'prompt': _REFUSAL_PROMPT_TEMPLATE.format(question=question), 'response': answer}
    for question, answer in _REFUSAL_PAIRS
]

In [None]:
from datasets import Dataset, concatenate_datasets
# Append the refusal demonstrations to the training split.
ref_ds = Dataset.from_list(refusal_examples)
train_with_refusals = concatenate_datasets([dataset['train'], ref_ds])
dataset['train'] = train_with_refusals
print(f"Training size after adding refusals: {len(train_with_refusals)}")

In [None]:
# Load the tokenizer that belongs to the base checkpoint.
print(f'Loading tokenizer for {BASE_MODEL}')
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True)

# Reuse EOS as the padding token when the tokenizer does not define one.
pad_token_missing = tokenizer.pad_token is None
if pad_token_missing:
    tokenizer.pad_token = tokenizer.eos_token

In [None]:
from google.colab import userdata
import os


def _read_colab_secret(name):
    """Return the Colab secret `name`, or None if it is missing or not shared with this notebook.

    `userdata.get` raises instead of returning None in those cases, so the
    exceptions are translated into None here.
    """
    try:
        return userdata.get(name)
    except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
        return None


# Prefer the conventional `HF_TOKEN` secret name; keep accepting the original
# `HE_token` name so existing Colab setups continue to work.
hf_token = _read_colab_secret('HF_TOKEN') or _read_colab_secret('HE_token')

if not hf_token:
    print("Hugging Face token not found in Colab secrets. Please add your Hugging Face token as a secret named 'HF_TOKEN' (or 'HE_token').")
else:
    # `HF_TOKEN` is the variable the Hugging Face libraries read; the original
    # `HE_token` variable is still exported for backward compatibility.
    os.environ['HF_TOKEN'] = hf_token
    os.environ['HE_token'] = hf_token
    print("Hugging Face token successfully loaded and set as the HF_TOKEN environment variable.")

In [None]:
# NOTE: this overrides the MAX_LENGTH assigned in the configuration cell; 512 is
# the value tokenize_fn actually uses.
MAX_LENGTH = 512


def tokenize_fn(examples):
    """Tokenize a batch of prompt/response pairs and build loss labels.

    Each prompt is concatenated with its response, tokenized, truncated and
    padded to MAX_LENGTH. Labels copy the input ids, with -100 written over
    (a) the prompt tokens, so loss is computed only on the response, and
    (b) every padding position. Padding is identified via the attention mask
    rather than by token id, because the pad token may be the same id as EOS.

    Args:
        examples: Batch mapping with parallel `prompt` and `response` lists.

    Returns:
        The tokenizer output for the concatenated texts with an added
        `labels` entry (one list of ints per example).
    """
    input_texts = [p + r for p, r in zip(examples['prompt'], examples['response'])]
    tokenized_full = tokenizer(input_texts, truncation=True, max_length=MAX_LENGTH, padding='max_length')

    # Tokenized separately (no padding) only to learn how many tokens each prompt occupies.
    prompt_tokenized = tokenizer(examples['prompt'], truncation=True, max_length=MAX_LENGTH)

    labels = []
    rows = zip(
        tokenized_full['input_ids'],
        tokenized_full['attention_mask'],
        prompt_tokenized['input_ids'],
    )
    for ids, mask, prompt_ids in rows:
        prompt_left = len(prompt_ids)
        row_labels = []
        for token_id, is_real in zip(ids, mask):
            if not is_real:
                # Padding: never contributes to the loss.
                row_labels.append(-100)
            elif prompt_left > 0:
                # Counting only real tokens keeps the prompt mask correct for
                # either padding side.
                row_labels.append(-100)
                prompt_left -= 1
            else:
                row_labels.append(token_id)
        labels.append(row_labels)

    tokenized_full['labels'] = labels
    return tokenized_full

In [None]:
print('Tokenizing dataset...')
# Tokenize every split, dropping the raw text columns.
tokenized = {}
for split in dataset:
    split_ds = dataset[split]
    tokenized[split] = split_ds.map(tokenize_fn, batched=True, remove_columns=split_ds.column_names)

print('Tokenized sample:')
first_row = tokenized['train'][0]
print({key: first_row[key] for key in ('input_ids', 'labels')})

In [None]:
from transformers import BitsAndBytesConfig

print('Loading base model (8-bit) - this may take a while...')
# Passing `load_in_8bit=` directly to from_pretrained is deprecated (see the
# warning in this cell's earlier output); the quantization settings go through
# a BitsAndBytesConfig instead.
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map='auto',
)

Loading base model (8-bit) - this may take a while...


config.json:   0%|          | 0.00/630 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


pytorch_model.bin:   0%|          | 0.00/5.69G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/5.69G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [None]:
# Run PEFT's k-bit training preparation on the quantized base model.
model = prepare_model_for_kbit_training(model)

# Module names that receive LoRA adapters.
TARGET_MODULES = ["query_key_value"]

_lora_settings = dict(
    r=8,
    lora_alpha=32,
    target_modules=TARGET_MODULES,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
lora_config = LoraConfig(**_lora_settings)

In [None]:
# Wrap the model with the LoRA configuration, then report the trainable parameters.
peft_model = get_peft_model(model, lora_config)
model = peft_model
model.print_trainable_parameters()

In [None]:
# Print the model's repr for inspection.
print(model)

In [None]:
# Training configuration built from the constants in the configuration cell.
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRAD_ACCUM,
    num_train_epochs=NUM_EPOCHS,
    learning_rate=LEARNING_RATE,
    # fp16 mixed precision needs an accelerator; a hard-coded True makes
    # TrainingArguments raise on a CPU-only runtime.
    fp16=torch.cuda.is_available(),
    logging_steps=50,
    save_total_limit=2,
    remove_unused_columns=False,
    report_to='none',
)

In [None]:
# Trainer over the full tokenized training split.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized['train'],
    # `tokenizer=` is deprecated on Trainer in recent transformers releases;
    # `processing_class=` is its replacement.
    processing_class=tokenizer,
)

In [None]:
# Quick-run subset: a fixed-seed random sample of the training split.
# NOTE(review): the refusal examples appended to the training split are only a
# few rows, so a random sample of this size may not contain them -- confirm
# that is intended.
SUBSET_SIZE = 500
train_split = dataset["train"]
# Cap at the split size so a smaller dataset does not make select() fail.
subset = train_split.shuffle(seed=42).select(range(min(SUBSET_SIZE, len(train_split))))
tokenized_small = subset.map(tokenize_fn, batched=True, remove_columns=subset.column_names)

In [None]:
# Lightweight training configuration for the quick run on the small subset.
# This replaces the `training_args` defined earlier in the notebook.
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=1,             # 1 epoch
    learning_rate=2e-4,
    # fp16 mixed precision needs an accelerator; a hard-coded True makes
    # TrainingArguments raise on a CPU-only runtime.
    fp16=torch.cuda.is_available(),
    logging_steps=20,
    save_strategy="no",
    report_to="none",
)

In [None]:
# Trainer over the small tokenized subset; this is the run that is executed.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_small,
    # `tokenizer=` is deprecated on Trainer in recent transformers releases;
    # `processing_class=` is its replacement.
    processing_class=tokenizer,
)

trainer.train()

In [None]:
# Save the trained model under ARTIFACT_DIR/ADAPTER_NAME.
adapter_path = ARTIFACT_DIR.joinpath(ADAPTER_NAME)
os.makedirs(adapter_path, exist_ok=True)
model.save_pretrained(adapter_path)
print(f'Adapter saved to {adapter_path}')


In [None]:
# Save the tokenizer files into the same directory as the adapter.
tokenizer.save_pretrained(adapter_path)

In [None]:
from peft import PeftModel
from transformers import BitsAndBytesConfig


print('Loading base model for inference...')
# Passing `load_in_8bit=` directly to from_pretrained is deprecated; the
# quantization settings go through a BitsAndBytesConfig instead.
# NOTE(review): this loads a second copy of the base model while the training
# model may still be in memory -- confirm the runtime has room for both.
base = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map='auto',
)
# Attach the saved adapter to the freshly loaded base model.
model_inf = PeftModel.from_pretrained(base, adapter_path)
model_inf.eval()

In [None]:
from transformers import GenerationConfig


def generate(prompt_text, max_new_tokens=256, do_sample=False):
    """Generate text for `prompt_text` with the inference model.

    Args:
        prompt_text: Fully formatted prompt string.
        max_new_tokens: Upper bound on the number of newly generated tokens.
        do_sample: Forwarded to the generation config.

    Returns:
        The first output sequence decoded to a string with special tokens
        skipped. Callers are expected to strip the prompt themselves (as
        `safe_generate` does).
    """
    inputs = tokenizer(prompt_text, return_tensors='pt').to(model_inf.device)

    # A GenerationConfig built from scratch carries no special-token ids, so
    # set them explicitly: generation can then stop at EOS and has a defined
    # pad id regardless of the model's own defaults.
    eos_id = tokenizer.eos_token_id
    pad_id = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else eos_id
    gen_cfg = GenerationConfig(
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        eos_token_id=eos_id,
        pad_token_id=pad_id,
    )

    with torch.no_grad():
        out = model_inf.generate(**inputs, generation_config=gen_cfg)
    return tokenizer.decode(out[0], skip_special_tokens=True)

In [None]:
# Lower-case phrases; a request containing any of them is treated as asking for a diagnosis.
DIAGNOSTIC_KEYWORDS = [
    'diagnose',
    'diagnosis',
    "what's wrong",
    'what is wrong',
    'do i have',
    'am i sick',
]

# Lower-case phrases; a request containing any of them is treated as asking for a prescription.
PRESCRIPTION_KEYWORDS = [
    'prescribe',
    'prescription',
    'dosage',
    'mg',
    'take mg',
    'take this',
]



In [None]:
def is_diagnostic_request(text):
    """Return True when `text` contains any phrase from DIAGNOSTIC_KEYWORDS.

    Matching is case-insensitive substring matching. Before matching,
    typographic apostrophes are mapped to a plain `'` and runs of whitespace
    are collapsed, so "What’s wrong" and "what is\nwrong" are caught the same
    way as their plainly typed forms.

    Args:
        text: User-supplied request text; None or empty yields False.

    Returns:
        bool
    """
    if not text:
        return False
    normalized = text.lower()
    for fancy_apostrophe in ('’', '‘', 'ʼ', '`'):
        normalized = normalized.replace(fancy_apostrophe, "'")
    normalized = ' '.join(normalized.split())
    return any(keyword in normalized for keyword in DIAGNOSTIC_KEYWORDS)

In [None]:
def is_prescription_request(text):
    """Return True when `text` contains any phrase from PRESCRIPTION_KEYWORDS.

    Matching is case-insensitive. Keywords longer than two characters are
    matched as substrings (so "prescribed" still matches "prescribe"). Very
    short keywords such as the unit "mg" must not touch a letter on either
    side, so "500 mg" and "500mg" match while words that merely contain the
    letters (e.g. "omg") do not.

    Args:
        text: User-supplied request text; None or empty yields False.

    Returns:
        bool
    """
    import re

    if not text:
        return False
    normalized = ' '.join(text.lower().split())

    def _matches(keyword):
        # Short abbreviations need letter boundaries to avoid hits inside words.
        if len(keyword) <= 2:
            pattern = r'(?<![a-z])' + re.escape(keyword) + r'(?![a-z])'
            return re.search(pattern, normalized) is not None
        return keyword in normalized

    return any(_matches(keyword) for keyword in PRESCRIPTION_KEYWORDS)



In [None]:
def safe_generate(instruction, input_text=''):
    """Answer an instruction through the model, refusing diagnostic/prescription requests.

    The instruction and optional input are first screened with
    `is_diagnostic_request` and `is_prescription_request`; a hit returns a
    fixed refusal without calling the model. Otherwise a prompt is built,
    passed to `generate`, and the prompt is removed from the returned text.

    Args:
        instruction: The user's instruction; None is treated as empty.
        input_text: Optional extra context, added as an input section.

    Returns:
        The refusal message, or the model's response text.
    """
    instruction = instruction or ''
    combined = instruction + ' ' + (input_text or '')
    if is_diagnostic_request(combined) or is_prescription_request(combined):
        return ("I’m not able to provide diagnoses or prescribe medication. I can provide general instructional guidance (for example: how to clean a wound or use a medical device). "
                "If this is an emergency, please contact local emergency services.")

    prompt = f"### Instruction:\n{instruction}\n\n\n"
    if input_text:
        prompt += f"### Input:\n{input_text}\n\n\n"
    prompt += "### Response:\n"

    out = generate(prompt)

    # Usual case: the output begins with the prompt verbatim.
    if out.startswith(prompt):
        return out[len(prompt):].strip()

    # The returned text does not always reproduce the prompt character for
    # character; fall back to cutting at the response header so the prompt
    # is not echoed back to the user.
    _, marker, tail = out.partition("### Response:")
    if marker:
        return tail.strip()
    return out

In [None]:
# Demo requests passed through safe_generate in the next cell.
prompts = [
    "I have chest pain and shortness of breath, what is wrong with me?",
    "Can you prescribe me 500 mg of paracetamol for fever?",
    "How to perform basic wound cleaning and dressing?",
    "I have a rash and fever; do I need antibiotics?",
]

In [None]:
# Run each demo prompt through the guarded generation path and print the reply.
for demo_prompt in prompts:
    print('PROMPT:', demo_prompt)
    safe_reply = safe_generate(demo_prompt)
    print('SAFE RESPONSE:', safe_reply)
    print('-------------------------')

In [None]:
!pip install gradio -q
import gradio as gr


In [None]:
def chat_with_alpacare(instruction):
    """Gradio callback: forward the textbox content to `safe_generate` and return its reply."""
    reply = safe_generate(instruction)
    return reply

In [None]:
# Build the UI components first, then wire them to the chat callback and launch.
query_box = gr.Textbox(lines=4, placeholder="Enter your medical instruction query...")
answer_box = gr.Textbox(lines=10)

demo = gr.Interface(
    fn=chat_with_alpacare,
    inputs=query_box,
    outputs=answer_box,
    title="🩺 AlpaCare Medical Instruction Assistant",
    description="A safe, non-diagnostic assistant fine-tuned with LoRA on AlpaCare-MedInstruct dataset.",
)
demo.launch()