In [15]:
!pip install huggingface_hub==0.25.0
!pip install -U transformers
!pip install -U datasets
!pip install -U accelerate
!pip install -U bitsandbytes
!pip install -U peft
!pip install -U trl

Collecting huggingface_hub==0.25.0
  Using cached huggingface_hub-0.25.0-py3-none-any.whl.metadata (13 kB)
Using cached huggingface_hub-0.25.0-py3-none-any.whl (436 kB)
Installing collected packages: huggingface_hub
  Attempting uninstall: huggingface_hub
    Found existing installation: huggingface-hub 0.31.2
    Uninstalling huggingface-hub-0.31.2:
      Successfully uninstalled huggingface-hub-0.31.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
transformers 4.51.3 requires huggingface-hub<1.0,>=0.30.0, but you have huggingface-hub 0.25.0 which is incompatible.[0m[31m
[0mSuccessfully installed huggingface_hub-0.25.0
Collecting huggingface-hub<1.0,>=0.30.0 (from transformers)
  Using cached huggingface_hub-0.31.2-py3-none-any.whl.metadata (13 kB)
Using cached huggingface_hub-0.31.2-py3-none-any.whl (484 kB)
Installing collected packages: huggingface

In [16]:
import os
import torch
from datasets import load_dataset  
from transformers import (AutoModelForCausalLM,  
                         AutoTokenizer,  
                         BitsAndBytesConfig,  
                         TrainingArguments,  
                         logging)
from peft import LoraConfig, get_peft_model  
from kaggle_secrets import UserSecretsClient  
from huggingface_hub import login  
from trl import SFTTrainer, setup_chat_format  
import bitsandbytes as bnb
# Print the version of the bitsandbytes module this kernel imported.
print(bnb.__version__)

# Read the Hugging Face token stored under the Kaggle secret label
# "belovsasha" and use it to authenticate with the Hub.
user_secrets = UserSecretsClient()
hf_token = user_secrets.get_secret("belovsasha")
login(token=hf_token)

import importlib.util

# Hub identifiers: the checkpoint to fine-tune, the name used for the
# fine-tuned output, and the dataset to train on.
base_model = "google/gemma-2-2b-it"
new_model = "Gemma-2-2b-it-ChatDoctor"
dataset_name = "lavita/ChatDoctor-HealthCareMagic-100k"

# bfloat16 is selected only on GPUs with compute capability >= 8. Checking
# is_available() first keeps this cell from raising when no CUDA device exists.
_has_ampere_gpu = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
# The notebook never installs flash-attn, so only request FlashAttention-2
# when the package can actually be imported; otherwise fall back to eager.
_has_flash_attn = importlib.util.find_spec("flash_attn") is not None

torch_dtype = torch.bfloat16 if _has_ampere_gpu else torch.float16
attn_implementation = (
    "flash_attention_2" if (_has_ampere_gpu and _has_flash_attn) else "eager"
)

# Quantization settings for loading the base model: 4-bit weights using the
# "nf4" quant type with double quantization enabled; the compute dtype is the
# torch_dtype selected above.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

0.45.5


In [17]:
# Load the base checkpoint with the 4-bit quantization settings from bnb_config.
# torch_dtype is passed explicitly so the weights that are not quantized use
# the same dtype that was chosen as the quantized compute dtype, instead of
# whatever default the loader would otherwise pick.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    torch_dtype=torch_dtype,
    device_map="auto",
    attn_implementation=attn_implementation,
)

# Tokenizer that belongs to the same checkpoint as the model above.
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

def find_all_linear_names(model):
    """Collect the leaf names of every bitsandbytes 4-bit linear layer in ``model``.

    Iterates over ``model.named_modules()`` and, for each module that is an
    instance of ``bnb.nn.Linear4bit``, keeps the last dot-separated component
    of its qualified name. The name ``lm_head`` is removed from the result.

    Args:
        model: object exposing ``named_modules()`` yielding ``(name, module)`` pairs.

    Returns:
        list: the unique leaf names, in no particular order.
    """
    leaf_names = {
        qualified_name.rsplit('.', 1)[-1]
        for qualified_name, submodule in model.named_modules()
        if isinstance(submodule, bnb.nn.Linear4bit)
    }
    leaf_names.discard('lm_head')
    return list(leaf_names)

# Leaf names of the 4-bit linear layers in the loaded model; later passed to
# LoraConfig as target_modules.
modules = find_all_linear_names(model)

0.45.5


ImportError: Using `bitsandbytes` 4-bit quantization requires the latest version of bitsandbytes: `pip install -U bitsandbytes`

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA adapter settings for causal-LM fine-tuning: rank 16, alpha 32,
# 5% dropout, no bias terms, applied to the layer names collected in `modules`.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=modules,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Clear the tokenizer's existing chat template, then let TRL's
# setup_chat_format return the (model, tokenizer) pair used from here on.
# NOTE(review): presumably the template is cleared because setup_chat_format
# does not accept a tokenizer that already has one — confirm against the
# installed TRL version.
tokenizer.chat_template = None
model, tokenizer = setup_chat_format(model, tokenizer)
# Wrap the model with the adapter described by peft_config. This must happen
# after the chat-format step because that step reassigns `model`.
model = get_peft_model(model, peft_config)

In [None]:
import re
from datasets import load_dataset

# Load every split of the dataset (cached under ./cache), shuffle it with a
# fixed seed and keep the first 2000 rows of the shuffled result.
dataset = (
    load_dataset(dataset_name, split="all", cache_dir="./cache")
    .shuffle(seed=42)
    .select(range(2000))
)

def clean_text(text):
    """Strip links, "Chat Doctor" brand mentions and redundant whitespace.

    Args:
        text (str): raw text of one dataset field.

    Returns:
        str: the text with links and brand mentions removed, every run of
        whitespace collapsed to a single space, and no leading/trailing
        whitespace.
    """
    # Remove anything that looks like a link (www.* or http*).
    text = re.sub(r'\b(?:www\.[^\s]+|http\S+)', '', text)
    # Remove the "Chat Doctor" brand with an optional ".com" / ".in" suffix.
    # The previous pattern was misspelled ("aCht Doctor") so it never matched,
    # and its unescaped dots accepted any character before "com"/"in". Its
    # www.google/www.yahoo alternative was unreachable because the link pass
    # above already removes every www.* token.
    text = re.sub(r'\bChat Doctor(?:\.com)?(?:\.in)?', '', text)
    # Collapse the gaps left behind by the removals.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

def format_chat_template(row):
    """Add a chat-formatted ``text`` field to one dataset row.

    The ``instruction``, ``input`` and ``output`` fields are cleaned with
    ``clean_text`` and used as the system, user and assistant messages
    respectively; the conversation is rendered to a string with the
    tokenizer's chat template (no tokenization).

    Args:
        row: mapping with ``instruction``, ``input`` and ``output`` entries.

    Returns:
        The same ``row`` object, with ``row["text"]`` set.
    """
    role_columns = (
        ("system", "instruction"),
        ("user", "input"),
        ("assistant", "output"),
    )
    conversation = [
        {"role": role, "content": clean_text(row[column])}
        for role, column in role_columns
    ]
    row["text"] = tokenizer.apply_chat_template(conversation, tokenize=False)
    return row

# Render the chat-formatted "text" column for every row using 4 worker processes.
dataset = dataset.map(format_chat_template, num_proc=4)
# Hold out 10% of the rows for evaluation. The split is seeded so that it is
# repeatable across runs, consistent with the seeded shuffle used when sampling.
dataset = dataset.train_test_split(test_size=0.1, seed=42)


def data_collator(batch):
    """Tokenize ``batch["text"]`` into padded, truncated PyTorch tensors.

    NOTE(review): nothing in the visible notebook passes this collator to the
    trainer, and it indexes ``batch`` by column name, so it expects a mapping
    of columns rather than a list of examples — confirm whether it is needed.
    """
    return tokenizer(
        batch["text"],
        return_tensors="pt",
        padding=True,
        truncation=True,
    )

In [None]:
# Trainer hyper-parameters. Checkpoints and logs go to the `new_model` directory.
training_args = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    eval_strategy="steps",
    eval_steps=200,
    save_steps=500,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=0.0002,
    # Mixed-precision mode follows the dtype selected earlier for quantized
    # compute, so training no longer forces fp16 on hardware where the rest of
    # the notebook chose bfloat16. On the float16 path this is unchanged
    # (fp16=True, bf16=False).
    fp16=(torch_dtype == torch.float16),
    bf16=(torch_dtype == torch.bfloat16),
    group_by_length=True,
    load_best_model_at_end=False,
    report_to=[]
)

# Build the supervised fine-tuning trainer.
# - processing_class: pass the notebook's tokenizer explicitly. It was
#   reassigned by setup_chat_format, and without this argument the trainer
#   would load a fresh tokenizer from the model name instead of using it.
# - peft_config is intentionally not passed: `model` has already been wrapped
#   by get_peft_model, so handing the config over again is redundant and, in
#   recent TRL releases, presumably triggers a merge-and-rewrap of the adapter
#   — confirm against the installed version.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    processing_class=tokenizer,
    args=training_args,
)

# Disable the generation KV cache on the model config for training.
model.config.use_cache = False

In [None]:
trainer.train()

In [None]:
# Fold the adapter weights into the base model, then save and publish it.
# NOTE(review): the base weights were loaded in 4-bit; merging into a quantized
# base may lose precision compared with merging into a half-precision reload —
# confirm this is acceptable.
merged_model = model.merge_and_unload()
merged_model.save_pretrained(new_model)
# The tokenizer was reassigned by setup_chat_format, so it is saved and pushed
# alongside the weights; otherwise the published repository would not contain
# the tokenizer this model was trained with.
tokenizer.save_pretrained(new_model)
merged_model.push_to_hub(new_model, use_temp_dir=False)
tokenizer.push_to_hub(new_model, use_temp_dir=False)

In [None]:
from transformers import GenerationConfig

# Conversation to test: system persona plus the user's question.
messages = [
    {"role": "system", "content": "You are a medical expert specializing in respiratory diseases. You should prescribe some medical drugs"},
    {"role": "user", "content": "I have a persistent cough, night sweats, and recent weight loss. I’ve been to multiple doctors with no diagnosis yet. Could these symptoms be related to tuberculosis or another serious illness? Please provide a detailed answer considering possible causes and recommended next steps. Write down medicines that can cure my illness"}
]

# Render the conversation with the chat template, ending with the marker that
# asks the model to produce the assistant turn.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# Put the inputs on whatever device the model lives on instead of assuming "cuda".
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to(model.device)

outputs = model.generate(
    **inputs,
    # Budget applies to generated tokens only; max_length also counted the prompt.
    max_new_tokens=350,
    # top_k / top_p / temperature only take effect when sampling is enabled.
    do_sample=True,
    top_k=50,
    top_p=0.85,
    temperature=0.3,
    no_repeat_ngram_size=3,
)

# Decode only the newly generated tokens. Splitting the full decoded text on
# the word "assistant" would cut the answer whenever the reply itself
# contains that word.
prompt_length = inputs["input_ids"].shape[1]
response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()
print(response)


In [None]:
from transformers import GenerationConfig

# Same question as the previous cell, but with a system persona that does not
# ask for prescriptions.
messages = [
    {"role": "system", "content": "You are a medical expert specializing in respiratory diseases."},
    {"role": "user", "content": "I have a persistent cough, night sweats, and recent weight loss. I’ve been to multiple doctors with no diagnosis yet. Could these symptoms be related to tuberculosis or another serious illness? Please provide a detailed answer considering possible causes and recommended next steps."}
]

# Render the conversation with the chat template, ending with the marker that
# asks the model to produce the assistant turn.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# Put the inputs on whatever device the model lives on instead of assuming "cuda".
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to(model.device)

outputs = model.generate(
    **inputs,
    # Budget applies to generated tokens only; max_length also counted the prompt.
    max_new_tokens=350,
    # top_k / top_p / temperature only take effect when sampling is enabled.
    do_sample=True,
    top_k=50,
    top_p=0.85,
    temperature=0.3,
    no_repeat_ngram_size=3,
)

# Decode only the newly generated tokens. Splitting the full decoded text on
# the word "assistant" would cut the answer whenever the reply itself
# contains that word.
prompt_length = inputs["input_ids"].shape[1]
response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()
print(response)
