### This is the training notebook for Zephyr-beta-GPTQ finetuning with Lora for Medical text Translation

In [None]:
!pip install -q bitsandbytes transformers peft accelerate trl

In [None]:
!pip install -q optimum auto-gptq

In [44]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
    GPTQConfig,
    GenerationConfig
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
    AutoPeftModelForCausalLM
)
import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer

In [45]:
base_model = "TheBloke/zephyr-7B-beta-GPTQ"
dataset_name = "cbasu/Med-EASi"

In [46]:
dataset = load_dataset(dataset_name)

dataset

DatasetDict({
    train: Dataset({
        features: ['Expert', 'Simple', 'Annotation', 'sim', 'sentence_sim', 'compression', 'expert_fk_grade', 'expert_ari', 'layman_fk_grade', 'layman_ari', 'umls_expert', 'umls_layman', 'expert_terms', 'layman_terms', 'idx'],
        num_rows: 1397
    })
    validation: Dataset({
        features: ['Expert', 'Simple', 'Annotation', 'sim', 'sentence_sim', 'compression', 'expert_fk_grade', 'expert_ari', 'layman_fk_grade', 'layman_ari', 'umls_expert', 'umls_layman', 'expert_terms', 'layman_terms', 'idx'],
        num_rows: 196
    })
    test: Dataset({
        features: ['Expert', 'Simple', 'Annotation', 'sim', 'sentence_sim', 'compression', 'expert_fk_grade', 'expert_ari', 'layman_fk_grade', 'layman_ari', 'umls_expert', 'umls_layman', 'expert_terms', 'layman_terms', 'idx'],
        num_rows: 300
    })
})

In [47]:
def format_prompt(sample):
    medical_text = sample['Expert']
    simple_text = sample['Simple']
    
    formatted_prompt = f"""
    Please simplify the following medical summary so that it is easily understandable.
    Ensure that the key information is retained, but use simpler language and explanations.
    Avoid overly technical jargon and aim for clarity and readability.
    
    <medical_text>
    {medical_text}
    <medical_text/>
    
    <simple_text>
    {simple_text}
    <simple_text/>
    """
    
    sample["text"] = formatted_prompt
    
    return sample

In [48]:
dataset = dataset.map(
    format_prompt
)
dataset["train"]["text"][0]

"\n    Please simplify the following medical summary so that it is easily understandable.\n    Ensure that the key information is retained, but use simpler language and explanations.\n    Avoid overly technical jargon and aim for clarity and readability.\n    \n    <medical_text>\n    75-90 % of the affected people have mild intellectual disability.\n    <medical_text/>\n    \n    <simple_text>\n    People with syndromic intellectual disabilities may have a `` typical look. ''\n    <simple_text/>\n    "

In [49]:
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.padding_side = 'right'
tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_eos_token = True
tokenizer.add_bos_token, tokenizer.add_eos_token

bnb_config = GPTQConfig(bits=4,
                        disable_exllama=True,
                        device_map="auto",
                        use_cache=False,
                        lora_r=16,
                        lora_alpha=16,
                        tokenizer=tokenizer
                                )

model = AutoModelForCausalLM.from_pretrained(
        base_model,
        quantization_config=bnb_config,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
)

model.config.use_cache = False
model.config.pretraining_tp = 1
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

Using `disable_exllama` is deprecated and will be removed in version 4.37. Use `use_exllama` instead and specify the version with `exllama_config`.The value of `use_exllama` will be overwritten by `disable_exllama` passed in `GPTQConfig` or stored in your config file.
Using `disable_exllama` is deprecated and will be removed in version 4.37. Use `use_exllama` instead and specify the version with `exllama_config`.The value of `use_exllama` will be overwritten by `disable_exllama` passed in `GPTQConfig` or stored in your config file.
Some weights of the model checkpoint at TheBloke/zephyr-7B-beta-GPTQ were not used when initializing MistralForCausalLM: ['model.layers.0.mlp.down_proj.bias', 'model.layers.0.mlp.gate_proj.bias', 'model.layers.0.mlp.up_proj.bias', 'model.layers.0.self_attn.k_proj.bias', 'model.layers.0.self_attn.o_proj.bias', 'model.layers.0.self_attn.q_proj.bias', 'model.layers.0.self_attn.v_proj.bias', 'model.layers.1.mlp.down_proj.bias', 'model.layers.1.mlp.gate_proj.bias

In [50]:
peft_config = LoraConfig(
                            r=16,
                            lora_alpha=16,
                            lora_dropout=0.05,
                            bias="none",
                            task_type="CAUSAL_LM",
                            target_modules=["q_proj", "v_proj"]
                        )
model = get_peft_model(model, peft_config)

In [51]:

OUTPUT_DIR='med-zephyr-beta'
OPTIMIZER = "paged_adamw_32bit"

training_arguments = TrainingArguments(
                                        output_dir='/kaggle/working/',
                                        per_device_train_batch_size=8,
                                        gradient_accumulation_steps=1,
                                        optim=OPTIMIZER,
                                        learning_rate=2e-4,
                                        lr_scheduler_type="cosine",
                                        save_strategy="epoch",
                                        logging_steps=50,
                                        num_train_epochs=1,
                                        max_steps=100,
                                        fp16=True,
                                        evaluation_strategy="steps",
                                        eval_steps=50,
                                        push_to_hub=False,
)



In [55]:
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset['train'],
    eval_dataset=dataset['validation'],
    peft_config=peft_config,
    max_seq_length= 512,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing= False,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/1397 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [56]:
trainer.train()



Step,Training Loss,Validation Loss
50,0.65,0.763059
100,0.6637,0.750094


max_steps is given, it will override any value given in num_train_epochs


TrainOutput(global_step=100, training_loss=0.6568189239501954, metrics={'train_runtime': 900.8879, 'train_samples_per_second': 0.888, 'train_steps_per_second': 0.111, 'total_flos': 148496743858176.0, 'train_loss': 0.6568189239501954, 'epoch': 0.5714285714285714})

In [None]:
#data preprocessing for the sample input data
def input_data_preprocessing(example):

    processed_example = "<|system|>\n You are a support chatbot who helps with user queries chatbot who always responds in the style of a professional.\n<|user|>\n" + example["instruction"] + "\n<|assistant|>\n"

    return processed_example
input_string = input_data_preprocessing(
    {
        "instruction": "Most strabismus is caused by Refractive error; Muscle imbalance.",
    }
)

model = AutoPeftModelForCausalLM.from_pretrained(
    "/content/zephyr-finetuning/checkpoint-100",
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map="cuda")

inputs = tokenizer(input_string, return_tensors="pt").to("cuda")

generation_config = GenerationConfig(
    do_sample=True,
    top_k=1,
    temperature=0.1,
    max_new_tokens=256,
    pad_token_id=tokenizer.eos_token_id
)

outputs = model.generate(**inputs, generation_config=generation_config)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))