In [27]:
import torch
from transformers import GPT2Config, GPT2LMHeadModel,AutoModelForCausalLM,BitsAndBytesConfig,TrainingArguments,AutoTokenizer
from peft import LoraConfig,get_peft_model
from datasets import load_dataset,Dataset,load_dataset
# tokenizer=AutoTokenizer.from_pretrained('tirthadagr8/custom-mbart-large-50')
from trl import setup_chat_format

In [2]:
tokenizer=AutoTokenizer.from_pretrained('meta-llama/Llama-3.2-1B')
tokenizer.pad_token = tokenizer.eos_token

In [3]:
import os

os.environ['NEPTUNE_API_TOKEN'] = "eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI2ZGUwMDYyOC04NmE0LTQyM2UtOTVjNi0wZjQ3ZGU2ZjM4M2IifQ=="
os.environ['NEPTUNE_PROJECT'] = 'tirthadagr8/model-feed'
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [4]:
# config=GPT2Config(vocab_size=len(tokenizer),bos_token_id=tokenizer.bos_token_id,eos_token_id=tokenizer.eos_token_id,n_embd=384,n_layer=12,n_head=8)
# no_of_parameters=config.vocab_size*config.n_embd+config.n_layer*config.n_embd+config.n_layer*(4*config.n_embd*config.n_embd+4*config.n_embd+2*config.n_embd*4*config.n_embd+9*config.n_embd)+2*config.n_embd
# size_of_model=no_of_parameters/(1.6*100000000)
# print(f'Number of parameters would be:{no_of_parameters} the size would be:{size_of_model}')

In [5]:
torch_dtype=torch.float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)


In [6]:
model = AutoModelForCausalLM.from_pretrained('meta-llama/Llama-3.2-1B',quantization_config=bnb_config,device_map="auto",)

In [7]:
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)
model = get_peft_model(model, peft_config)

In [28]:
model,tokenizer=setup_chat_format(model,tokenizer)

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


In [20]:
dataset = load_dataset('ruslanmv/ai-medical-chatbot', split="all")
split_dataset = dataset.train_test_split(test_size=0.1)  # 80% train, 20% test

In [30]:
tokenizer.chat_template

"{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"

In [35]:
# Tokenize the dataset
def tokenize_function(examples):
    '''
    *** this will be used in future to train it for conversation format training
    messages = [{"role": "user", "content": "What is the capital of France."}]
    input_text=tokenizer.apply_chat_template(messages, tokenize=False)
    *** you can make your own custom template as well, like alpaca prompt below:-
    alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

    ### Instruction:
    {}

    ### Input:
    {}

    ### Response:
    {}"""
    '''
    messages = [{"role": "user", "content": examples['Patient']},
                {"role": "assistant", "content": examples['Doctor']}]
    return {'text':tokenizer.apply_chat_template(messages,tokenize=False)}

tokenized_dataset = split_dataset.map(tokenize_function)
train_dataset = tokenized_dataset["train"]
test_dataset = tokenized_dataset["test"]

Map:   0%|          | 0/231224 [00:00<?, ? examples/s]

Map:   0%|          | 0/25692 [00:00<?, ? examples/s]

In [36]:
train_dataset['text'][0]

'<|im_start|>user\nSymptoms are lower back pain - sometimes radiates down left leg Blood pressure usually run low - it has been running higher Urine - may have a little foul smell - Have had headaches a few days - but not today Recently protein was found in urine - last urine test it was clear<|im_end|>\n<|im_start|>assistant\nHello and Welcome to ‘Ask A Doctor’ service. I have reviewed your query and here is my advice. I have gone through the details and in my opinion you have kidney disease causing proteins in the urine, Backache and mild fever. Even blood pressure variations are known in kidney disease. Please get urine examination including culture and ultrasound abdomen. Till the diagnosis is clear take only acetaminophen ,when there is pain. Hope I have answered your query. Let me know if I can assist you further.<|im_end|>\n'

In [42]:
from trl import SFTConfig

training_args = SFTConfig(
    output_dir = "outputs",
    overwrite_output_dir=True,
    max_seq_length = 128,
    dataset_num_proc = 2,
    dataset_text_field = "text",
    per_device_train_batch_size = 1,
    gradient_accumulation_steps = 2,
    warmup_steps = 5,
    # num_train_epochs = 1, # Set this for 1 full training run.
    max_steps = 60,
    evaluation_strategy="steps",
    eval_steps=0.2,
    learning_rate = 2e-4,
    logging_steps = 1,
    optim = "adamw_8bit",
    weight_decay = 0.01,
    lr_scheduler_type = "linear",
    seed = 3407,
    report_to = "none", # Use this for WandB etc
)



In [43]:
from trl import SFTTrainer,SFTConfig
from transformers import TrainingArguments,Trainer

trainer = SFTTrainer(
    model = model,
    processing_class = tokenizer,
    train_dataset = train_dataset,
    eval_dataset = test_dataset,
    packing = False, # Can make training 5x faster for short sequences.
    args = training_args
)

Map (num_proc=2):   0%|          | 0/231224 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/25692 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [41]:
trainer.train()

  0%|          | 0/60 [00:00<?, ?it/s]

{'loss': 3.6236, 'grad_norm': 4.508493900299072, 'learning_rate': 4e-05, 'epoch': 0.0}
{'loss': 3.0699, 'grad_norm': 2.656003475189209, 'learning_rate': 8e-05, 'epoch': 0.0}
{'loss': 3.5535, 'grad_norm': 4.132633209228516, 'learning_rate': 0.00012, 'epoch': 0.0}
{'loss': 3.5903, 'grad_norm': 3.653149127960205, 'learning_rate': 0.00016, 'epoch': 0.0}
{'loss': 3.3466, 'grad_norm': 3.6871070861816406, 'learning_rate': 0.0002, 'epoch': 0.0}
{'loss': 3.2498, 'grad_norm': 4.367644309997559, 'learning_rate': 0.00019636363636363636, 'epoch': 0.0}
{'loss': 3.3682, 'grad_norm': 4.806777000427246, 'learning_rate': 0.00019272727272727274, 'epoch': 0.0}
{'loss': 3.7933, 'grad_norm': 4.472368240356445, 'learning_rate': 0.0001890909090909091, 'epoch': 0.0}
{'loss': 3.2014, 'grad_norm': 5.618427753448486, 'learning_rate': 0.00018545454545454545, 'epoch': 0.0}
{'loss': 3.8976, 'grad_norm': 3.2326624393463135, 'learning_rate': 0.00018181818181818183, 'epoch': 0.0}
{'loss': 3.2003, 'grad_norm': 2.7177734



{'train_runtime': 51.3826, 'train_samples_per_second': 2.335, 'train_steps_per_second': 1.168, 'train_loss': 3.245014444986979, 'epoch': 0.0}


TrainOutput(global_step=60, training_loss=3.245014444986979, metrics={'train_runtime': 51.3826, 'train_samples_per_second': 2.335, 'train_steps_per_second': 1.168, 'total_flos': 89731697246208.0, 'train_loss': 3.245014444986979, 'epoch': 0.0005189772687956267})

In [44]:
messages = [
    {
        "role": "user",
        "content": "I often feel anxious in social situations. What are some ways to manage anxiety without medication?"
    }
]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, 
                                       add_generation_prompt=True)

inputs = tokenizer(prompt, return_tensors='pt', padding=True, 
                   truncation=True, max_length=512).to("cuda")


outputs = model.generate(**inputs, max_length=150, num_return_sequences=1)#, num_beams=10, early_stopping=False, repetition_penalty=2.2)

text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(text.split("assistant")[1])


Hi.  Anxiety is a very common condition and it is very important to identify the cause and treat it.  There are many types of anxiety and all of them have the common symptoms.  Anxiety is a feeling of unease, nervousness, restlessness, fear, worry, apprehension, apprehension, nervousness, restlessness, fear, worry, apprehension, apprehension, apprehension, apprehension, apprehension, apprehension, apprehension, apprehension, apprehension, apprehension, apprehension, apprehension, apprehension, apprehension, apprehension, apprehension, apprehension


In [45]:
msg='hi! how are you?'
inputs = tokenizer(msg, return_tensors='pt', padding=True, 
                   truncation=True, max_length=128).to("cuda")
outputs = model.generate(**inputs, max_length=100, num_return_sequences=1)

In [46]:
tokenizer.batch_decode(outputs)

['<|begin_of_text|>hi! how are you? i have a problem, i have a 2003 honda accord, and it has a problem, it starts to smoke, it is a black smoke, it is a normal smoke, but the smoke is not normal, it is like a white smoke, like a white cloud, like a smoke, like a smoke, like a smoke, like a smoke, like a smoke, like a smoke, like a smoke, like a smoke, like a smoke, like']

In [27]:
tokenizer.batch_decode(inputs['input_ids'])

['<|begin_of_text|>hi! how are you?']