In [None]:
import os
import torch
from datasets import load_dataset, Dataset
import pandas as pd
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from trl import SFTTrainer
import transformers
# from peft import AutoPeftModelForCausalLM
from transformers import GenerationConfig
from pynvml import *
import glob



In [None]:
# Hub id of the checkpoint that is loaded for fine-tuning.
base_model = "NousResearch/Llama-2-7b-chat-hf"

# Output locations: `lora_output` receives the trainer checkpoints / saved adapter,
# `full_output` is the model path the inference pipeline loads from.
lora_output = "models/lora_KUET_LLM_llama"
full_output = "models/full_KUET_LLM_llama"

# Device name; not referenced by any other visible cell.
DEVICE = "cuda"

In [None]:
# from huggingface_hub import login
# # huggingface token for uploading
# token = ""
# login(token) 

In [None]:
# Quantization settings: load the weights in 8-bit.
# (4-bit alternative kept for reference: bnb_4bit_quant_type="nf4",
#  bnb_4bit_compute_dtype=torch.bfloat16, bnb_4bit_use_double_quant=False)
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

# Load the base checkpoint with the quantization config applied.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    trust_remote_code=True,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config,
    # load_in_4bit=True,
)

# Training-time model settings.
model.config.pretraining_tp = 1
model.config.use_cache = False  # silence the warnings
model.gradient_checkpointing_enable()

In [None]:
# Tokenizer for the base checkpoint, configured for training.
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.padding_side = 'right'

# Do NOT reuse EOS as the padding token: DataCollatorForLanguageModeling (used
# further down) replaces every label equal to pad_token_id with -100, which would
# also mask the real end-of-sequence token and prevent the model from learning
# when to stop. Pad with <unk> instead; fall back to EOS only if the tokenizer
# defines no unk token.
tokenizer.pad_token = tokenizer.unk_token or tokenizer.eos_token

# Append EOS to every encoded sample.
tokenizer.add_eos_token = True

# Display the resulting BOS/EOS flags as the cell output.
tokenizer.add_bos_token, tokenizer.add_eos_token

In [None]:
# Location of the Excel sheet holding the training data.
# NOTE(review): machine-specific absolute path; it must be edited before running elsewhere.
data_location = r"/home/sdm/Desktop/shakib/KUET LLM/data/dataset_shakibV2.xlsx" ## replace here

# Read the sheet into a DataFrame (later cells use its 'Prompt' and 'Reply' columns).
data_df = pd.read_excel(io=data_location)

In [None]:

# Build the single training-text column from the 'Prompt' and 'Reply' columns.
# Vectorized replacement for the former row-by-row `.loc[i, 'Text'] = ...` loop:
# it does not depend on the index being 0..n-1, always creates the column (even
# for an empty frame), and avoids one DataFrame write per row.
# `.map(str)` applies str() to every cell, exactly like the str(...) calls did.
data_df['Text'] = (
    "### Instruction:" + data_df['Prompt'].map(str)
    + "### Response:" + data_df['Reply'].map(str)
)

In [None]:
# Wrap the prepared DataFrame as a Hugging Face `Dataset` for the trainer.
dataset = Dataset.from_pandas(df=data_df)

In [None]:
# Sanity check: print the assembled 'Text' field of the first example.
print(dataset[0]['Text'])

In [None]:
# PEFT / LoRA adapter configuration (r=16, alpha=32).
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# The original note says these target modules were selected for a zephyr base model.
# NOTE(review): confirm the names match the layers of `base_model`.
config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    # Adapt all of the listed linear projection layers.
    target_modules=[
        "q_proj",
        "v_proj",
        "k_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
)

In [None]:
# Prepare the quantized model for training (stabilize output layer and layernorms).
# The second parameter of prepare_model_for_kbit_training is the boolean
# `use_gradient_checkpointing`, not a bit width; the former positional `8` only
# worked because it is truthy. Pass the flag explicitly by keyword.
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

# Attach the LoRA adapter described by `config` (last step).
# NOTE(review): re-running this cell wraps the already-wrapped model again.
model = get_peft_model(model, config)

In [None]:
# Training hyperparameters, collected in one place.
MAXLEN = 512                    # intended maximum sequence length
BATCH_SIZE = 4                  # per-device train batch size
GRAD_ACC = 4                    # gradient accumulation steps
OPTIMIZER = 'paged_adamw_8bit'  # save memory
LR = 5e-6                       # learning rate handed to TrainingArguments

In [None]:
# Training configuration built from the hyperparameter constants.
training_config = transformers.TrainingArguments(
    output_dir=lora_output,
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRAD_ACC,
    optim=OPTIMIZER,
    learning_rate=LR,
    fp16=True,            # consider compatibility when using bf16
    logging_steps=10,
    num_train_epochs=2,
    remove_unused_columns=True,
)

# Causal-LM collator (mlm=False) that pads batches with the configured tokenizer.
data_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

# Supervised fine-tuning trainer over the "Text" column.
# Fixes relative to the previous version:
#   * tokenizer=tokenizer  -- without it the trainer tokenizes the dataset with
#     a tokenizer it loads itself, so the settings configured above
#     (add_eos_token, padding side, pad token) never reach the training data.
#   * max_seq_length=MAXLEN -- MAXLEN was defined but never passed, so the
#     trainer's own default length applied instead.
trainer = SFTTrainer(
    model=model,
    args=training_config,
    train_dataset=dataset,
    data_collator=data_collator,
    tokenizer=tokenizer,
    dataset_text_field="Text",
    max_seq_length=MAXLEN,
    # callbacks=[early_stop], need to learn, lora easily overfits
)

In [None]:
# Run fine-tuning. As the cell's last expression, the value returned by train()
# is displayed as the cell output.
trainer.train()

In [None]:
# Persist the trained model to the LoRA output directory.
trainer.save_model(output_dir=lora_output)

In [None]:
# Inference setup. The cell re-imports and re-declares what it needs, and it
# rebinds `tokenizer` to a freshly loaded base-model tokenizer.
# NOTE(review): `full_output` must already exist on disk; the only visible code
# that writes it is the commented-out merge/save cell further down.
import transformers
from transformers import pipeline

base_model = "NousResearch/Llama-2-7b-chat-hf"
full_output = 'models/full_KUET_LLM_llama'

tokenizer = transformers.AutoTokenizer.from_pretrained(base_model)

# Text-generation pipeline over the model stored at `full_output`.
pipe = pipeline(
    task="text-generation",
    model=full_output,
    tokenizer=tokenizer,
    max_length=200,
)

# LangChain wrapper around the same pipeline.
from langchain import HuggingFacePipeline

llm = HuggingFacePipeline(
    pipeline=pipe,
    model_kwargs={'temperature': 0},
)

In [None]:
# Single-question smoke test of the generation pipeline.
# NOTE(review): the prompt uses the [INST] chat markers, whereas the training text
# was built with "### Instruction:" / "### Response:" -- confirm this is intended.
prompt = "what is KUET?"
result = pipe(f"<s>[INST] {prompt} [/INST]")

# The generated text is split on the closing marker to isolate the reply;
# the reply is the cell's displayed output.
ans = result[0]['generated_text']
ans.split("[/INST]")[1]

In [None]:
# Batch inference: answer every question in the evaluation sheet.
import tqdm
import pandas as pd

df = pd.read_excel(r"data/KUET information2k20.xlsx")

# One record per question: {"id", "question", "answer"}.
dat = []
# `total` lets tqdm show a real progress bar (zip() has no length);
# `ques_id` avoids shadowing the builtin `id`.
for ques_id, ques in tqdm.tqdm(zip(df['id'], df['Question']), total=len(df)):
    result = pipe(f"<s>[INST] {ques} [/INST]")
    generated = result[0]['generated_text']
    # Keep only the text after the prompt's closing marker. Previously the result
    # of this split was discarded, so the stored answer still contained the prompt.
    ans = generated.split("[/INST]")[1]
    dat.append({
        "id": ques_id,
        "question": ques,
        "answer": ans,
    })

In [None]:
# Write the collected answers to an Excel file named after the model variant.
model_ans = "model_ans_llama_finetuned_without_rag"
sa = pd.DataFrame(data=dat)
sa.to_excel(excel_writer=f"data//{model_ans}.xlsx", index=False)

In [None]:
# merged_model = model.merge_and_unload()

In [None]:
# merged_model.save_pretrained(full_output)
# tokenizer.save_pretrained(full_output)

In [None]:
# from transformers import pipeline
# from transformers import AutoTokenizer, AutoModelForCausalLM
# import torch
# full_output = 'models/full_KUET_LLM_llama'
# prompt = "What is KUET?"
# tokenizer = AutoTokenizer.from_pretrained(full_output)
# model = AutoModelForCausalLM.from_pretrained(
#     full_output,
#     # quantization_config=bnb_config,
#     device_map="auto",
#     trust_remote_code=True,
#     attn_implementation="flash_attention_2",
#     torch_dtype=torch.bfloat16,

# )

In [None]:
# Deliberate stop: halts "Run All" here so the cells below (pushing the model to
# the hub) are only ever executed by hand.
assert False, "Intentional stop: run the cells below manually to push the model to the hub."

In [None]:
# # push model to hub
# merged_model.push_to_hub(full_output)
# tokenizer.push_to_hub(full_output)