In [None]:
import os
import torch
from datasets import load_dataset, Dataset
import pandas as pd
import transformers
from transformers import AutoTokenizer
from trl import SFTTrainer
import transformers
from peft import AutoPeftModelForCausalLM
from transformers import GenerationConfig
from pynvml import *
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

In [None]:
# Hub id of the pretrained checkpoint used for the tokenizer and the model.
base_model = 'HuggingFaceH4/zephyr-7b-beta'
# Trainer output directory; the PEFT config is later read back from here.
lora_output = 'models/KUETLLM_zephyr7b_lora'
# Directory the merged model and its tokenizer are saved to.
full_output = 'models/KUETLLM_zephyr7b_beta'
# NOTE(review): not referenced anywhere else in the visible notebook.
DEVICE = 'cuda'

In [None]:
# from huggingface_hub import login
# Never commit access tokens. Revoke any token that was ever pasted into this
# cell and read the token from the environment instead:
# login(os.environ["HF_TOKEN"])

In [None]:
# Tokenizer of the base checkpoint; formatted_text() uses its chat template.
tokenizer = AutoTokenizer.from_pretrained(base_model)

In [None]:
### Option A: Hugging Face dataset holding Prompt / Reply pairs
# data = load_dataset("huggingface/repo", split="train")
# data_df = data.to_pandas()

### Option B (active): local Excel workbook holding Prompt / Reply pairs
data_location = r"data/dataset_shakibV1.xlsx"
# CSV alternative:
# data_df = pd.read_csv(data_location, encoding='unicode_escape')
data_df = pd.read_excel(
    data_location,
)

## formatting function using tokenizer chat template, system text is set for KUETLLM
def formatted_text(x):
    """Render one Prompt/Reply row as a single chat-template string.

    Args:
        x: Mapping-like row exposing "Prompt" and "Reply".

    Returns:
        The untokenized text produced by ``tokenizer.apply_chat_template`` for
        a system / user / assistant conversation, without a generation prompt.
    """
    system_turn = {"role": "system", "content": "You are a KUET authority managed chatbot, help users by answering their queries about KUET."}
    user_turn = {"role": "user", "content": x["Prompt"]}
    assistant_turn = {"role": "assistant", "content": x["Reply"]}
    return tokenizer.apply_chat_template(
        [system_turn, user_turn, assistant_turn],
        add_generation_prompt=False,
        tokenize=False,
    )


## set formatting
# Empty Excel cells arrive as NaN and numeric cells as numbers; neither is a
# valid chat message, so keep only complete rows and render them as strings.
qa_pairs = data_df[["Prompt", "Reply"]].dropna().astype(str)
if qa_pairs.empty:
    raise ValueError("No rows with both 'Prompt' and 'Reply' found in the dataset.")
dropped_rows = len(data_df) - len(qa_pairs)
if dropped_rows:
    print(f"Dropped {dropped_rows} row(s) with a missing Prompt or Reply.")
# Rebuild the frame from the cleaned pairs (re-running the cell gives the same
# result) and reset the index so the Dataset gets no extra index column.
data_df = data_df.loc[qa_pairs.index].assign(
    Prompt=qa_pairs["Prompt"],
    Reply=qa_pairs["Reply"],
    text=qa_pairs.apply(formatted_text, axis=1),
).reset_index(drop=True)
print(data_df.iloc[0])
dataset = Dataset.from_pandas(data_df)

In [None]:
# import pandas as pd
# from datasets import Dataset


# tokenized_texts = tokenizer(dataset['text'])


# tokenized_text_lengths = [len(tokens) for tokens in tokenized_texts['input_ids']]


# filtered_indices = [i for i, length in enumerate(tokenized_text_lengths) if length <= 200]


# filtered_dataset = dataset.select(filtered_indices)


# print(filtered_dataset)


In [None]:
# Show the Dataset summary as the cell output.
dataset

In [None]:
# def formatted_text(x):
#     return f'''<s>### Instruction
#     Use the provided input to create an instruction that could have been used to generate the response with an LLM.

#     ### Input:
#     {x["Prompt"]}

#     ### Response:
#     {x["Reply"]}</s>
#     '''
# data_df["text"] = data_df[["Prompt", "Reply"]].apply(lambda x: formatted_text(x), axis=1)
# print(data_df.iloc[0])
# dataset = Dataset.from_pandas(data_df)

In [None]:
# print(dataset['text'][0])

In [None]:

# Get quantized model
# The 8-bit bitsandbytes request goes through an explicit quantization config;
# passing `load_in_8bit=True` straight to `from_pretrained` is deprecated.
model = transformers.AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=transformers.BitsAndBytesConfig(load_in_8bit=True),
    device_map='auto',  # automatic placement of the model on the available devices
)

In [None]:
# Set PEFT adapter config (r=16 : lora_alpha=32)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# target modules are currently selected for zephyr base model
config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj","k_proj","o_proj","gate_proj","up_proj","down_proj"],   # modules that receive LoRA adapters; this is still adapter training, not full fine-tuning
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM")

In [None]:
# stabilize output layer and layernorms
# The second parameter of prepare_model_for_kbit_training is
# `use_gradient_checkpointing` (a bool), not a bit width; the earlier positional
# `8` only behaved the same because it is truthy. Pass it by name instead.
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)
# Set PEFT adapter on model (Last step)
model = get_peft_model(model, config)

In [None]:
# Set Hyperparameters
MAXLEN=512                    # passed to SFTTrainer as max_seq_length
BATCH_SIZE=4                  # per-device train batch size
GRAD_ACC=4                    # gradient accumulation steps
OPTIMIZER='paged_adamw_8bit' # save memory
LR=5e-06                      # NOTE(review): LoRA runs commonly use a much larger rate (around 1e-4) — confirm this value is intentional

In [None]:
# Set training config
# All tunable values come from the hyperparameter cell; only the fixed choices
# (fp16, logging cadence, epoch count) are spelled out here.
training_config = transformers.TrainingArguments(per_device_train_batch_size=BATCH_SIZE,
                                                 gradient_accumulation_steps=GRAD_ACC,  # samples per optimizer step per device = BATCH_SIZE * GRAD_ACC
                                                 optim=OPTIMIZER,
                                                 learning_rate=LR,
                                                 fp16=True,            # consider compatibility when using bf16
                                                 logging_steps=10,
                                                 num_train_epochs = 2,
                                                 output_dir=lora_output,  # Trainer outputs are written under this directory
                                                 remove_unused_columns=False,
                                                 )

In [None]:


# Collator for causal language modelling (mlm=False).
# NOTE(review): this collator excludes pad-token positions from the loss; if the
# pad token equals EOS, EOS positions are excluded as well — confirm that is acceptable.
data_collator = transformers.DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Supervised fine-tuning trainer reading the pre-rendered "text" column.
trainer = SFTTrainer(
    model=model,
    args=training_config,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=MAXLEN,
    data_collator=data_collator,
    # tokenizer=tokenizer,
    # callbacks=[early_stop],  # need to learn, lora easily overfits
)

In [None]:
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()
# Persist the trained adapter (weights + adapter config) at the top level of
# `lora_output`; the later cells read the adapter back from exactly that
# directory, which otherwise holds only intermediate checkpoints, if any.
trainer.save_model(lora_output)

In [None]:
# trainer.save_model(lora_output)

In [None]:
# Get peft config
from peft import PeftConfig
# Reads the adapter configuration stored in `lora_output`; its
# `base_model_name_or_path` is used by the following cells.
# Note: this rebinds `config`, which previously held the LoraConfig.
config = PeftConfig.from_pretrained(lora_output)

In [None]:
# Get base model
# Reloads the checkpoint named in the adapter config, without quantization
# arguments, and rebinds `model`.
# NOTE(review): no torch_dtype or device_map is given — confirm the machine has
# enough host memory for a default-precision load of the full model.
model = transformers.AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path,
                                                          return_dict=True,
                                                          )

In [None]:
# Reload the base tokenizer with add_eos_token=True, reuse EOS as the pad token,
# and save a copy under "zephyr-7b-beta-base-full".
# NOTE(review): `tokenizer` is reassigned again in the next cell — confirm this
# side copy is still needed.
tokenizer = transformers.AutoTokenizer.from_pretrained(base_model,
                                                       add_eos_token=True
                                                       )
tokenizer.pad_token = tokenizer.eos_token
tokenizer.save_pretrained("zephyr-7b-beta-base-full")

In [None]:
# Load the Lora model
from peft import PeftModel
model = PeftModel.from_pretrained(model, lora_output)

# Get tokenizer
# This tokenizer is saved next to the merged model and reloaded for inference,
# so it must not be created with `add_eos_token=True`: that setting is stored
# by `save_pretrained` and would append </s> to every prompt at generation time.
tokenizer = transformers.AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Merge the adapter into the wrapped model and keep the resulting plain model.
merged_model = model.merge_and_unload()

In [None]:
# Write the merged model and the tokenizer into the same directory, which the
# inference cells load from.
merged_model.save_pretrained(full_output)
tokenizer.save_pretrained(full_output)

In [None]:
# # push model to hub
# merged_model.push_to_hub(full_output)
# tokenizer.push_to_hub(full_output)

In [None]:
# Always raises AssertionError.
# NOTE(review): presumably a deliberate stop so "Run All" halts before the
# inference cells — confirm.
assert(False)

In [None]:
# load for inferencing
# NOTE(review): a later cell loads the same directory again with 4-bit
# quantization and rebinds both names — confirm this load is still needed.
tokenizer = AutoTokenizer.from_pretrained(full_output)
model = transformers.AutoModelForCausalLM.from_pretrained(full_output)

In [None]:
from huggingface_hub import notebook_login

# Interactive Hugging Face login.
# NOTE(review): presumably only needed for the hub pushes, which are commented
# out in this notebook — confirm.
notebook_login()

In [None]:
# model.push_to_hub("shahidul034/KUETLLM_zephyr_base")
# tokenizer.push_to_hub("shahidul034/KUETLLM_zephyr_base")

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
# model_name = r"/home/rtx3090/Desktop/shakib/sql/sqlcoder/model"
tokenizer = AutoTokenizer.from_pretrained(full_output)
# Load the merged model with 4-bit bitsandbytes quantization. The request goes
# through an explicit quantization config; passing `load_in_4bit=True` straight
# to `from_pretrained` is deprecated.
model = AutoModelForCausalLM.from_pretrained(
    full_output,
    trust_remote_code=True,
    # torch_dtype=torch.bfloat16,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="auto",
    use_cache=True,
)

In [None]:
# # load base for comparison
# tokenizer = AutoTokenizer.from_pretrained(base_model)
# model = transformers.AutoModelForCausalLM.from_pretrained(base_model)

In [None]:
# model.save_pretrained("zephyr7b-beta-full")
# tokenizer.save_pretrained("zephyr7b-beta-full")

In [None]:

# Sample prompt in an instruction / input / response layout.
# NOTE(review): the string has no placeholders despite the f-prefix, this layout
# differs from the chat template used to build the training text, and no visible
# cell passes this variable on — confirm it is still needed.
inp=f'''<s>### Instruction
    Use the provided input to create an instruction that could have been used to generate the response with an LLM.

    ### Input:
    Can you describe the location of Khulna City?

    ### Response:
    </s>
    '''

In [None]:
def answer_generate(inp, history):
    """Generate the chatbot reply for one user message.

    The message is wrapped in the same system/user chat template that
    ``formatted_text`` uses for the training data, so the model is prompted in
    the format it was fine-tuned on.

    Args:
        inp: The user's message text.
        history: Previous turns supplied by ``gr.ChatInterface``; accepted for
            interface compatibility and currently not used.

    Returns:
        The newly generated assistant text, with special tokens and
        surrounding whitespace removed.
    """
    import time

    messages = [
        {"role": "system", "content": "You are a KUET authority managed chatbot, help users by answering their queries about KUET."},
        {"role": "user", "content": inp},
    ]
    prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
    # Keep the prompt tensors on the same device as the model.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    generation_config = GenerationConfig(
        do_sample=True,
        top_k=1,
        temperature=0.1,
        max_new_tokens=256,
        pad_token_id=tokenizer.eos_token_id
    )

    st_time = time.time()
    outputs = model.generate(**inputs, generation_config=generation_config)
    # Decode only the tokens produced after the prompt. Splitting the decoded
    # text on "<|assistant|>" raised IndexError whenever the marker was absent.
    prompt_length = inputs["input_ids"].shape[-1]
    ans = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()
    print(time.time()-st_time)
    return ans

In [None]:
import gradio as gr

# Chat UI that uses answer_generate as its response function.
# NOTE(review): the example prompts and the "Echo Bot" title look like
# sample-code leftovers — confirm.
demo = gr.ChatInterface(fn=answer_generate, examples=["hello", "hola", "merhaba"], title="Echo Bot")
demo.launch()

In [None]:
# Finetuned:
# <|system|>
#  You are a support chatbot who helps with user queries chatbot who always responds in the style of a professional.
# <|user|>
# What started Hajj historically?
# <|assistant|>
# Hajj started as a pilgrimage to the Kaaba, which was a sacred site for the ancient Arab tribes. The Kaaba was believed to be the house of Allah, and the pilgrimage was a way to honor and worship Him. The practice of Hajj became more formalized and structured over time, with specific rituals and traditions developing. Today, Hajj is a significant religious event for Muslims around the world.
# 108.65042996406555

# <|system|>
#  You are a support chatbot who helps with user queries chatbot who always responds in the style of a professional.
# <|user|>
# Tell me about the history of Hajj.
# <|assistant|>
# Hajj is one of the five pillars of Islam, and its history dates back to the time of the Prophet Muhammad. The first recorded Hajj was in 632 CE, during the Prophet's lifetime. The rituals of Hajj have evolved over time, with some changes made by the Prophet himself and others added by subsequent generations of Muslims. The Hajj pilgrimage has also played a significant role in Islamic history, serving as a symbol of unity and solidarity among Muslims from different parts of the world. Today, Hajj remains a central part of Islamic practice, attracting millions of pilgrims each year.
# 159.5786316394806

# <|system|>
#  You are a support chatbot who helps with user queries chatbot who always responds in the style of a professional.
# <|user|>
# what is the origin of hajj?
# <|assistant|>
# Hajj is a religious pilgrimage to Mecca, Saudi Arabia, that is an essential part of Islamic faith. The practice of hajj dates back to the time of the Prophet Muhammad, who established it as a religious obligation for Muslims. The pilgrimage is a symbolic reenactment of the experiences of the Prophet Muhammad and his wife, Khadijah, during their pilgrimage to Mecca. The practice of hajj has been an integral part of Islamic tradition for over 1,400 years.
# 134.81562113761902

# Base:
# <|system|>
#  You are a support chatbot who helps with user queries chatbot who always responds in the style of a professional.
# <|user|>
# What started Hajj historically?
# <|assistant|>
# The practice of Hajj, as one of the five pillars of Islam, can be traced back to the time of Prophet Muhammad (peace be upon him) in the 7th century CE. However, the concept of pilgrimage to the holy city of Mecca for worship and devotion can be traced back to pre-Islamic times, as evidenced by historical records and archaeological findings. The ancient Arab tribes used to visit the Kaaba, a cube-shaped structure in the center of the Grand Mosque in Mecca, as a place of worship and pilgrimage. The Prophet Muhammad's mission to unify and spread Islam brought a new dimension to the practice of Hajj, making it a mandatory religious obligation for all able-bodied Muslims who can afford it to perform once in their lifetime.
# 208.76106905937195

# <|system|>
#  You are a support chatbot who helps with user queries chatbot who always responds in the style of a professional.
# <|user|>
# Tell me about the history of Hajj.
# <|assistant|>
# Certainly! The practice of Hajj, which is one of the five pillars of Islam, has a rich and fascinating history that dates back over 1,400 years.

# The origins of Hajj can be traced back to the time of the Prophet Muhammad, who received the first revelation of the Quran in 610 CE. According to Islamic tradition, the Prophet Muhammad was commanded by God to make a pilgrimage to the holy city of Mecca, which at the time was a pagan center of worship.

# The Prophet Muhammad completed his first pilgrimage, known as the Farewell Pilgrimage, in 632 CE, just a few months before his death. This pilgrimage is considered to be the first official Hajj, and it established many of the rituals and traditions that are still observed today.

# Over the centuries, Hajj has played a significant role in the history of Islam and the Muslim world. It has been a source of spiritual renewal and inspiration for countless generations of Muslims, and it has also been a powerful force for unity and solidarity among the faithful.

# Throughout history, Hajj
# 297.3512797355652

# <|system|>
#  You are a support chatbot who helps with user queries chatbot who always responds in the style of a professional.
# <|user|>
# what is the origin of hajj?
# <|assistant|>
# The origin of Hajj can be traced back to the time of Prophet Muhammad (peace be upon him) in the 7th century CE. According to Islamic tradition, Hajj was first performed by Prophet Ibrahim (Abraham) and his son Prophet Ismail (Ishmael) as an act of submission to Allah (God).

# However, the annual pilgrimage to the holy city of Mecca, which is an integral part of Hajj, was instituted by Prophet Muhammad during his lifetime. The Prophet's teachings and practices related to Hajj have been preserved and followed by Muslims ever since.

# The purpose of Hajj, as stated in the Quran, is to commemorate the unity and brotherhood of the human race, and to affirm the oneness of Allah. It is also a time for Muslims to seek forgiveness, make supplications, and renew their faith and commitment to Allah.

# In summary, the origin of Hajj can be traced back to the time of Prophet Ibrahim, but its current form and significance are rooted in the teachings and practices of Prophet Muhammad.
# 295.48454308509827


In [None]:
# Print the model's reported memory footprint, floor-divided by 1024**2.
print(model.get_memory_footprint()//1024**2)

In [None]:
# Pick the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# additional Info when using cuda
# The CUDA queries raise on a machine without a usable GPU, so both stay inside
# the guard; previously current_device() ran unconditionally.
if device.type == 'cuda':
    print(torch.cuda.current_device())
    print(torch.cuda.get_device_name(0))

In [None]:
def print_gpu_utilization(device_index=0):
    """Print the memory currently in use on one NVIDIA GPU, in MB.

    Args:
        device_index: NVML index of the GPU to query. Defaults to 0, the
            device that was queried before this parameter existed.
    """
    nvmlInit()
    try:
        handle = nvmlDeviceGetHandleByIndex(device_index)
        info = nvmlDeviceGetMemoryInfo(handle)
        print(f"GPU memory occupied: {info.used//1024**2} MB.")
    finally:
        # Release the NVML session opened above, even when the query fails.
        nvmlShutdown()

print_gpu_utilization()