# In [None]:
import torch
from datasets import load_dataset, Dataset
from peft import LoraConfig, AutoPeftModelForCausalLM, prepare_model_for_kbit_training, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig, TrainingArguments
from trl import SFTTrainer
import os
import pandas as pd
from pynvml import *
 
def print_gpu_utilization(device_index=0):
    """Print how much memory is currently in use on one GPU.

    The value reported by NVML is integer-divided by 1024**2 and printed
    with an "MB" label.

    Args:
        device_index: NVML index of the GPU to query. Defaults to 0, which
            is the device the original hard-coded.
    """
    nvmlInit()
    try:
        handle = nvmlDeviceGetHandleByIndex(device_index)
        info = nvmlDeviceGetMemoryInfo(handle)
        print(f"GPU memory occupied: {info.used//1024**2} MB.")
    finally:
        # Pair every nvmlInit() with an nvmlShutdown() so repeated calls
        # (before/after training, from print_summary, ...) do not leave
        # NVML initialisations outstanding, even if the query raises.
        nvmlShutdown()
 
 
def print_summary(result):
    """Report training speed figures and the current GPU memory usage.

    Args:
        result: Object exposing a ``metrics`` mapping that contains the
            ``train_runtime`` and ``train_samples_per_second`` entries.
    """
    metrics = result.metrics

    runtime = metrics['train_runtime']
    print(f"Time: {runtime:.2f}")

    throughput = metrics['train_samples_per_second']
    print(f"Samples/second: {throughput:.2f}")

    # Finish with a snapshot of GPU memory usage.
    print_gpu_utilization()
 
# ---------------------------------------------------------------------------
# LoRA fine-tuning script: load a local causal-LM checkpoint in 8-bit, attach
# LoRA adapters to the attention projections, and train with TRL's SFTTrainer
# on the "text" column of a local CSV file.
# ---------------------------------------------------------------------------

# Local directory of the base checkpoint to fine-tune.
# zephyr_base_model = r'/ddn/data/fcit/wsalhalabi/wsalhalabi/zephyr7b'
zephyr_base_model = "/ddn/data/fcit/wsalhalabi/wsalhalabi/thebloke_mistral"
# SECURITY: an access token was hard-coded here. Never commit credentials;
# read them from the environment instead (and revoke the leaked token).
# login(os.environ["HF_TOKEN"])

# Alternative data source (kept for reference):
# data = load_dataset("arbitropy/kuetdata", split="train")
# data_df = data.to_pandas()
# data_df["text"] = data_df[["Question", "Answer"]].apply(lambda x: "###Human: " + str(x["Question"]) + "n###Assistant: " + str(x["Answer"]), axis=1)
data_df = pd.read_csv(r"/ddn/data/fcit/wsalhalabi/wsalhalabi/data/train.csv")
print(data_df.iloc[0])

# The trainer below reads the "text" column (dataset_text_field="text").
# Fail fast here rather than after the model has been loaded onto the GPU.
if "text" not in data_df.columns:
    raise ValueError(
        "train.csv must contain a 'text' column; found columns: "
        f"{list(data_df.columns)}"
    )
dataset = Dataset.from_pandas(data_df)

tokenizer = AutoTokenizer.from_pretrained(zephyr_base_model)
# Reuse the end-of-sequence token for padding.
tokenizer.pad_token = tokenizer.eos_token

# The original call passed a bare, misspelled `load_in_8bits` with no value
# and no trailing comma, which is a SyntaxError. The intended keyword is
# `load_in_8bit=True`.
# NOTE(review): if this checkpoint is already GPTQ-quantized, the 8-bit flag
# may not apply; confirm against the checkpoint's config.
model = AutoModelForCausalLM.from_pretrained(
    zephyr_base_model,
    load_in_8bit=True,
    device_map="auto",
)
print("model is ready!!!")

# The KV cache is only useful for generation; turn it off for training with
# gradient checkpointing.
model.config.use_cache = False
model.config.pretraining_tp = 1
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

# Rank-16 LoRA adapters on the query and value projections only.
peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
)
model = get_peft_model(model, peft_config)

training_arguments = TrainingArguments(
    output_dir="mistral7b-finetuned",
    per_device_train_batch_size=8,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    save_strategy="epoch",
    logging_steps=100,
    # A positive max_steps takes precedence over num_train_epochs.
    num_train_epochs=1,
    max_steps=250,  # 250*8=2000 data samples only trained
    fp16=False,
    push_to_hub=False,
)

# NOTE(review): `model` is already wrapped by get_peft_model above; confirm
# whether passing peft_config to the trainer as well is still required.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    args=training_arguments,
    tokenizer=tokenizer,
    packing=False,
    max_seq_length=512,
)

print_gpu_utilization()
# Keep the training result so the runtime/throughput metrics can be reported.
result = trainer.train()
# print_summary() prints the timing metrics and then the GPU memory usage,
# so it covers the GPU report that previously followed training.
print_summary(result)