In [1]:
from huggingface_hub import notebook_login
from dotenv import load_dotenv
import os

import torch 
from datasets import load_dataset, Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig, TrainingArguments

from peft import prepare_model_for_kbit_training
from peft import LoraConfig
from peft import get_peft_model

from trl import SFTTrainer

In [2]:
# Load environment variables from .env file
load_dotenv()
# 
# Retrieve the OPENAI_API_KEY
# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# print(OPENAI_API_KEY)

HUGGINGFACE_WRITE_TOKEN = os.getenv("HUGGINGFACE_WRITE_TOKEN")
# print(HUGGINGFACE_WRITE_TOKEN)

WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY")
WEAVIATE_CLUSTER = os.getenv("WEAVIATE_CLUSTER")



In [3]:
data = load_dataset("samsum", split="train",
                    trust_remote_code=True)
data

Dataset({
    features: ['id', 'dialogue', 'summary'],
    num_rows: 14732
})

In [4]:
df = data.to_pandas()
df.head()

Unnamed: 0,id,dialogue,summary
0,13818513,Amanda: I baked cookies. Do you want some?\r\...,Amanda baked cookies and will bring Jerry some...
1,13728867,Olivia: Who are you voting for in this electio...,Olivia and Olivier are voting for liberals in ...
2,13681000,"Tim: Hi, what's up?\r\nKim: Bad mood tbh, I wa...",Kim may try the pomodoro technique recommended...
3,13730747,"Edward: Rachel, I think I'm in ove with Bella....",Edward thinks he is in love with Bella. Rachel...
4,13728094,Sam: hey overheard rick say something\r\nSam:...,"Sam is confused, because he overheard Rick com..."


In [5]:
df['text'] = df[['dialogue', 'summary']].apply(lambda x: "###Human: Summarize this following dialogue: " + x["dialogue"] + "\n###Assistant: " +x["summary"], axis=1)

In [6]:
df['text'][0]

"###Human: Summarize this following dialogue: Amanda: I baked  cookies. Do you want some?\r\nJerry: Sure!\r\nAmanda: I'll bring you tomorrow :-)\n###Assistant: Amanda baked cookies and will bring Jerry some tomorrow."

In [7]:
data = Dataset.from_pandas(df)
data

Dataset({
    features: ['id', 'dialogue', 'summary', 'text'],
    num_rows: 14732
})

In [8]:
import torch
print(torch.cuda.is_available())  # Should return True


True


# llm Model and Tokenizer

In [9]:
# # from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# # from optimum.gptq import GPTQConfig



# # Load tokenizer
# tokenizer = AutoTokenizer.from_pretrained("TheBloke/Mistral-7B-Instruct-v0.1-GPTQ")

# # Configure quantization
# quantization_config = GPTQConfig(
#     bits=4, 
#     disable_exllama=False,  # Disable Exllama as per the error suggestion
#     tokenizer=tokenizer
# )

# # Load model with device map
# model = AutoModelForCausalLM.from_pretrained(
#     "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ",
#     quantization_config=quantization_config,
#     device_map="auto"
# )

In [10]:
# aa

In [11]:
# https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GPTQ
tokenizer = AutoTokenizer.from_pretrained("TheBloke/Mistral-7B-Instruct-v0.1-GPTQ",
                                          token=HUGGINGFACE_WRITE_TOKEN)


tokenizer.eos_token
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'


# https://huggingface.co/docs/transformers/en/main_classes/quantization
quantization_confiq_loading = GPTQConfig(bits=4, 
                                         use_exllama= False,
                                         tokenizer=tokenizer)


model = AutoModelForCausalLM.from_pretrained("TheBloke/Mistral-7B-Instruct-v0.1-GPTQ",
                                             quantization_config=quantization_confiq_loading,
                                             device_map="auto",
                                             token=HUGGINGFACE_WRITE_TOKEN)

CUDA extension not installed.
CUDA extension not installed.


In [12]:
print(model)

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralSdpaAttention(
          (rotary_emb): MistralRotaryEmbedding()
          (k_proj): QuantLinear()
          (o_proj): QuantLinear()
          (q_proj): QuantLinear()
          (v_proj): QuantLinear()
        )
        (mlp): MistralMLP(
          (act_fn): SiLU()
          (down_proj): QuantLinear()
          (gate_proj): QuantLinear()
          (up_proj): QuantLinear()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )
    )
    (norm): MistralRMSNorm()
  )
  (lm_head): Linear(in_features=4096, out_features=32000, bias=False)
)


In [13]:
model.config.use_cache = False
model.config.pretraining_tp=1
model.gradient_checkpointing_enable()

## Quantization with peft LoRa

In [14]:
model = prepare_model_for_kbit_training(model)
# model

In [15]:
peft_config = LoraConfig(r=16, 
                         lora_alpha=18, 
                         lora_dropout=0.05, 
                         bias="none", 
                         task_type="CASUAL_LM", 
                         target_modules= ["q_proj", "v_proj"]
                         )

In [16]:
model = get_peft_model(model, peft_config)
model

PeftModel(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096, padding_idx=0)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralSdpaAttention(
              (rotary_emb): MistralRotaryEmbedding()
              (k_proj): QuantLinear()
              (o_proj): QuantLinear()
              (q_proj): lora.QuantLinear(
                (base_layer): QuantLinear()
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict(

## Training

In [17]:
training_arguments = TrainingArguments(output_dir="misteral-finetuned-samsum",
                                       per_device_train_batch_size=8,
                                       gradient_accumulation_steps=1,
                                       optim="paged_adamw_32bit",
                                       learning_rate=2e-4,
                                       warmup_ratio=0.1,
                                       lr_scheduler_type="cosine",
                                       save_strategy="epoch",
                                       weight_decay=0.01,
                                       logging_steps=100,
                                       num_train_epochs=1,
                                       max_steps=259,
                                       fp16=True,
                                       push_to_hub=True
                                       )

In [19]:
trainer =SFTTrainer(model=model,
                    train_dataset=data,
                    peft_config=peft_config,
                    dataset_text_field="text",
                    args=training_arguments,
                    tokenizer=tokenizer,
                    packing=False,
                    max_seq_length=512
                    )

Map:   0%|          | 0/14732 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [20]:
trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33msahilkumar158[0m ([33msahilthegnius[0m). Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/259 [00:00<?, ?it/s]

  attn_output = torch.nn.functional.scaled_dot_product_attention(


{'loss': 1.9404, 'grad_norm': 0.7724002003669739, 'learning_rate': 0.0001542186650271374, 'epoch': 0.05}
{'loss': 1.7698, 'grad_norm': 0.7294683456420898, 'learning_rate': 3.000798140601e-05, 'epoch': 0.11}
{'train_runtime': 934.6192, 'train_samples_per_second': 2.217, 'train_steps_per_second': 0.277, 'train_loss': 1.8313502013453185, 'epoch': 0.14}


TrainOutput(global_step=259, training_loss=1.8313502013453185, metrics={'train_runtime': 934.6192, 'train_samples_per_second': 2.217, 'train_steps_per_second': 0.277, 'total_flos': 689014811787264.0, 'train_loss': 1.8313502013453185, 'epoch': 0.14060803474484257})

In [21]:
trainer.push_to_hub()

events.out.tfevents.1717472218.MSI:   0%|          | 0.00/6.25k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/sahilkumar4ai/misteral-finetuned-samsum/commit/bbabcfe33e786d1795a55f1e15cff9f861d8cf62', commit_message='End of training', commit_description='', oid='bbabcfe33e786d1795a55f1e15cff9f861d8cf62', pr_url=None, pr_revision=None, pr_num=None)