In [1]:
## Nikolay Vorontsov,
## Fine-tuning llama dataset for hallucination detection
## Reference: https://chatgpt.com/share/67729fee-da9c-800b-808a-28a722cd3174

In [1]:
# Install necessary dependencies
!pip install transformers peft accelerate huggingface_hub
!pip install -q trl xformers wandb datasets einops sentencepiece bitsandbytes
!pip install -U datasets



In [2]:
from transformers import AutoTokenizer, BitsAndBytesConfig, TrainingArguments #, TextStreamer, AutoModelForCausalLM
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training
import torch, wandb
from datasets import load_dataset, Dataset
from trl import SFTTrainer
#import os
import string
from huggingface_hub import login
from transformers import AutoModelForTokenClassification, AutoTokenizer, TrainingArguments #, Trainer
import torch
import wandb
from google.colab import drive
from peft import LoraConfig #, get_peft_model

from google.colab import userdata
HUGGING_API = userdata.get('HUGGINGFACE_READ_AND_WRITE')
GOOGLE_API = userdata.get('GOOGLE_API_KEY')
WANDB_key = userdata.get('WANDB')


In [3]:
# Pre trained model
model_name = "meta-llama/Llama-2-7b-hf"
# Dataset name
dataset_name = "UNDEFINED"
# Hugging face repository link to save fine-tuned model(Create new repository in huggingface,copy and paste here)
new_model = "nicksnlp/llama-7B-hallucination"

In [4]:

# Login to Hugging Face
login(token=HUGGING_API)


In [5]:

# Login to Weights & Biases
wandb.login(key=WANDB_key)


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mnicksnlp[0m ([33mnicksnlp-university-of-helsinki[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [6]:

run = wandb.init(project='llama-7b-hallucination', job_type="training", anonymous="allow", name="test_1")


In [7]:

# Mount Google Drive to save models
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [8]:

# Step 5: Create and Prepare Dataset for Hallucination Detection
# This dataset will have two fields: 'text' and 'labels'


In [9]:

# 1. Load your dataset
data = [
    {"text": "The Eiffel Tower is located in Berlin, Germany.", "labels": [0, 0, 0, 0, 0, 0, 1, 1]},  # Hallucinated words: "Berlin", "Germany"
    {"text": "The capital of France is Paris.", "labels": [0, 0, 0, 0, 0, 0]},  # Correct sentence
    {"text": "The Amazon River flows through Asia.", "labels": [0, 0, 0, 0, 0, 1]},  # Hallucinated word: "Asia"
]


In [10]:
# 2. Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.add_eos_token = True
tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_eos_token
tokenizer.padding_side = "right"

In [11]:

def preprocess_data(batch, tokenizer, max_length=512):
    """
    Tokenize sentences and align labels to the tokens, including attention masks, input_ids, and tokens.
    """
    # Tokenize all the sentences in the batch at once
    tokenized_input = tokenizer(batch['text'], truncation=True, padding='max_length', max_length=max_length, return_tensors="pt", return_offsets_mapping=True)

    # Initialize aligned labels list to store labels for each example in the batch
    aligned_labels = []
    original_labels = [] # Store original sentence-level labels for each sentence
    input_ids = tokenized_input['input_ids']  # Get the input_ids for the batch

    # Iterate over each sentence in the batch
    for i, sentence in enumerate(batch['text']):
        labels = batch['labels'][i]  # Get the labels for the current sentence
        tokens = tokenizer.convert_ids_to_tokens(tokenized_input['input_ids'][i])  # Get the tokens for the current sentence
        attention_mask = tokenized_input['attention_mask'][i]  # Get the attention mask for the current sentence

        sentence_labels = []
        word_idx = 0  # To track the index of the word label

        # Align labels with tokens
        for token in tokens:
            if token in tokenizer.all_special_tokens:  # Special tokens like <s>, </s>, padding
                sentence_labels.append(-100)  # Special tokens get label -100
            elif token.startswith("▁"):  # Word token (starts with '▁')
                sentence_labels.append(labels[word_idx])  # Assign the word label
                word_idx += 1  # Move to the next label in the list
            elif token in string.punctuation:  # Check if token is a punctuation mark
                sentence_labels.append(0)  # Assign label 0 for punctuation
            else:  # Subword tokens (non-word token)
                sentence_labels.append(sentence_labels[-1])  # Take the same label as the previous token

        # Append the tokenized data including labels, attention mask, and input_ids
        aligned_labels.append(sentence_labels)
        original_labels.append(labels)  # Save the original labels (sentence-level)


    # Return the processed data including input_ids, attention_mask, aligned labels, tokens, and sentences
    return {
        'original_word_labels': original_labels,  # Sentence-level labels
        'input_ids': input_ids,               # The actual token IDs
        'tokens': [tokenizer.convert_ids_to_tokens(ids) for ids in input_ids],  # The tokens corresponding to input_ids
        'labels': aligned_labels,  # The aligned labels for each token
        'attention_mask': tokenized_input['attention_mask'],
        #'sentences': batch['text']  # Output the original sentences
    }


In [12]:

# 4. Convert data to Hugging Face dataset format
dataset = Dataset.from_dict({
    'text': [item['text'] for item in data],
    'labels': [item['labels'] for item in data]
})


In [13]:

# 5. Apply the preprocessing in batches
tokenized_data = dataset.map(lambda x: preprocess_data(x, tokenizer, max_length=128), batched=True)


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

In [14]:

# Display the processed data
for x in tokenized_data[0].items():
  print(x, "LENGTH: ", len(x[1]))


('text', 'The Eiffel Tower is located in Berlin, Germany.') LENGTH:  47
('labels', [-100, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100]) LENGTH:  128
('original_word_labels', [0, 0, 0, 0, 0, 0, 1, 1]) LENGTH:  8
('input_ids', [1, 450, 382, 2593, 295, 23615, 338, 5982, 297, 5115, 29892, 9556, 29889, 2, 2, 2, 2, 2, 2, 2,

In [15]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= torch.float16,
    bnb_4bit_use_double_quant= False,
)

# Load model for token classification
model = AutoModelForTokenClassification.from_pretrained(
    model_name, #"meta-llama/Llama-2-7b-hf" defined earlier
    num_labels=2,
    quantization_config=bnb_config,
    device_map={"": 0}
)

#model = AutoModelForCausalLM.from_pretrained(
#    model_name,
#    quantization_config=bnb_config,
#    device_map={"": 0}
#)

model = prepare_model_for_kbit_training(model)
model.config.use_cache = False # silence the warnings. Please re-enable for inference!
model.config.pretraining_tp = 1


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForTokenClassification were not initialized from the model checkpoint at meta-llama/Llama-2-7b-hf and are newly initialized: ['score.bias', 'score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [16]:
# Inspecting the layers of the model
for name, module in model.named_modules():
    print(name)


model
model.embed_tokens
model.layers
model.layers.0
model.layers.0.self_attn
model.layers.0.self_attn.q_proj
model.layers.0.self_attn.k_proj
model.layers.0.self_attn.v_proj
model.layers.0.self_attn.o_proj
model.layers.0.self_attn.rotary_emb
model.layers.0.mlp
model.layers.0.mlp.gate_proj
model.layers.0.mlp.up_proj
model.layers.0.mlp.down_proj
model.layers.0.mlp.act_fn
model.layers.0.input_layernorm
model.layers.0.post_attention_layernorm
model.layers.1
model.layers.1.self_attn
model.layers.1.self_attn.q_proj
model.layers.1.self_attn.k_proj
model.layers.1.self_attn.v_proj
model.layers.1.self_attn.o_proj
model.layers.1.self_attn.rotary_emb
model.layers.1.mlp
model.layers.1.mlp.gate_proj
model.layers.1.mlp.up_proj
model.layers.1.mlp.down_proj
model.layers.1.mlp.act_fn
model.layers.1.input_layernorm
model.layers.1.post_attention_layernorm
model.layers.2
model.layers.2.self_attn
model.layers.2.self_attn.q_proj
model.layers.2.self_attn.k_proj
model.layers.2.self_attn.v_proj
model.layers.2.

In [18]:
peft_config = LoraConfig(
    lora_alpha=8,
    lora_dropout=0.1,
    r=16,
    bias="none",
    task_type="TOKEN_CLS",  # For token classification
# Must be one of the following task types: SEQ_CLS, SEQ_2_SEQ_LM, CAUSAL_LM, TOKEN_CLS, QUESTION_ANS, FEATURE_EXTRACTION.
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",  # Attention layers
        "gate_proj", "up_proj",  # MLP components
        "score"  # The final classification head layer
    ]
)


In [19]:

# Define label names (0 = correct, 1 = hallucinated)
model.config.id2label = {0: "correct", 1: "hallucinated"}
model.config.label2id = {"correct": 0, "hallucinated": 1}


In [20]:
checkpoint_dir = "/content/drive/MyDrive/NLP/MODELS/FineTunedModel"

In [21]:
training_arguments = TrainingArguments(
    output_dir= checkpoint_dir,
    num_train_epochs=1,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=2,
    optim="paged_adamw_8bit",
    save_steps=300,
    save_total_limit=3, # keep only the last 3 checkpoints
    logging_steps=10,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.3,
    group_by_length=True,
    lr_scheduler_type="linear",
    report_to="wandb",
    run_name="test_1",
    resume_from_checkpoint=True  # Automatically resume from the last checkpoint
)


In [22]:
# Setting sft parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=tokenized_data,
    #eval_dataset=tokenized_data,  # Optional: Use the same dataset for evaluation
    peft_config=peft_config,
    #max_seq_length=None,
    #dataset_text_field="text",
    processing_class=tokenizer,
    args=training_arguments,
    #packing=False
)


TypeError: modules_to_save cannot be applied to modules of type <class 'peft.tuners.lora.layer.Linear'>

In [None]:

# Start training
trainer.train()


In [None]:
# Save the fine-tuned model
new_model_local_path = checkpoint_dir + "/new_model_local"
trainer.model.save_pretrained(new_model_local_path)
wandb.finish()
model.config.use_cache = True
model.eval()

"""
# Save the fine-tuned model to Google Drive
model_save_path = "/content/drive/MyDrive/NLP/MODELS/FineTunedModel"
trainer.save_model(model_save_path)


In [None]:

# Inference: Using the Fine-Tuned Model for Inference
def infer_with_model(input_text):
    # Tokenize the input text
    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=128)

    # Predict the token labels (hallucination vs. correct)
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits  # Raw logits output from the model

    # Get the predicted labels (0 for correct, 1 for hallucinated)
    predicted_labels = torch.argmax(logits, dim=-1)

    # Decode the tokens from the input text
    tokens = tokenizer.tokenize(input_text)

    # Get the corresponding predicted labels for each token
    labeled_tokens = list(zip(tokens, predicted_labels[0].tolist()))

    # Create a list of hallucinated words
    hallucinated_words = [token for token, label in labeled_tokens if label == 1]

    return hallucinated_words


In [None]:

# Example usage of the inference function
input_text = "The Eiffel Tower is located in Berlin, Germany."
hallucinated_words = infer_with_model(input_text)

# Print the list of hallucinated words
print("Hallucinated words:")
print(hallucinated_words)


In [None]:
## NEXT MERGE AND PUSH MODEL

In [None]:
# Load base model with quantization to reduce memory usage
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map={"": 0},
    quantization_config=bnb_config,  # Defined earlier
)

# Load the PEFT model and merge weights
model = PeftModel.from_pretrained(base_model, new_model_local_path)
model = model.merge_and_unload()

# Reload tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [None]:
model.push_to_hub(new_model)
tokenizer.push_to_hub(new_model)