In [2]:
# Install necessary dependencies
!pip install huggingface_hub transformers datasets peft accelerate wandb




In [3]:

import os
import string
from huggingface_hub import login
from transformers import AutoModelForTokenClassification, AutoTokenizer, Trainer, TrainingArguments
from datasets import Dataset
import torch
import wandb
from google.colab import drive
from peft import LoraConfig, get_peft_model

from google.colab import userdata
# Read the API credentials through google.colab.userdata so no key is hard-coded in the notebook.
HUGGING_API = userdata.get('HUGGINGFACE_READ_AND_WRITE')  # Hugging Face token, passed to login() below
GOOGLE_API = userdata.get('GOOGLE_API_KEY')  # not referenced by any other cell shown in this notebook
WANDB_key = userdata.get('WANDB')  # Weights & Biases key, passed to wandb.login() below


In [4]:

# Log in to Hugging Face with the token read from the Colab user data
login(token=HUGGING_API)


In [5]:

# Log in to Weights & Biases with the key read from the Colab user data
wandb.login(key=WANDB_key)


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mnicksnlp[0m ([33mnicksnlp-university-of-helsinki[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [6]:

# Open the W&B run for this training session (the Trainer below reports to "wandb")
run = wandb.init(
    name="test_1",
    project='llama-7b-hallucination',
    job_type="training",
    anonymous="allow",
)


In [7]:

# Mount Google Drive; the model and logs are later written under /content/drive/MyDrive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [8]:

# Create and prepare the dataset for hallucination detection.
# Each example has two fields: 'text' (the sentence) and 'labels' (one 0/1 label per whitespace-separated word).


In [8]:

# 1. Toy examples: every sentence is paired with one label per whitespace-separated
#    word (0 = correct, 1 = hallucinated).
data = [
    {"text": sentence, "labels": word_labels}
    for sentence, word_labels in (
        # Hallucinated words: "Berlin", "Germany"
        ("The Eiffel Tower is located in Berlin, Germany.", [0, 0, 0, 0, 0, 0, 1, 1]),
        # Correct sentence
        ("The capital of France is Paris.", [0, 0, 0, 0, 0, 0]),
        # Hallucinated word: "Asia"
        ("The Amazon River flows through Asia.", [0, 0, 0, 0, 0, 1]),
    )
]


In [9]:

# 2. Load the Llama-2 tokenizer
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
# Pad on the right, reusing the end-of-sequence token as the padding token
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

tokenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [18]:
import string

def preprocess_data(batch, tokenizer, max_length=512):
    """
    Tokenize a batch of sentences and project word-level labels onto the tokens.

    Labelling rules, applied token by token:
      * tokens listed in ``tokenizer.all_special_tokens`` get -100 (the value
        PyTorch's cross-entropy loss ignores by default);
      * a token starting with '▁' opens the next word and takes that word's label;
      * a token made only of punctuation characters gets 0;
      * any other token is a continuation piece and takes the label of the word
        it belongs to, even when a punctuation token sits in between
        (e.g. "Berlin", "-", "based").

    Args:
        batch: Mapping with 'text' (list of sentences) and 'labels' (for each
            sentence, one integer label per word, in order).
        tokenizer: Callable tokenizer that also provides
            ``convert_ids_to_tokens`` and ``all_special_tokens``.
        max_length: Length every sequence is padded / truncated to.

    Returns:
        dict with the keys 'original_word_labels', 'input_ids', 'tokens',
        'labels' (token-aligned) and 'attention_mask'.

    Raises:
        ValueError: if a sentence produces more word-start tokens than it has
            word labels.
    """
    # Tokenize all the sentences in the batch at once
    tokenized_input = tokenizer(
        batch['text'],
        truncation=True,
        padding='max_length',
        max_length=max_length,
        return_tensors="pt",
    )
    input_ids = tokenized_input['input_ids']

    # Hoisted out of the loops: these lookups do not change per token.
    special_tokens = set(tokenizer.all_special_tokens)
    punctuation_chars = set(string.punctuation)

    batch_tokens = []     # token strings per sentence
    aligned_labels = []   # token-level labels per sentence
    original_labels = []  # untouched word-level labels per sentence

    for i in range(len(batch['text'])):
        labels = batch['labels'][i]
        tokens = tokenizer.convert_ids_to_tokens(input_ids[i])

        sentence_labels = []
        word_idx = 0               # index of the next unused word label
        current_word_label = -100  # label of the word currently being spelled out

        for token in tokens:
            if token in special_tokens:
                # <s>, </s>, padding: excluded from the loss and ends the current word
                current_word_label = -100
                sentence_labels.append(-100)
            elif token.startswith("▁"):
                # First piece of a new word: consume the next word label
                if word_idx >= len(labels):
                    raise ValueError(
                        f"Sentence {i} has more words than labels "
                        f"({len(labels)} labels): {batch['text'][i]!r}"
                    )
                current_word_label = labels[word_idx]
                word_idx += 1
                sentence_labels.append(current_word_label)
            elif token and all(ch in punctuation_chars for ch in token):
                # Stand-alone punctuation (",", ".", "...") is always labelled 0
                sentence_labels.append(0)
            else:
                # Continuation piece: same label as the word it belongs to
                sentence_labels.append(current_word_label)

        batch_tokens.append(tokens)
        aligned_labels.append(sentence_labels)
        original_labels.append(labels)

    return {
        'original_word_labels': original_labels,  # Word-level labels as given
        'input_ids': input_ids,                   # The actual token IDs
        'tokens': batch_tokens,                   # The tokens corresponding to input_ids
        'labels': aligned_labels,                 # The aligned labels for each token
        'attention_mask': tokenized_input['attention_mask'],
    }


In [19]:

# 4. Build a Hugging Face Dataset with one column per field of the raw examples
dataset = Dataset.from_dict(
    {field: [example[field] for example in data] for field in ('text', 'labels')}
)


In [20]:

# 5. Tokenize and align labels batch-wise, padding every sequence to 128 tokens
tokenized_data = dataset.map(
    lambda batch: preprocess_data(batch, tokenizer, max_length=128),
    batched=True,
)


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

In [21]:

# Show every column of the first processed example together with its length
for field, value in tokenized_data[0].items():
    print((field, value), "LENGTH: ", len(value))


('text', 'The Eiffel Tower is located in Berlin, Germany.') LENGTH:  47
('labels', [-100, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100]) LENGTH:  128
('input_ids', [1, 450, 382, 2593, 295, 23615, 338, 5982, 297, 5115, 29892, 9556, 29889, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2

In [None]:

# Load Llama-2-7B with a token-classification head that has two output labels
model = AutoModelForTokenClassification.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    num_labels=2,
)


config.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

In [None]:

# Configure PEFT (LoRA)
lora_config = LoraConfig(
    # Declare the task so PEFT builds its token-classification wrapper, which keeps
    # the freshly initialised classification head trainable and saves it with the
    # adapter; without it only the LoRA matrices are trained.
    task_type="TOKEN_CLS",
    r=8,  # Low-rank factor (adjust this based on your needs)
    lora_alpha=16,  # Scaling factor for LoRA
    lora_dropout=0.1,  # Dropout rate for LoRA
    bias="none"  # Specify whether or not to use biases in LoRA layers
)


In [None]:

# Wrap the base model with the LoRA adapters described by `lora_config`.
# NOTE: `model` is rebound to the wrapped model, so re-running this cell without
# reloading the base model hands the already-wrapped model to get_peft_model again.
model = get_peft_model(model, lora_config)


In [None]:

# Human-readable label names: class 0 is "correct", class 1 is "hallucinated"
model.config.label2id = {"correct": 0, "hallucinated": 1}
model.config.id2label = {0: "correct", 1: "hallucinated"}


In [None]:

# Set up training arguments
training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/NLP/MODELS/FineTunedModel",  # Path to save the model
    # NOTE(review): newer transformers releases call this `eval_strategy`; confirm
    # against the installed version.
    evaluation_strategy="epoch",  # Evaluate the model every epoch
    learning_rate=2e-5,  # Learning rate
    per_device_train_batch_size=4,  # Training batch size
    per_device_eval_batch_size=8,  # Evaluation batch size
    num_train_epochs=3,  # Number of epochs
    weight_decay=0.01,  # Weight decay for optimization
    logging_dir="/content/drive/MyDrive/NLP/MODELS/Logs",  # Save logs to Google Drive
    logging_steps=10,  # Log every 10 steps
    push_to_hub=False,  # Set to True to upload the model after training
    report_to="wandb",  # Report metrics to Weights & Biases
    run_name="test_1",
    # Name the label column explicitly: the Trainer cannot always infer it from a
    # PEFT-wrapped model, and without it evaluation does not compute a loss.
    label_names=["labels"],
    # This field is only stored; the Trainer does not act on it. To actually resume,
    # call trainer.train(resume_from_checkpoint=True) once a checkpoint exists.
    resume_from_checkpoint=True,
    save_steps=300,
    save_total_limit=3, # keep only the last 3 checkpoints
)


In [None]:

# Build the Trainer from the LoRA-wrapped model, the tokenizer and the training arguments.
# The toy dataset is used for both training and evaluation.
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    eval_dataset=tokenized_data,
    train_dataset=tokenized_data,
)


In [None]:

# Start training with the model, arguments and datasets given to `trainer`
trainer.train()


In [None]:

# Save the fine-tuned model to Google Drive.
# This is the same directory that the training arguments use as `output_dir`.
model_save_path = "/content/drive/MyDrive/NLP/MODELS/FineTunedModel"
trainer.save_model(model_save_path)


In [None]:

# Optionally, push the fine-tuned model to Hugging Face Hub.
# Disabled by default so that "Run All" does not try to upload to the placeholder
# repository below; set PUSH_TO_HUB = True and fill in a real repo id to enable it.
PUSH_TO_HUB = False
HUB_REPO_ID = "your_huggingface_username/your_model_repo_name"

if PUSH_TO_HUB:
    model.push_to_hub(HUB_REPO_ID)
    tokenizer.push_to_hub(HUB_REPO_ID)


In [None]:

# End the W&B run that was started earlier with wandb.init()
wandb.finish()


In [None]:

# Inference: Using the Fine-Tuned Model for Inference
def infer_with_model(input_text):
    """
    Return the tokens of ``input_text`` that the model labels as hallucinated.

    Uses the notebook-level ``tokenizer`` and ``model``.

    Args:
        input_text: Sentence to analyse (truncated to 128 tokens).

    Returns:
        List of token strings, exactly as produced by the tokenizer (sub-word
        pieces, not merged back into words), whose predicted label is 1.
        Special tokens are never reported.
    """
    # Tokenize the input text
    encoded = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=128)

    # The tensors must live on the same device as the model weights
    device = next(model.parameters()).device
    inputs = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Disable dropout so predictions are deterministic
    model.eval()

    # Predict the token labels (hallucination vs. correct)
    with torch.no_grad():
        logits = model(**inputs).logits  # Raw logits output from the model

    # Get the predicted labels (0 for correct, 1 for hallucinated)
    predicted_labels = torch.argmax(logits, dim=-1)[0].tolist()

    # Recover the tokens from the ids that were actually fed to the model, so each
    # token lines up with its own prediction (the encoded input includes special
    # tokens that tokenizer.tokenize() would leave out).
    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0].tolist())
    special_tokens = set(tokenizer.all_special_tokens)

    # Keep the tokens flagged as hallucinated, skipping special tokens
    hallucinated_words = [
        token
        for token, label in zip(tokens, predicted_labels)
        if label == 1 and token not in special_tokens
    ]

    return hallucinated_words


In [None]:

# Run the detector on a sentence that contains a known factual error
input_text = "The Eiffel Tower is located in Berlin, Germany."
hallucinated_words = infer_with_model(input_text)

# Print a header line followed by the flagged tokens
print("Hallucinated words:", hallucinated_words, sep="\n")
