In [None]:
!pip install huggingface_hub transformers datasets peft accelerate wandb


In [None]:
import getpass
import os

import torch
import wandb
from datasets import Dataset
from google.colab import drive
from huggingface_hub import login
from transformers import AutoModelForTokenClassification, AutoTokenizer, Trainer, TrainingArguments


In [None]:
# Authenticate against Hugging Face and Weights & Biases.
# Secrets are never written into the notebook: each key is read from an
# environment variable (HF_TOKEN / WANDB_API_KEY) and, if that is unset or
# empty, requested through a hidden prompt so it does not end up in the
# saved cell source or output.
HUGGINGFACE_API_KEY = os.environ.get("HF_TOKEN") or getpass.getpass("Hugging Face token: ")
WANDB_API_KEY = os.environ.get("WANDB_API_KEY") or getpass.getpass("Weights & Biases API key: ")

# Login to Hugging Face
login(token=HUGGINGFACE_API_KEY)

# Login to Weights & Biases
wandb.login(key=WANDB_API_KEY)


In [None]:
# Attach Google Drive to the Colab filesystem; the training cells below
# write checkpoints and logs underneath this mount point.
drive.mount(mountpoint='/content/drive/')


Step 5: Create and Prepare Dataset for Hallucination Detection
Each example in this dataset has two fields:

- text: The sentence.
- labels: One label per word (whitespace-separated token) in the sentence (0 for correct, 1 for hallucinated).

For simplicity, we'll manually label some sentences.

In [None]:
# Example dataset with manual labeling of hallucinations.
# "labels" holds exactly one entry per whitespace-separated word of "text"
# (0 = correct, 1 = hallucinated); punctuation stays attached to its word.
data = [
    # 8 words; "Berlin, Germany." is hallucinated
    {"text": "The Eiffel Tower is located in Berlin, Germany.", "labels": [0, 0, 0, 0, 0, 0, 1, 1]},
    # 6 words; correct sentence, no hallucinations
    {"text": "The capital of France is Paris.", "labels": [0, 0, 0, 0, 0, 0]},
    # 6 words; "Asia." is hallucinated
    {"text": "The Amazon River flows through Asia.", "labels": [0, 0, 0, 0, 0, 1]},
]

# Guard against hand-labeling mistakes: every word needs exactly one label.
for row in data:
    assert len(row["text"].split()) == len(row["labels"]), (
        f"label count does not match word count for: {row['text']!r}"
    )

# Convert the list of row dictionaries to a Hugging Face dataset.
# (Dataset.from_dict expects a dict of columns; a list of rows needs from_list.)
dataset = Dataset.from_list(data)


In [None]:

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

# The Llama-2 tokenizer ships without a padding token, so any padding request
# would raise. Reuse the end-of-sequence token for padding.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


# Define the tokenization function
def tokenize_function(examples):
    """Tokenize a batch of sentences and align word-level labels to tokens.

    Args:
        examples: A batch mapping with a "text" column (list of sentences) and
            a "labels" column (for each sentence, one integer label per
            whitespace-separated word).

    Returns:
        The tokenizer output, padded/truncated to 128 tokens, with a "labels"
        entry of the same length per example. The first sub-token of each word
        carries the word's label; special tokens, padding and continuation
        sub-tokens are set to -100 so the loss ignores them.

    Raises:
        ValueError: If a sentence's label count differs from its word count.
    """
    words_per_example = [text.split() for text in examples["text"]]
    encodings = tokenizer(
        words_per_example,
        is_split_into_words=True,  # lets word_ids() map tokens back to words
        padding="max_length",
        truncation=True,
        max_length=128,
    )

    aligned_labels = []
    for batch_index, (words, word_labels) in enumerate(zip(words_per_example, examples["labels"])):
        if len(words) != len(word_labels):
            raise ValueError(
                f"Example {batch_index} has {len(words)} words but {len(word_labels)} labels"
            )

        token_labels = []
        previous_word = None
        for word_idx in encodings.word_ids(batch_index=batch_index):
            if word_idx is None or word_idx == previous_word:
                # Special/padding token, or a continuation piece of the same word.
                token_labels.append(-100)
            else:
                token_labels.append(word_labels[word_idx])
            previous_word = word_idx
        aligned_labels.append(token_labels)

    encodings["labels"] = aligned_labels
    return encodings


# Apply the tokenization function
tokenized_datasets = dataset.map(tokenize_function, batched=True)


In [None]:
# Instantiate the pretrained checkpoint with a two-class token-classification head
model = AutoModelForTokenClassification.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    num_labels=2,
)

# Human-readable class names: index 0 means "correct", index 1 means "hallucinated".
# Both lookup directions are derived from the same ordered tuple.
_label_names = ("correct", "hallucinated")
model.config.id2label = dict(enumerate(_label_names))
model.config.label2id = {label: index for index, label in enumerate(_label_names)}


In [None]:
# Set up training arguments
training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/NLP/MODELS/FineTunedModel",  # Path to save the model
    # `eval_strategy` is the current name of this option; the older
    # `evaluation_strategy` spelling is deprecated and rejected by newer releases.
    eval_strategy="epoch",  # Evaluate the model every epoch
    learning_rate=2e-5,  # Learning rate
    per_device_train_batch_size=4,  # Training batch size
    per_device_eval_batch_size=8,  # Evaluation batch size
    num_train_epochs=3,  # Number of epochs
    weight_decay=0.01,  # Weight decay for optimization
    logging_dir="/content/drive/MyDrive/NLP/MODELS/Logs",  # Save logs to Google Drive
    logging_steps=10,  # Log every 10 steps
    push_to_hub=False,  # Set to True to upload the model after training
    report_to="wandb",  # Report metrics to Weights & Biases
)


In [None]:
# Initialize the Trainer with model, training arguments, and datasets
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets,  # Use the tokenized dataset for training
    # NOTE: evaluating on the training data only checks that the pipeline runs;
    # it says nothing about generalization. Swap in a held-out split when available.
    eval_dataset=tokenized_datasets,
    # `processing_class` supersedes the deprecated `tokenizer` argument; the
    # tokenizer is still saved alongside the model and used for batching.
    processing_class=tokenizer,
)

In [None]:

# Start training
# Runs the fine-tuning loop configured on `trainer`; the call is left as the
# cell's last expression so its return value is displayed as the cell output.
trainer.train()

In [None]:
# Persist the fine-tuned weights to Google Drive so they outlive the Colab session
model_save_path = "/content/drive/MyDrive/NLP/MODELS/FineTunedModel"
trainer.save_model(output_dir=model_save_path)


In [None]:

# Optionally, push the fine-tuned model to Hugging Face Hub.
# The upload is opt-in: leave PUSH_TO_HUB as False to skip it, or set it to
# True after replacing HUB_REPO_ID with a repository you can write to.
PUSH_TO_HUB = False
HUB_REPO_ID = "your_huggingface_username/your_model_repo_name"

if PUSH_TO_HUB:
    model.push_to_hub(HUB_REPO_ID)
    tokenizer.push_to_hub(HUB_REPO_ID)


INFERENCE:

In [None]:
import torch

# Inference function for hallucination detection
def infer_with_model(input_text):
    """Return the words of `input_text` that the model flags as hallucinated.

    The text is split on whitespace and tokenized word by word so that every
    model token can be traced back to the word it came from. A word is reported
    when its first sub-token is predicted as class 1 ("hallucinated"), the
    usual convention for word-level token classification; special tokens and
    continuation sub-tokens are skipped.

    Args:
        input_text: The sentence to check.

    Returns:
        A list of the flagged words, in sentence order. Empty if the input
        contains no words or nothing is flagged.
    """
    words = input_text.split()
    if not words:
        return []

    # A single sequence needs no padding (and the tokenizer may lack a pad token).
    inputs = tokenizer(
        words,
        is_split_into_words=True,
        return_tensors="pt",
        truncation=True,
        max_length=128,
    )
    # Token -> word index map; None marks special tokens such as BOS.
    word_ids = inputs.word_ids(batch_index=0)
    # The model may live on a GPU after training; inputs must be on the same device.
    model_inputs = {name: tensor.to(model.device) for name, tensor in inputs.items()}

    # Predict the token labels (hallucination vs. correct)
    model.eval()  # disable dropout for deterministic predictions
    with torch.no_grad():
        logits = model(**model_inputs).logits  # Raw logits output from the model

    # Get the predicted labels (0 for correct, 1 for hallucinated)
    predicted_labels = torch.argmax(logits, dim=-1)[0].tolist()

    # Collect each word whose first sub-token was predicted as hallucinated
    hallucinated_words = []
    previous_word = None
    for word_idx, label in zip(word_ids, predicted_labels):
        is_first_piece = word_idx is not None and word_idx != previous_word
        if is_first_piece and label == 1:
            hallucinated_words.append(words[word_idx])
        previous_word = word_idx

    return hallucinated_words

# Demo: run the detector on a sentence containing a factual error
input_text = "The Eiffel Tower is located in Berlin, Germany."
hallucinated_words = infer_with_model(input_text)

# Emit the heading and the flagged items on separate lines
print("Hallucinated words:", hallucinated_words, sep="\n")


REFERENCE:
https://chatgpt.com/share/67729fee-da9c-800b-808a-28a722cd3174