## Preprocessing

In [12]:
import pandas as pd
from nltk.tokenize import word_tokenize, sent_tokenize
import stanza
import re
from nltk.corpus import stopwords
import contractions
from stanza.utils.conll import CoNLL

# Initialize Stanza NLP model.
# `nlp` is the shared pipeline (tokenize, lemma, pos processors) that
# `lemmatize_text` calls further down in this notebook.
stanza.download('en')
nlp = stanza.Pipeline('en', processors='tokenize,lemma,pos')

# Load the labeled train and test splits (paths are relative to the working directory)
train_df = pd.read_json('data/labeled_data/labeled-train.model-agnostic.json')
test_df = pd.read_json('data/labeled_data/labeled-test.model-agnostic.json')

Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.9.0.json:   0%|   …

2024-12-10 20:51:55 INFO: Downloaded file to C:\Users\Admin\stanza_resources\resources.json
2024-12-10 20:51:55 INFO: Downloading default packages for language: en (English) ...
2024-12-10 20:51:56 INFO: File exists: C:\Users\Admin\stanza_resources\en\default.zip
2024-12-10 20:51:58 INFO: Finished downloading models and saved to C:\Users\Admin\stanza_resources
2024-12-10 20:51:58 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.9.0.json:   0%|   …

2024-12-10 20:51:59 INFO: Downloaded file to C:\Users\Admin\stanza_resources\resources.json
2024-12-10 20:51:59 INFO: Loading these models for language: en (English):
| Processor | Package           |
---------------------------------
| tokenize  | combined          |
| mwt       | combined          |
| pos       | combined_charlm   |
| lemma     | combined_nocharlm |

2024-12-10 20:51:59 INFO: Using device: cpu
2024-12-10 20:51:59 INFO: Loading: tokenize
2024-12-10 20:51:59 INFO: Loading: mwt
2024-12-10 20:51:59 INFO: Loading: pos
2024-12-10 20:52:00 INFO: Loading: lemma
2024-12-10 20:52:00 INFO: Done loading processors!


Unnamed: 0,hyp,tgt,src,ref,task,model,label,p(Hallucination)
0,"Don't worry, it's only temporary.",Don't worry. It's only temporary.,Не волнуйся. Это только временно.,either,MT,,Not Hallucination,0.0
1,Tom is never where he should be.,Tom is never where he's supposed to be.,"Тома никогда нет там, где он должен быть.",either,MT,,Not Hallucination,0.25
2,It's hard for me to work with Tom.,I have trouble working with Tom.,Мне сложно работать с Томом.,either,MT,,Not Hallucination,0.25
3,"Water, please.",I'd like some water.,"Воду, пожалуйста.",either,MT,,Hallucination,1.0
4,I didn't expect Tom to betray me.,I didn't think that Tom would betray me.,"Я не ожидал, что Том предаст меня.",either,MT,,Not Hallucination,0.25


In [14]:
# Filter for Specific Task
def load_and_filter_data(df, task_type, column_to_keep1, column_to_keep2, column_to_keep3):
    """Return the rows of ``df`` for one task, reduced to the columns of interest.

    Rows are kept when their ``task`` value matches ``task_type`` and the index
    is renumbered from 0. ``hyp`` and ``label`` are always retained, together
    with the three ``column_to_keep*`` names; every other column is dropped.
    The input frame itself is left untouched.
    """
    retained = ('hyp', 'label', column_to_keep1, column_to_keep2, column_to_keep3)

    # Row selection: only the requested task
    task_mask = df['task'].isin([task_type])
    task_rows = df[task_mask].reset_index(drop=True)

    # Column selection: drop everything that is not explicitly retained
    surplus = [name for name in task_rows.columns if name not in retained]
    return task_rows.drop(columns=surplus)

# Build the 'PG'-task train and test frames, keeping `src`, `label` and
# `p(Hallucination)` alongside `hyp` (no validation split is loaded here)
pg_train_df = load_and_filter_data(train_df, 'PG', 'src', 'label', 'p(Hallucination)')
pg_test_df = load_and_filter_data(test_df, 'PG', 'src', 'label', 'p(Hallucination)')

pg_train_df.head()

Unnamed: 0,hyp,src,label,p(Hallucination)
0,"You're not alone, claire- -","You're not alone, Claire.",Not Hallucination,0.0
1,"Who told you to throw acid at Vargas, hmmm?","Who told you to throw acid at Vargas, hmm?",Not Hallucination,0.0
2,♪ Where the pure angel merges with the antic s...,Where the pure angel merges with the antic Sphinx,Not Hallucination,0.0
3,Where is it written what is it I'm meant to be?,Where is it written what is it I'm meant to be,Not Hallucination,0.0
4,We'll find the skipper and then we'll go home.,We'll find the skipper and then we'll go home.,Not Hallucination,0.0


In [26]:
# Text Segmentation, Normalization, and Stopword Removal Function
def normalize_text(text, remove_stopwords=False):
    """Expand contractions, lowercase, and strip punctuation from ``text``.

    Every character that is neither a word character nor whitespace is
    removed. When ``remove_stopwords`` is true, the cleaned text is also split
    on whitespace, NLTK's English stopwords are filtered out, and the
    remaining words are re-joined with single spaces.
    """
    lowered = contractions.fix(text).lower()
    cleaned = re.sub(r'[^\w\s]', '', lowered)

    # Common case: punctuation-free text, whitespace left as it was
    if not remove_stopwords:
        return cleaned

    english_stopwords = set(stopwords.words('english'))
    kept_words = [token for token in cleaned.split() if token not in english_stopwords]
    return ' '.join(kept_words)

# Lemmatization Function
def lemmatize_text(text):
    """Run ``text`` through the ``nlp`` pipeline and return its lemmas as one flat list."""
    lemmas = []
    for sentence in nlp(text).sentences:
        lemmas.extend(word.lemma for word in sentence.words)
    return lemmas

# Apply Preprocessing Steps to Dataset
def preprocess_dataset(df, column):
    """Add normalized, sentence, token and lemma columns for ``hyp`` and ``column``.

    For each of the two text columns the following columns are added to ``df``
    (which is modified in place and also returned):

    * ``<name>_normalized`` - ``normalize_text`` applied to the raw text
    * ``<name>_sentences``  - list of normalized sentences
    * ``<name>_tokens``     - one ``word_tokenize`` token list per sentence
    * ``<name>_lemmas``     - flat lemma list from ``lemmatize_text``

    Sentence boundaries are detected on the *raw* text: ``normalize_text``
    strips punctuation, so segmenting its output would leave ``sent_tokenize``
    with no sentence-final punctuation to split on.
    """
    text_columns = ('hyp', column)

    def split_into_normalized_sentences(raw_text):
        # Segment first, then normalize each sentence; drop sentences that are
        # blank once punctuation has been removed.
        normalized = (normalize_text(sentence) for sentence in sent_tokenize(raw_text))
        return [sentence for sentence in normalized if sentence.strip()]

    # Normalize text
    for name in text_columns:
        df[f'{name}_normalized'] = df[name].apply(normalize_text)

    # Sentence Segmentation (on the raw text, see docstring)
    for name in text_columns:
        df[f'{name}_sentences'] = df[name].apply(split_into_normalized_sentences)

    # Tokenization
    for name in text_columns:
        df[f'{name}_tokens'] = df[f'{name}_sentences'].apply(
            lambda sentences: [word_tokenize(sentence) for sentence in sentences]
        )

    # Lemmatization
    for name in text_columns:
        df[f'{name}_lemmas'] = df[f'{name}_normalized'].apply(lemmatize_text)

    return df

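# NOTE: these calls are commented out, but later cells read the `hyp_lemmas` and
# `src_lemmas` columns they create, so they must be run on a fresh kernel.
# pg_train_df = preprocess_dataset(pg_train_df, 'src')
# pg_test_df = preprocess_dataset(pg_test_df, 'src')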

In [25]:
# Save preprocessed dataset to a CSV file (only the training frame is written,
# without its index).
# NOTE(review): `to_csv` presumably does not create missing folders - confirm
# that data/labeled_data/preprocessed/ exists before running.
preprocessed_file_path = "data/labeled_data/preprocessed/pg_preprocessed_dataset.csv"
pg_train_df.to_csv(preprocessed_file_path, index=False)

print(f"Preprocessed dataset saved to {preprocessed_file_path}")

Preprocessed dataset saved to data/labeled_data/preprocessed/pg_preprocessed_dataset.csv


In [44]:
# Check class distribution: number of training rows per `label` value
print(pg_train_df['label'].value_counts())

label
Not Hallucination    5326
Hallucination        4674
Name: count, dtype: int64


## Naive Bayes
**1. Importing Required Libraries**

**2. Loading the Data**
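- Use the preprocessed `pg_train_df` and `pg_test_df` DataFrames from the Preprocessing section (the code below reads them from memory rather than from `pg_train_label.csv`).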
- Remove any rows with missing values to ensure clean data.

**3. Combining Text Features**
- Combine the two columns (`hyp_lemmas` and `src_lemmas`), which are the core inputs for detecting hallucinations in this dataset, so that the model sees both key pieces of information: the hypothesis and the source text.
- This merged text will be used as input for the model, as both columns may contain complementary information about the task.

**4. Splitting the Data**
- **`X`**: Features (in this case, the combined text data).
- **`y`**: Labels (`label` column).

**5. Converting Text to Numerical Features (TF-IDF Vectorization)**
- **TF-IDF Vectorization**:
  - Converts the text into numerical features by analyzing the importance of each word (or n-grams) in the document.
  - **`ngram_range=(1, 2)`**: Includes single words (unigrams) and pairs of consecutive words (bigrams).
  - **`max_features=5000`**: Limits the vocabulary to the 5,000 most frequent words or phrases (ranked by term frequency across the corpus, not by TF-IDF score).
- Apply the vectorizer to the training data (`fit_transform`) and the testing data (`transform`).

**6. Training a Naive Bayes Classifier**
- A **Multinomial Naive Bayes classifier** is initialized and trained using the vectorized training data (`X_train_vec`) and corresponding labels (`y_train`).
- This algorithm is suitable for text classification problems as it assumes word frequencies follow a multinomial distribution.

**7. Making Predictions**
- Use the trained model to predict the labels for the test set (`X_test_vec`).

**8. Evaluating the Model**
- **`accuracy_score`**: Computes the percentage of correctly predicted labels in the test set.
- **`classification_report`**: Provides a detailed evaluation of the model’s performance, including:
  - **Precision**: How many of the predicted positives are true positives.
  - **Recall**: How many of the actual positives were correctly identified.
  - **F1-score**: Harmonic mean of precision and recall.
  - **Support**: Number of actual occurrences for each label.

In [41]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Use preprocessed training and testing datasets.
# .copy() gives independent frames: adding a column to the bare result of
# dropna() can trigger pandas' SettingWithCopyWarning.
train_data = pg_train_df.dropna().copy()
test_data = pg_test_df.dropna().copy()

# Combine `hyp_lemmas` and `src_lemmas` into a single space-separated text
# feature, identically for the train and test sets
for split_data in (train_data, test_data):
    split_data['combined_text'] = (
        split_data['hyp_lemmas'].apply(" ".join)
        + " "
        + split_data['src_lemmas'].apply(" ".join)
    )

# Extract features and labels
X_train = train_data['combined_text']
y_train = train_data['label']

X_test = test_data['combined_text']
y_test = test_data['label']

# Vectorize text using TF-IDF (vocabulary is fitted on the training split only)
vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Train a Naive Bayes classifier
naive_bayes = MultinomialNB()
naive_bayes.fit(X_train_vec, y_train)

# Predict on test set
y_pred = naive_bayes.predict(X_test_vec)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 0.536
Classification Report:
                   precision    recall  f1-score   support

    Hallucination       0.63      0.44      0.52       212
Not Hallucination       0.48      0.66      0.55       163

         accuracy                           0.54       375
        macro avg       0.55      0.55      0.54       375
     weighted avg       0.56      0.54      0.53       375



### Hyperparameter

In [43]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
from nltk.corpus import stopwords

# Use preprocessed training and testing datasets.
# .copy() gives independent frames: adding a column to the bare result of
# dropna() can trigger pandas' SettingWithCopyWarning.
train_data = pg_train_df.dropna().copy()
test_data = pg_test_df.dropna().copy()

# Combine `hyp_lemmas` and `src_lemmas` into a single space-separated text
# feature, identically for the train and test sets
for split_data in (train_data, test_data):
    split_data['combined_text'] = (
        split_data['hyp_lemmas'].apply(" ".join)
        + " "
        + split_data['src_lemmas'].apply(" ".join)
    )

# Extract features and labels
X_train = train_data['combined_text']
y_train = train_data['label']

X_test = test_data['combined_text']
y_test = test_data['label']

# Define the pipeline (vectorizer and classifier are tuned together so the
# vocabulary is re-fitted inside every cross-validation fold)
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),  # Vectorizer
    ('nb', MultinomialNB())       # Naive Bayes classifier
])

# Define the parameter grid
param_grid = {
    'tfidf__max_features': [2000, 3000, 5000, 10000],  # Vocabulary size cap
    'tfidf__ngram_range': [(1, 1), (1, 2)],            # Unigrams only, or unigrams + bigrams
    'tfidf__min_df': [1, 2, 3],                        # Minimum document frequency
    'tfidf__stop_words': [None, stopwords.words('english')],  # With / without stopword removal
    'nb__alpha': [1.0, 0.5, 0.1, 0.01]                # Smoothing strength
}

# Perform grid search with stratified cross-validation
stratified_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
grid_search = GridSearchCV(pipeline, param_grid, cv=stratified_cv, scoring='accuracy', verbose=1, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Get the best parameters and model
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# Evaluate on the test set
y_pred = best_model.predict(X_test)
print(f"Best Parameters: {best_params}")
print("Classification Report:")
print(classification_report(y_test, y_pred))


Fitting 5 folds for each of 192 candidates, totalling 960 fits
Best Parameters: {'nb__alpha': 1.0, 'tfidf__max_features': 3000, 'tfidf__min_df': 1, 'tfidf__ngram_range': (1, 1), 'tfidf__stop_words': None}
Classification Report:
                   precision    recall  f1-score   support

    Hallucination       0.66      0.38      0.48       212
Not Hallucination       0.48      0.75      0.59       163

         accuracy                           0.54       375
        macro avg       0.57      0.56      0.53       375
     weighted avg       0.58      0.54      0.53       375



The cells below evaluate a pre-trained sequence classification model for detecting hallucinations in text pairs. Here is a step-by-step explanation of what the code does:

---

### **1. Preparing Data**
- **Loading Validation Data**:
  ```python
  data_val = open("data/val.model-agnostic.json")
  data = json.load(data_val)
  df_val = pd.DataFrame(data)
  df_dm = df_val[df_val["task"] == "PG"]
  ```
  - Loads validation data from a JSON file.
  - Converts the data into a DataFrame (`df_val`) and filters it for entries related to the task `"PG"` (the variable is still named `df_dm`).
  
- **Loading Test Data**:
  ```python
  data_test = open("data/test.model-agnostic.json")
  data = json.load(data_test)
  df_test = pd.DataFrame(data)
  df_dm_test = df_test[df_test["task"] == "PG"]
  ```
  - Similar to the validation data process, loads and filters test data for the `"PG"` task.

- **Extracting True Labels**:
  ```python
  def extract_truelabels(df):
      true_labels = []
      for label in df["label"]:
          if label == "Not Hallucination":
              true_labels.append(0)
          else:
              true_labels.append(1)
      df["true_labels"] = true_labels
      return true_labels
  ```
  - Converts labels (`"Not Hallucination"` or `"Hallucination"`) into numeric values (`0` or `1`) for easier evaluation.
  - Adds a `true_labels` column to the validation and test DataFrames.

---

### **2. Functions for Model Evaluation**
- **Assess Performance**:
  ```python
  def assess_performance(true_labels, pred_labels):
      ...
      accuracy = (tp + tn) / len(true_labels)
      ...
      return accuracy, precision, recall, f1
  ```
  - Calculates performance metrics for the classifier:
    - **Accuracy**: Overall percentage of correct classifications.
    - **Precision**: Proportion of true positives among predicted positives.
    - **Recall**: Proportion of true positives among actual positives.
    - **F1-Score**: Harmonic mean of precision and recall.
  - Returns these metrics and prints them.

- **Finding the Best Threshold**:
  ```python
  def finding_threshold(similarities):
      ...
      accs.append(acc)
      ...
      plt.plot(thresholds, accs, color="red", label="accuracy")
      ...
      return max_acc
  ```
  - Finds the best threshold for converting probabilities (or similarities) into binary predictions.
  - Evaluates classifier performance over a range of thresholds (`0.0` to `1.0`) and returns the threshold with the highest accuracy.

- **Get Predictions**:
  ```python
  def get_prediction(probs, thresh):
      pred_labels = []
      for sim in probs:
          if sim > thresh:
              pred_labels.append(0)
          else:
              pred_labels.append(1)
      return pred_labels
  ```
  - Converts probabilities into binary predictions (`0` or `1`) based on a threshold.

---

### **3. Model Preparation**
- **Input Preparation**:
  ```python
  sentences_hyp = df_dm["hyp"]
  sentences_tgt = df_dm["tgt"]
  pairs_val = [(hyp, tgt) for hyp, tgt in zip(sentences_hyp, sentences_tgt)]
  ```
  - Prepares pairs of hypothesis (`hyp`) and target (`tgt`) sentences for the validation data.

- **Loading the Model**:
  ```python
  model = AutoModelForSequenceClassification.from_pretrained(
      'vectara/hallucination_evaluation_model', trust_remote_code=True)
  ```
  - Loads a pre-trained model for hallucination evaluation from the Hugging Face library.

---

### **4. Model Evaluation on Validation Data**
- **Finding the Best Threshold**:
  ```python
  pred_vectara_val = model.predict(pairs_val)
  thresh_vectara = finding_threshold(pred_vectara_val)
  ```
  - Runs the model on validation pairs to obtain probabilities or similarity scores.
  - Finds the best threshold for classification by maximizing accuracy on the validation data.

---

### **5. Model Testing**
- **Testing on Test Data**:
  ```python
  pairs_test = [(hyp, tgt) for hyp, tgt in zip(sentences_hyp_test, sentences_tgt_test)]
  pred_vectara_test = model.predict(pairs_test)
  labels_vectara_test = get_prediction(pred_vectara_test, thresh_vectara)
  ```
  - Prepares hypothesis-target pairs for the test data.
  - Runs the model to get predictions for the test pairs.
  - Converts these predictions into binary labels (`0` or `1`) using the best threshold (`thresh_vectara`).

- **Assessing Test Performance**:
  ```python
  print("Performance of Vectara on Test Data:")
  assess_performance(true_labels_test, labels_vectara_test)
  ```
  - Evaluates the test set performance using accuracy, precision, recall, and F1-score metrics.

---

### **Summary**
This script performs the following steps:
1. Prepares validation and test datasets for the `"PG"` task by filtering relevant pairs and extracting true labels.
2. Implements functions to evaluate performance, find the best classification threshold, and convert probabilities to binary predictions.
3. Loads a pre-trained model (`vectara/hallucination_evaluation_model`) for hallucination evaluation.
4. Uses the validation set to find the best threshold for classification.
5. Tests the model on unseen test data and evaluates its performance using classification metrics.

If you have additional questions or need refinements for specific sections, let me know!

In [1]:
import json
import pandas as pd
import numpy as np
import torch
import transformers
import matplotlib.pyplot as plt


###################### preparing data #########################

# loading validation data
# The context manager closes the file once the JSON is parsed; the encoding is
# explicit so that non-ASCII text is not decoded with the platform's locale
# default encoding.
with open("data/val.model-agnostic.json", encoding="utf-8") as data_val:
    data = json.load(data_val)
df_val = pd.DataFrame(data)
# .copy() makes the filtered frame independent of df_val, so a column can be
# added to it later without pandas' SettingWithCopyWarning.
# Despite the `dm` in the name, this frame holds the rows of the "PG" task.
df_dm = df_val[df_val["task"] == "PG"].copy()


# loading test data (same steps as for the validation split)
with open("data/test.model-agnostic.json", encoding="utf-8") as data_test:
    data = json.load(data_test)
df_test = pd.DataFrame(data)
df_dm_test = df_test[df_test["task"] == "PG"].copy()

pg_train_df.head()

ImportError: cannot import name 'OfflineModeIsEnabled' from 'huggingface_hub.utils' (C:\Users\Admin\anaconda3\Lib\site-packages\huggingface_hub\utils\__init__.py)

In [None]:


# extracting true labels
def extract_truelabels(df):
    """Encode ``df["label"]`` as integers and store the result on the frame.

    "Not Hallucination" maps to 0; every other label value maps to 1. The
    encoded list is written to ``df["true_labels"]`` and also returned.
    """
    true_labels = [0 if label == "Not Hallucination" else 1 for label in df["label"]]
    df["true_labels"] = true_labels
    return true_labels

# Encode the gold labels of both splits as 0/1 lists; each call also adds a
# `true_labels` column to the frame it is given
true_labels_val = extract_truelabels(df_dm)
true_labels_test = extract_truelabels(df_dm_test)



################### Functions to Choose and Evaluate Model ############################


# function to assess performance of classifier

def assess_performance(true_labels, pred_labels):
    """Print and return accuracy, precision, recall and F1 for binary labels.

    Label 1 is treated as the positive class. Each (true, pred) pair is
    tallied as tp, fn or tn; any pair that is none of those counts as fp.

    Args:
        true_labels: gold labels (0 or 1); must support ``len``.
        pred_labels: predicted labels, paired with ``true_labels`` via ``zip``.

    Returns:
        ``(accuracy, precision, recall, f1)``. A metric whose denominator is
        zero (empty input, no predicted positives, no actual positives) is
        reported as 0 instead of raising ``ZeroDivisionError``.
    """
    # counters for the correct vs. incorrect classifications
    tp = tn = fp = fn = 0

    for true, pred in zip(true_labels, pred_labels):
        if true == 1 and pred == 1:
            tp += 1
        elif true == 1 and pred == 0:
            fn += 1
        elif true == 0 and pred == 0:
            tn += 1
        else:
            fp += 1

    # Every ratio is guarded explicitly, so all four metrics treat an empty
    # denominator the same way and unrelated errors are no longer swallowed.
    total = len(true_labels)
    accuracy = (tp + tn) / total if total else 0
    precision = tp / (tp + fp) if (tp + fp) else 0
    recall = tp / (tp + fn) if (tp + fn) else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) else 0

    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1-Score:", f1)

    return accuracy, precision, recall, f1


# function for finding best threshold on validation data

def finding_threshold(similarities, true_labels=None):
    """Pick the decision threshold with the highest accuracy and plot the sweep.

    Args:
        similarities: re-iterable collection of scores. A score strictly above
            the threshold is labelled 0, anything else 1 (the same rule that
            ``get_prediction`` applies).
        true_labels: gold 0/1 labels aligned with ``similarities``. Defaults
            to the notebook-level ``true_labels_val`` so existing calls that
            pass only ``similarities`` keep working.

    Returns:
        The threshold, out of 20 evenly spaced values from 0 to 1, that gives
        the highest accuracy; on ties the lowest such threshold is returned.
    """
    if true_labels is None:
        true_labels = true_labels_val

    # per-threshold metrics, in the same order as `thresholds`
    accs = []
    recs = []
    precs = []

    # defining thresholds
    thresholds = np.linspace(0, 1, 20).tolist()

    for thresh in thresholds:

        # convert similarities to a label based on threshold
        pred_label = [0 if sim > thresh else 1 for sim in similarities]

        # calculate performance based on threshold
        # (assess_performance also prints the four metrics on every iteration)
        acc, prec, rec, f1 = assess_performance(true_labels, pred_label)
        accs.append(acc)
        recs.append(rec)
        precs.append(prec)

    # threshold with highest accuracy (list.index returns the first maximum)
    max_acc = thresholds[accs.index(max(accs))]
    print("highest accuracy at a threshold of:", max_acc)

    plt.plot(thresholds, accs, color = "red", label = "accuracy")
    plt.plot(thresholds, precs, color = "green", label = "precision")
    plt.plot(thresholds, recs, color = "blue", label = "recall")
    plt.legend()
    plt.axvline(max_acc, color = "grey")
    plt.show()

    return max_acc



# Function to transform probabilities into labels
 
def get_prediction(probs, thresh):
    """Turn scores into 0/1 labels: 0 when a score is strictly above ``thresh``, else 1."""
    return [0 if score > thresh else 1 for score in probs]




######################### Defining Model ##############################

# preparing input: (hypothesis, target) pairs of the validation split
# NOTE(review): the 'PG' preprocessing earlier in this notebook keeps `src`,
# not `tgt`, as the reference column - confirm `tgt` is populated for these rows.
sentences_hyp = df_dm["hyp"]
sentences_tgt = df_dm["tgt"]

pairs_val = [(hyp, tgt) for hyp, tgt in zip(sentences_hyp, sentences_tgt)]

# loading model
# NOTE(review): trust_remote_code=True presumably lets transformers execute
# code shipped with the model repository - confirm the source is trusted.
from transformers import AutoModelForSequenceClassification
model = AutoModelForSequenceClassification.from_pretrained(
    'vectara/hallucination_evaluation_model', trust_remote_code=True)

# finding best threshold based on validation data
# (finding_threshold compares against the global `true_labels_val`)
pred_vectara_val = model.predict(pairs_val)
thresh_vectara = finding_threshold(pred_vectara_val)


# now testing model on the test split with the threshold chosen above
sentences_hyp_test = df_dm_test["hyp"]
sentences_tgt_test = df_dm_test["tgt"]

pairs_test = [(hyp, tgt) for hyp, tgt in zip(sentences_hyp_test, sentences_tgt_test)]

pred_vectara_test = model.predict(pairs_test)

# scores above the threshold become label 0, all others label 1
labels_vectara_test = get_prediction(pred_vectara_test, thresh_vectara)
print("Performance of Vectara on Test Data:")
assess_performance(true_labels_test, labels_vectara_test)
