In [None]:
# Load the pickled dataset that the models below are trained on.
import pickle

DATASET_PATH = "/content/drive/MyDrive/hallucination_dataset.pkl"

# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load pickles that come from a trusted source.
with open(DATASET_PATH, mode="rb") as dataset_file:
    dataset = pickle.load(dataset_file)

# Cell output: number of samples in the dataset.
len(dataset)


43115

In [None]:
# Quick sanity check: how many samples carry each label value.
from collections import Counter

label_counts = Counter(sample["label"] for sample in dataset)
label_counts


Counter({0: 14037, 1: 29078})

In [None]:
from sentence_transformers import SentenceTransformer

# One sentence encoder is shared by all three text fields.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

questions = [sample["question"] for sample in dataset]
contexts = [sample["context"] for sample in dataset]
# A falsy answer (e.g. None or "") is replaced by the question text before
# encoding, so every entry handed to the encoder is taken from a text field.
answers = [sample["answer"] or sample["question"] for sample in dataset]

# Same encoding options for every field.
encode_options = {"batch_size": 64, "show_progress_bar": True}

# The generator is consumed left to right: questions are encoded first,
# then contexts, then answers.
q_embs, c_embs, a_embs = (
    embedder.encode(texts, **encode_options)
    for texts in (questions, contexts, answers)
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/674 [00:00<?, ?it/s]

Batches:   0%|          | 0/674 [00:00<?, ?it/s]

Batches:   0%|          | 0/674 [00:00<?, ?it/s]

In [None]:
def token_overlap_ratio(answer, context):
    """Return the fraction of unique answer tokens that also occur in the context.

    Both texts are lower-cased and split on whitespace, then reduced to sets,
    so repeated words count once.

    Args:
        answer: Answer text. ``None`` or a string without any tokens is
            treated as an empty answer.
        context: Context text. ``None`` is treated as an empty context.

    Returns:
        A float between 0.0 and 1.0; 0.0 when the answer has no tokens.
    """
    # `or ""` makes a missing (None) text behave like an empty string instead
    # of raising AttributeError on `.lower()`.
    answer_tokens = set((answer or "").lower().split())
    if not answer_tokens:
        # Also avoids dividing by zero below.
        return 0.0

    context_tokens = set((context or "").lower().split())
    return len(answer_tokens & context_tokens) / len(answer_tokens)

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

def extract_features_from_embeddings(i):
    """Build the six-element feature list for sample ``i``.

    Reads the module-level ``q_embs``, ``c_embs``, ``a_embs`` and ``dataset``.

    Features, in order:
        0. cosine similarity between question and context embeddings
        1. cosine similarity between answer and context embeddings
        2. cosine similarity between question and answer embeddings
        3. ``token_overlap_ratio(answer, context)``
        4. number of whitespace-separated tokens in the context
        5. number of whitespace-separated tokens in the answer

    Args:
        i: Index of the sample in ``dataset`` and in the embedding arrays.

    Returns:
        A list of six numeric features.
    """
    q_emb = q_embs[i]
    c_emb = c_embs[i]
    a_emb = a_embs[i]

    sample = dataset[i]
    c = sample["context"]
    # A missing answer (None) is treated as empty text so the lexical features
    # evaluate to 0 instead of raising AttributeError.
    # NOTE(review): for falsy answers the embedding cell encodes the question
    # text instead, so the question/answer similarity is then computed between
    # two encodings of the same string — confirm this is intended.
    a = sample["answer"] or ""

    # cosine_similarity returns a 1x1 matrix for a single pair; [0][0] takes
    # the scalar out of it.
    features = [
        cosine_similarity([q_emb], [c_emb])[0][0],
        cosine_similarity([a_emb], [c_emb])[0][0],
        cosine_similarity([q_emb], [a_emb])[0][0],
        token_overlap_ratio(a, c),
        len(c.split()),
        len(a.split())
    ]

    return features


In [None]:
import numpy as np

# Feature matrix: one row of engineered features per sample.
X = np.array(
    [extract_features_from_embeddings(idx) for idx in range(len(dataset))]
)
# Target vector: the label stored on each sample.
y = np.array([sample["label"] for sample in dataset])

X.shape, y.shape


((43115, 6), (43115,))

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows; stratify on y and fix the seed so the split is
# repeatable.
split_options = {"test_size": 0.2, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

# Base estimator for the hyperparameter search.
rfc = RandomForestClassifier(
    n_jobs=-1,
    class_weight="balanced",
    random_state=42,
)

# Candidate values the randomized search samples from.
my_params = dict(
    n_estimators=[100, 200, 300],
    criterion=['gini', 'entropy'],
    max_depth=[8, 10, 12, None],
    min_samples_split=[5, 10],
    min_samples_leaf=[2, 5],
    max_features=['sqrt', 'log2'],
)

# 20 sampled configurations, each scored with 5-fold cross-validation.
# NOTE(review): scoring='f1' evaluates the positive class (label 1) only —
# confirm that is the class the search should optimise for.
random_search = RandomizedSearchCV(
    rfc,
    my_params,
    n_iter=20,
    cv=5,
    scoring='f1',
    verbose=2,
    random_state=42,
    n_jobs=-1,
)

random_search.fit(X_train, y_train)
print("Best Parameters:", random_search.best_params_, sep="\n")

Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best Parameters:
{'n_estimators': 200, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 8, 'criterion': 'gini'}


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib

# Tree hyperparameters for the final forest, written out explicitly.
rf_hyperparams = {
    "n_estimators": 200,
    "max_depth": 8,
    "min_samples_split": 10,
    "min_samples_leaf": 2,
    "max_features": "log2",
    "criterion": "gini",
}

rf_model = RandomForestClassifier(
    **rf_hyperparams,
    class_weight="balanced",
    random_state=42,
    n_jobs=-1,
)

# Last expression of the cell, so the fitted estimator is displayed.
rf_model.fit(X_train, y_train)


In [None]:
# In-sample check: predict on the same split the forest was fitted on.
y_pred = rf_model.predict(X_train)

rf_train_report = classification_report(y_train, y_pred)
rf_train_matrix = confusion_matrix(y_train, y_pred)

print("📊 Classification Report:\n")
print(rf_train_report)

print("\n📉 Confusion Matrix:\n")
print(rf_train_matrix)


📊 Classification Report:

              precision    recall  f1-score   support

           0       1.00      0.40      0.57     11230
           1       0.77      1.00      0.87     23262

    accuracy                           0.80     34492
   macro avg       0.89      0.70      0.72     34492
weighted avg       0.85      0.80      0.77     34492


📉 Confusion Matrix:

[[ 4456  6774]
 [    1 23261]]


In [None]:
# Held-out check: predict on the test split (overwrites the earlier y_pred).
y_pred = rf_model.predict(X_test)

rf_test_report = classification_report(y_test, y_pred)
rf_test_matrix = confusion_matrix(y_test, y_pred)

print("📊 Classification Report:\n")
print(rf_test_report)

print("\n📉 Confusion Matrix:\n")
print(rf_test_matrix)


📊 Classification Report:

              precision    recall  f1-score   support

           0       1.00      0.41      0.58      2807
           1       0.78      1.00      0.88      5816

    accuracy                           0.81      8623
   macro avg       0.89      0.70      0.73      8623
weighted avg       0.85      0.81      0.78      8623


📉 Confusion Matrix:

[[1148 1659]
 [   0 5816]]


In [None]:
# Column names, in the order the features are produced by
# extract_features_from_embeddings.
feature_names = [
    "Q_Context_Sim",
    "A_Context_Sim",
    "Q_A_Sim",
    "Token_Overlap",
    "Context_Length",
    "Answer_Length",
]

# One "<name>: <importance>" line per feature, importance to four decimals.
importance_lines = [
    f"{name}: {score:.4f}"
    for name, score in zip(feature_names, rf_model.feature_importances_)
]
print("\n".join(importance_lines))


Q_Context_Sim: 0.0271
A_Context_Sim: 0.0662
Q_A_Sim: 0.1592
Token_Overlap: 0.3364
Context_Length: 0.1736
Answer_Length: 0.2375


In [None]:
MODEL_PATH = "/content/drive/MyDrive/hallucination_RF_model.pkl"

# Persist the fitted random forest to MODEL_PATH.
joblib.dump(value=rf_model, filename=MODEL_PATH)

print("✅ RF model saved at:", MODEL_PATH)

✅ RF model saved at: /content/drive/MyDrive/hallucination_RF_model.pkl


Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
import pandas as pd
import statsmodels.api as sm

feature_names = [
    "Q_Context_Sim",
    "A_Context_Sim",
    "Q_A_Sim",
    "Token_Overlap",
    "Context_Length",
    "Answer_Length",
]

# Named feature columns plus the target in a single frame.
df_logit = pd.DataFrame(X, columns=feature_names).assign(Hallucinated=y)

# Not the last expression of the cell, so this preview is not displayed.
df_logit.head()

target = df_logit["Hallucinated"]
predictors = df_logit.drop(columns="Hallucinated")

# Same split settings as the earlier split (20% test, seed 42, stratified),
# now producing pandas objects that carry the column names.
X_train, X_test, y_train, y_test = train_test_split(
    predictors,
    target,
    test_size=0.2,
    random_state=42,
    stratify=target,
)

# Append a constant column as the LAST column to act as the intercept.
X_train_sm = sm.add_constant(X_train, prepend=False)

logit_model = sm.Logit(y_train, X_train_sm).fit()

print(logit_model.summary())


Optimization terminated successfully.
         Current function value: 0.447121
         Iterations 17
                           Logit Regression Results                           
Dep. Variable:           Hallucinated   No. Observations:                34492
Model:                          Logit   Df Residuals:                    34485
Method:                           MLE   Df Model:                            6
Date:                Tue, 30 Dec 2025   Pseudo R-squ.:                  0.2914
Time:                        03:39:54   Log-Likelihood:                -15422.
converged:                       True   LL-Null:                       -21765.
Covariance Type:            nonrobust   LLR p-value:                     0.000
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Q_Context_Sim     -5.0476     16.212     -0.311      0.756     -36.823      26.728
A_Context_Sim   

In [None]:
# Variant fitted on X_train directly, i.e. without the added constant column.
logit_without_const = sm.Logit(y_train, X_train)
mod1 = logit_without_const.fit()
print(mod1.summary())

Optimization terminated successfully.
         Current function value: 0.457574
         Iterations 10
                           Logit Regression Results                           
Dep. Variable:           Hallucinated   No. Observations:                34492
Model:                          Logit   Df Residuals:                    34486
Method:                           MLE   Df Model:                            5
Date:                Tue, 30 Dec 2025   Pseudo R-squ.:                  0.2749
Time:                        03:39:55   Log-Likelihood:                -15783.
converged:                       True   LL-Null:                       -21765.
Covariance Type:            nonrobust   LLR p-value:                     0.000
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Q_Context_Sim     13.8906      0.706     19.673      0.000      12.507      15.275
A_Context_Sim   

In [None]:
# Decision threshold applied to the predicted probabilities.
threshold = 0.5

# The test matrix gets the same trailing constant column used for fitting.
X_test_sm = sm.add_constant(X_test, prepend=False)
y_test_prob = logit_model.predict(X_test_sm)

# 1 where the predicted probability reaches the threshold, otherwise 0.
y_test_pred = (y_test_prob >= threshold).astype(int)

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the thresholded logistic predictions on the test split.
cm = confusion_matrix(y_true=y_test, y_pred=y_test_pred)
print(cm)


[[1147 1660]
 [   0 5816]]


In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 for the logistic predictions on the test split.
logit_test_report = classification_report(y_true=y_test, y_pred=y_test_pred)
print(logit_test_report)


              precision    recall  f1-score   support

           0       1.00      0.41      0.58      2807
           1       0.78      1.00      0.88      5816

    accuracy                           0.81      8623
   macro avg       0.89      0.70      0.73      8623
weighted avg       0.85      0.81      0.78      8623



In [None]:
MODEL_PATH = "/content/drive/MyDrive/hallucination_logistic_model.pkl"

# Persist the fitted statsmodels result object to MODEL_PATH.
# Relies on joblib having been imported by an earlier cell.
joblib.dump(value=logit_model, filename=MODEL_PATH)

print("✅ logistic model saved at:", MODEL_PATH)

✅ logistic model saved at: /content/drive/MyDrive/hallucination_logistic_model.pkl


XGBoost

In [None]:
!pip install xgboost



In [None]:
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import joblib

In [None]:
# Training-split sample counts per class:
# label 0 = grounded (neg), label 1 = hallucinated (pos).
neg, pos = ((y_train == label).sum() for label in (0, 1))

# Negative-to-positive ratio, passed to XGBClassifier as scale_pos_weight.
scale_pos_weight = neg / pos
scale_pos_weight

np.float64(0.48276158541827874)

In [None]:
# Settings for the gradient-boosted classifier, collected in one place.
xgb_params = dict(
    n_estimators=300,
    max_depth=6,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    scale_pos_weight=scale_pos_weight,
    objective="binary:logistic",
    eval_metric="logloss",
    random_state=42,
    n_jobs=-1,
)

xgb_model = XGBClassifier(**xgb_params)

In [None]:
# Fit on the training split; the fitted estimator is the cell's displayed output.
xgb_model.fit(X=X_train, y=y_train)

In [None]:
# In-sample predictions on the split the booster was fitted on.
y_train_pred = xgb_model.predict(X_train)
xgb_train_report = classification_report(y_train, y_train_pred)

print("📊 TRAINING PERFORMANCE")
print(xgb_train_report)

📊 TRAINING PERFORMANCE
              precision    recall  f1-score   support

           0       0.95      0.41      0.58     11230
           1       0.78      0.99      0.87     23262

    accuracy                           0.80     34492
   macro avg       0.86      0.70      0.72     34492
weighted avg       0.83      0.80      0.77     34492



In [None]:
# Hard labels and class-1 probabilities for the held-out split
# (these names overwrite the logistic-regression predictions).
y_test_pred = xgb_model.predict(X_test)
y_test_prob = xgb_model.predict_proba(X_test)[:, 1]

xgb_test_report = classification_report(y_test, y_test_pred)
xgb_test_matrix = confusion_matrix(y_test, y_test_pred)
# ROC-AUC is computed from the probabilities, not from the hard labels.
xgb_test_auc = roc_auc_score(y_test, y_test_prob)

print("📊 TEST PERFORMANCE")
print(xgb_test_report)

print("\n📉 Confusion Matrix")
print(xgb_test_matrix)

print("\nROC-AUC:", xgb_test_auc)

📊 TEST PERFORMANCE
              precision    recall  f1-score   support

           0       0.93      0.42      0.58      2807
           1       0.78      0.99      0.87      5816

    accuracy                           0.80      8623
   macro avg       0.86      0.70      0.72      8623
weighted avg       0.83      0.80      0.78      8623


📉 Confusion Matrix
[[1178 1629]
 [  86 5730]]

ROC-AUC: 0.7456158189709456


In [None]:
MODEL_PATH = "/content/drive/MyDrive/hallucination_xgb_model.pkl"

# Persist the fitted XGBoost classifier to MODEL_PATH.
joblib.dump(value=xgb_model, filename=MODEL_PATH)

print("✅ XGBoost model saved at:", MODEL_PATH)

✅ XGBoost model saved at: /content/drive/MyDrive/hallucination_xgb_model.pkl
