<a href="https://colab.research.google.com/github/kavya6170/AI-Agent/blob/main/Model_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# importing dataset to train the model
import pickle

# Location of the pickled dataset (a path under /content/drive/MyDrive).
DATASET_PATH = "/content/drive/MyDrive/hallucination_dataset.pkl"

# NOTE: pickle.load can execute arbitrary code embedded in the file;
# only load dataset files that come from a trusted source.
with open(DATASET_PATH, mode="rb") as fh:
    dataset = pickle.load(fh)

# Show how many samples were loaded.
len(dataset)


In [None]:
#QUICK SANITY CHECK
from collections import Counter

# Class balance: number of samples per value of the "label" field.
Counter(sample["label"] for sample in dataset)


In [None]:
from sentence_transformers import SentenceTransformer
# Sentence-embedding model shared by all three text fields.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Raw text columns pulled out of the dataset, one entry per sample.
questions = [sample["question"] for sample in dataset]
contexts = [sample["context"] for sample in dataset]
# A falsy answer (e.g. empty) is replaced by the sample's question text
# before encoding.
answers = [sample["answer"] or sample["question"] for sample in dataset]

# Encode questions, contexts and answers (in that order) with identical
# encoder settings.
q_embs, c_embs, a_embs = (
    embedder.encode(texts, batch_size=64, show_progress_bar=True)
    for texts in (questions, contexts, answers)
)


In [None]:
def token_overlap_ratio(answer, context):
    """Return the fraction of distinct answer tokens that also occur in the context.

    Tokens are obtained by lower-casing the text and splitting on
    whitespace; punctuation is not stripped.

    Parameters
    ----------
    answer : str or None
        Answer text. ``None`` is treated like an empty string.
    context : str or None
        Context text. ``None`` is treated like an empty string.

    Returns
    -------
    float
        ``len(answer_tokens & context_tokens) / len(answer_tokens)``, or
        ``0.0`` when the answer contains no tokens.
    """
    # Missing text yields an empty token set instead of raising
    # AttributeError on None.lower().
    answer_tokens = set((answer or "").lower().split())

    # Guard against division by zero for empty answers.
    if not answer_tokens:
        return 0.0

    context_tokens = set((context or "").lower().split())
    return len(answer_tokens & context_tokens) / len(answer_tokens)

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

def extract_features_from_embeddings(i):
    """Build the feature vector for the i-th dataset sample.

    Reads the module-level embedding arrays ``q_embs``, ``c_embs`` and
    ``a_embs`` and the raw text stored in ``dataset``.

    Parameters
    ----------
    i : int
        Index into ``dataset`` and into the three embedding arrays.

    Returns
    -------
    list
        Six values, in this order: question/context cosine similarity,
        answer/context cosine similarity, question/answer cosine
        similarity, token overlap ratio of answer vs. context, context
        length in whitespace tokens, answer length in whitespace tokens.
    """
    q_emb = q_embs[i]
    c_emb = c_embs[i]
    a_emb = a_embs[i]

    sample = dataset[i]
    c = sample["context"]
    # The embedding step substitutes the question for a falsy answer, so a
    # missing answer (None) can reach this point. Treat it as empty text
    # rather than crashing on None.split(). For such rows a_emb is the
    # question's embedding, so the question/answer similarity is ~1.
    a = sample["answer"] or ""

    def _cos(u, v):
        # cosine_similarity expects 2-D inputs and returns a 1x1 matrix here.
        return cosine_similarity([u], [v])[0][0]

    return [
        _cos(q_emb, c_emb),
        _cos(a_emb, c_emb),
        _cos(q_emb, a_emb),
        token_overlap_ratio(a, c),
        len(c.split()),
        len(a.split()),
    ]


In [None]:
import numpy as np

# Feature matrix: one row of engineered features per dataset sample.
X = np.array(
    [extract_features_from_embeddings(idx) for idx in range(len(dataset))]
)

# Target vector: the "label" field of each sample, in dataset order.
y = np.array([sample["label"] for sample in dataset])

X.shape, y.shape


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing. Stratifying on y keeps the
# label proportions the same in both splits; the seed makes it repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

# Candidate values sampled by the randomized search.
my_params = {
    'n_estimators': [100, 200, 300],
    'criterion': ['gini', 'entropy'],
    'max_depth': [8, 10, 12, None],
    'min_samples_split': [5, 10],
    'min_samples_leaf': [2, 5],
    'max_features': ['sqrt', 'log2']
}

# Base estimator; class weights are balanced because the labels are
# treated as imbalanced in this task.
rfc = RandomForestClassifier(
    n_jobs=-1,
    class_weight="balanced",
    random_state=42
)

# 20 random parameter combinations, each scored by F1 with 5-fold CV.
# NOTE(review): both the forest and the search request n_jobs=-1;
# confirm this does not oversubscribe the available cores.
random_search = RandomizedSearchCV(
    estimator=rfc,
    param_distributions=my_params,
    scoring='f1',
    cv=5,
    n_iter=20,
    n_jobs=-1,
    random_state=42,
    verbose=2
)
random_search.fit(X_train, y_train)

print("Best Parameters:")
print(random_search.best_params_)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib
# Final random forest with explicitly pinned hyper-parameters.
# NOTE(review): these values are hard-coded rather than read from
# random_search.best_params_ - confirm they match the search result.
rf_model = RandomForestClassifier(
    criterion='gini',
    n_estimators=200,          # trees in the forest
    max_depth=8,               # cap on the depth of each tree
    max_features="log2",
    min_samples_leaf=2,
    min_samples_split=10,
    class_weight="balanced",   # re-weight classes to offset label imbalance
    n_jobs=-1,                 # train on all available cores
    random_state=42
)

rf_model.fit(X_train, y_train)


In [None]:
# Random-forest performance on the TRAINING split (compare with the
# test-split report to judge overfitting).
y_pred = rf_model.predict(X_train)

# The emoji in these messages were mis-decoded (UTF-8 bytes read as
# cp1252); they are restored to the intended characters.
print("📊 Classification Report:\n")
print(classification_report(y_train, y_pred))

print("\n📉 Confusion Matrix:\n")
print(confusion_matrix(y_train, y_pred))


In [None]:
# Random-forest performance on the held-out TEST split.
y_pred = rf_model.predict(X_test)

# The emoji in these messages were mis-decoded (UTF-8 bytes read as
# cp1252); they are restored to the intended characters.
print("📊 Classification Report:\n")
print(classification_report(y_test, y_pred))

print("\n📉 Confusion Matrix:\n")
print(confusion_matrix(y_test, y_pred))


In [None]:
# Human-readable names for the six feature columns, in the order they are
# emitted by extract_features_from_embeddings.
feature_names = [
    "Q_Context_Sim", "A_Context_Sim", "Q_A_Sim",
    "Token_Overlap", "Context_Length", "Answer_Length",
]

# Print each feature next to its importance in the fitted random forest.
for feat, importance in zip(feature_names, rf_model.feature_importances_):
    print(f"{feat}: {importance:.4f}")


In [None]:
# Destination file for the trained random forest.
MODEL_PATH = "/content/drive/MyDrive/hallucination_RF_model.pkl"

joblib.dump(rf_model, MODEL_PATH)

# The check-mark emoji was mis-decoded (UTF-8 bytes read as cp1252);
# it is restored to the intended character.
print("✅ RF model saved at:", MODEL_PATH)

Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
import pandas as pd
import statsmodels.api as sm

# Column names for the six engineered features, in feature-vector order.
feature_names = [
    "Q_Context_Sim",
    "A_Context_Sim",
    "Q_A_Sim",
    "Token_Overlap",
    "Context_Length",
    "Answer_Length"
]

# Labelled frame: feature columns plus the target column "Hallucinated".
df_logit = pd.DataFrame(X, columns=feature_names)
df_logit["Hallucinated"] = y

# A bare expression is rendered only when it is the last statement of a
# cell, so the preview has to be displayed explicitly here.
display(df_logit.head())

# Re-split with the same test size, seed and stratification as before.
# From here on X_train/X_test/y_train/y_test are pandas objects.
X_train, X_test, y_train, y_test = train_test_split(
    df_logit.drop("Hallucinated", axis=1),
    df_logit["Hallucinated"],
    test_size=0.2,
    random_state=42,
    stratify=df_logit["Hallucinated"]
)

# statsmodels does not add an intercept on its own; append a constant
# column as the last column of the design matrix.
X_train_sm = sm.add_constant(X_train, prepend=False)

logit_model = sm.Logit(y_train, X_train_sm).fit()

print(logit_model.summary())


In [None]:
# Comparison fit: same Logit model but on the raw feature columns,
# i.e. without the added constant (intercept) column.
mod1 = sm.Logit(endog=y_train, exog=X_train).fit()
print(mod1.summary())

In [None]:
# Cut-off applied to the predicted values to obtain 0/1 labels.
threshold = 0.5

# logit_model was fitted with a trailing constant column, so the test
# matrix gets the same column before predicting.
X_test_sm = sm.add_constant(X_test, prepend=False)
y_test_prob = logit_model.predict(X_test_sm)

# 1 where the predicted value reaches the threshold, else 0.
y_test_pred = (y_test_prob >= threshold).astype(int)

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the thresholded logistic predictions on the test split.
cm = confusion_matrix(y_true=y_test, y_pred=y_test_pred)
print(cm)


In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 for the logistic model on the test split.
print(classification_report(y_true=y_test, y_pred=y_test_pred))


In [None]:
# Destination file for the fitted statsmodels Logit results object.
MODEL_PATH = "/content/drive/MyDrive/hallucination_logistic_model.pkl"

joblib.dump(logit_model, MODEL_PATH)

# The check-mark emoji was mis-decoded (UTF-8 bytes read as cp1252);
# it is restored to the intended character.
print("✅ logistic model saved at:", MODEL_PATH)

XGBoost

In [None]:
!pip install xgboost

In [None]:
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import joblib

In [None]:
# Class counts in the training split: label 0 = grounded, 1 = hallucinated.
neg = (y_train == 0).sum()
pos = (y_train == 1).sum()

# Negative-to-positive ratio, passed to XGBoost as scale_pos_weight to
# offset the class imbalance.
scale_pos_weight = neg / pos
scale_pos_weight

In [None]:
# Gradient-boosted classifier; construction only, fitting happens in the
# next cell.
xgb_model = XGBClassifier(
    # learning task
    objective="binary:logistic",
    eval_metric="logloss",
    scale_pos_weight=scale_pos_weight,
    # ensemble size and tree shape
    n_estimators=300,
    max_depth=6,
    learning_rate=0.05,
    # row / column subsampling per tree
    subsample=0.8,
    colsample_bytree=0.8,
    # reproducibility and parallelism
    random_state=42,
    n_jobs=-1
)

In [None]:
# Train on the current training split (the pandas split created in the
# logistic-regression section when the notebook is run top to bottom).
xgb_model.fit(X=X_train, y=y_train)

In [None]:
# XGBoost performance on the TRAINING split.
y_train_pred = xgb_model.predict(X_train)

# The emoji was mis-decoded (UTF-8 bytes read as cp1252); it is restored
# to the intended character.
print("📊 TRAINING PERFORMANCE")
print(classification_report(y_train, y_train_pred))

In [None]:
# XGBoost performance on the held-out TEST split.
y_test_pred = xgb_model.predict(X_test)
# Probability of the positive class (column 1), used for ROC-AUC.
y_test_prob = xgb_model.predict_proba(X_test)[:, 1]

# The emoji in these messages were mis-decoded (UTF-8 bytes read as
# cp1252); they are restored to the intended characters.
print("📊 TEST PERFORMANCE")
print(classification_report(y_test, y_test_pred))

print("\n📉 Confusion Matrix")
print(confusion_matrix(y_test, y_test_pred))

print("\nROC-AUC:", roc_auc_score(y_test, y_test_prob))

In [None]:
# Destination file for the trained XGBoost classifier.
MODEL_PATH = "/content/drive/MyDrive/hallucination_xgb_model.pkl"

joblib.dump(xgb_model, MODEL_PATH)

# The check-mark emoji was mis-decoded (UTF-8 bytes read as cp1252);
# it is restored to the intended character.
print("✅ XGBoost model saved at:", MODEL_PATH)