In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load CSV
df = pd.read_csv('llama_k30.csv')

# Assume final_vector column is stored as a list/string. Convert it to numeric
import ast
df['final_vector'] = df['final_vector'].apply(ast.literal_eval)  # Convert string to list
X = list(df['final_vector'])
y = df['hallucination']

# Encode labels (yes/no -> 1/0)
le = LabelEncoder()
y = le.fit_transform(y)

# Optional: scale features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Train-test split (if not already split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


#Note: Input Dimension is varying with value of k as we are using DistilGPT2 it has total 72 attention maps so final vector would be 72 times K
# because we are extracting k eigen values from each attention map

In [2]:
df.head()

Unnamed: 0,knowledge,question,answer,hallucination,lap_512,e_512,final_vector
0,Arthur's Magazine (1844–1846) was an American ...,Which magazine was started first Arthur's Maga...,First for Women was started first.,yes,"[-1.0289784822964554, -0.2072795117877403, 0.0...","[2357293844223.0225, -0.007429406813447076, -0...","[-1.0289784822964554, -0.2072795117877403, 0.0..."
1,The Oberoi family is an Indian family that is ...,The Oberoi family is part of a hotel company t...,Delhi,no,"[-1.0351036183174647, -0.20167420511054537, -0...","[2373312234878.5396, -0.0036020093444006835, -...","[-1.0351036183174647, -0.20167420511054537, -0..."
2,"Allison Beth ""Allie"" Goertz (born March 2, 199...",Musician and satirist Allie Goertz wrote a son...,President Richard Nixon,no,"[-0.08009889828795395, 0.022711400050278267, -...","[2834394931793.213, -0.005024173850139737, -0....","[-0.08009889828795395, 0.022711400050278267, -..."
3,"Margaret ""Peggy"" Seeger (born June 17, 1935) i...",What nationality was James Henry Miller's wife?,American,no,"[-0.9797262332281047, -0.14473387045497219, 0....","[2044596672058.1052, -0.00246397898215257, -0....","[-0.9797262332281047, -0.14473387045497219, 0...."
4,It is a hygroscopic solid that is highly solu...,Cadmium Chloride is slightly soluble in this c...,water with a hint of alcohol,yes,"[-0.24179916903172718, 0.056321644818608976, -...","[2587449550628.662, -0.005043848555820679, -0....","[-0.24179916903172718, 0.056321644818608976, -..."


In [3]:
import numpy as np
first_vector = np.array(df['final_vector'].iloc[0])
print("Shape of a vector:", first_vector.shape)


Shape of a vector: (1024,)


In [4]:
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Initialize the model
logreg_model = LogisticRegression(max_iter=1000, random_state=42)

# Train the model
logreg_model.fit(X_train, y_train)

# Predictions
y_pred = logreg_model.predict(X_test)
y_pred_prob = logreg_model.predict_proba(X_test)[:, 1]  # probability of class 1

# Evaluate
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))



auc = roc_auc_score(y_test, y_pred_prob)
print("AUC-ROC:", auc)


Accuracy: 0.9465
              precision    recall  f1-score   support

           0       0.95      0.94      0.95       998
           1       0.94      0.95      0.95      1002

    accuracy                           0.95      2000
   macro avg       0.95      0.95      0.95      2000
weighted avg       0.95      0.95      0.95      2000

[[939  59]
 [ 48 954]]
AUC-ROC: 0.9755149020596081


In [5]:
# lap_512

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load CSV
df = pd.read_csv('llama_k30.csv')

# Assume final_vector column is stored as a list/string. Convert it to numeric
import ast
df['lap_512'] = df['lap_512'].apply(ast.literal_eval)  # Convert string to list
X = list(df['lap_512'])
y = df['hallucination']

# Encode labels (yes/no -> 1/0)
le = LabelEncoder()
y = le.fit_transform(y)

# Optional: scale features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Train-test split (if not already split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


#Note: Input Dimension is varying with value of k as we are using DistilGPT2 it has total 72 attention maps so final vector would be 72 times K
# because we are extracting k eigen values from each attention map

In [7]:
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Initialize the model
logreg_model = LogisticRegression(max_iter=1000, random_state=42)

# Train the model
logreg_model.fit(X_train, y_train)

# Predictions
y_pred = logreg_model.predict(X_test)
y_pred_prob = logreg_model.predict_proba(X_test)[:, 1]  # probability of class 1

# Evaluate
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))



auc = roc_auc_score(y_test, y_pred_prob)
print("AUC-ROC:", auc)


Accuracy: 0.91
              precision    recall  f1-score   support

           0       0.92      0.90      0.91       998
           1       0.90      0.92      0.91      1002

    accuracy                           0.91      2000
   macro avg       0.91      0.91      0.91      2000
weighted avg       0.91      0.91      0.91      2000

[[901  97]
 [ 83 919]]
AUC-ROC: 0.9583288333153333
