In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load CSV
df = pd.read_csv('gptk100.csv')

# Assume final_vector column is stored as a list/string. Convert it to numeric
import ast
df['final_vector'] = df['final_vector'].apply(ast.literal_eval)  # Convert string to list
X = list(df['final_vector'])
y = df['hallucination']

# Encode labels (yes/no -> 1/0)
le = LabelEncoder()
y = le.fit_transform(y)

# Optional: scale features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Train-test split (if not already split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


#Note: Input Dimension is varying with value of k as we are using DistilGPT2 it has total 72 attention maps so final vector would be 72 times K
# because we are extracting k eigen values from each attention map

In [2]:
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Initialize the model
logreg_model = LogisticRegression(max_iter=1000, random_state=42)

# Train the model
logreg_model.fit(X_train, y_train)

# Predictions
y_pred = logreg_model.predict(X_test)
y_pred_prob = logreg_model.predict_proba(X_test)[:, 1]  # probability of class 1

# Evaluate
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))



auc = roc_auc_score(y_test, y_pred_prob)
print("AUC-ROC:", auc)


Accuracy: 0.937
              precision    recall  f1-score   support

           0       0.94      0.94      0.94       998
           1       0.94      0.94      0.94      1002

    accuracy                           0.94      2000
   macro avg       0.94      0.94      0.94      2000
weighted avg       0.94      0.94      0.94      2000

[[935  63]
 [ 63 939]]
AUC-ROC: 0.9649488597954393


In [3]:
#lap_512

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load CSV
df = pd.read_csv('gptk100.csv')

# Assume final_vector column is stored as a list/string. Convert it to numeric
import ast
df['lap_512'] = df['lap_512'].apply(ast.literal_eval)  # Convert string to list
X = list(df['lap_512'])
y = df['hallucination']

# Encode labels (yes/no -> 1/0)
le = LabelEncoder()
y = le.fit_transform(y)

# Optional: scale features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Train-test split (if not already split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


#Note: Input Dimension is varying with value of k as we are using DistilGPT2 it has total 72 attention maps so final vector would be 72 times K
# because we are extracting k eigen values from each attention map

In [None]:
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Initialize the model
logreg_model = LogisticRegression(max_iter=1000, random_state=42)

# Train the model
logreg_model.fit(X_train, y_train)

# Predictions
y_pred = logreg_model.predict(X_test)
y_pred_prob = logreg_model.predict_proba(X_test)[:, 1]  # probability of class 1

# Evaluate
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))



auc = roc_auc_score(y_test, y_pred_prob)
print("AUC-ROC:", auc)
