In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertModel
import torch

# Load the dataset
df = pd.read_csv("sst2_llm_outputs.csv")

# Split the data into features (X) and target (y).
# read_csv parses empty cells as NaN (a float). These columns are later passed
# to the BERT tokenizer via .tolist(), and the tokenizer only accepts strings,
# so missing texts are normalised to "" and every value is coerced to str here.
X = df[['input_text', 'generated_text']].fillna("").astype(str)  # Text feature columns
y = df['model_name']  # Target column (the name of the LLM)

# Split the dataset into training and testing sets.
# 20% of the rows are held out; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


{'sentence': "it 's a charming and often affecting journey . ", 'label': 1, 'idx': 0}


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [None]:
from transformers import BertTokenizer, BertModel
import numpy as np

# Fetch the pre-trained 'bert-base-uncased' checkpoint: encoder first, then its tokenizer
bert_model = BertModel.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Function to extract BERT embeddings
def get_bert_embeddings(text_list, batch_size=32, max_length=512):
    """Encode texts with BERT and return one [CLS] embedding per text.

    The texts are tokenized and run through the module-level ``bert_model``
    in mini-batches, so peak memory depends on ``batch_size`` rather than on
    the total number of texts.

    Args:
        text_list: Iterable of strings to embed.
        batch_size: Number of texts per forward pass. Must be >= 1.
        max_length: Token limit passed to the tokenizer; longer sequences are
            truncated. 512 is assumed to match the position-embedding limit
            of the loaded checkpoint -- TODO confirm if the model is swapped.

    Returns:
        A 2-D ``numpy.ndarray`` with one row per input text, holding the
        hidden state at sequence position 0 (the [CLS] token). For empty
        input the result has shape ``(0, hidden_size)``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    texts = list(text_list)
    if not texts:
        # The tokenizer cannot pad an empty batch; return a correctly shaped
        # empty matrix so downstream stacking still works.
        return np.empty((0, bert_model.config.hidden_size), dtype=np.float32)

    # Run on whichever device the model currently lives on (CPU unless moved).
    device = next(bert_model.parameters()).device

    cls_chunks = []
    for start in range(0, len(texts), batch_size):
        tokens = tokenizer(
            texts[start:start + batch_size],
            padding=True,  # pad only to the longest sequence in this batch
            truncation=True,
            max_length=max_length,
            return_tensors="pt",
        ).to(device)
        with torch.no_grad():  # inference only; skip autograd bookkeeping
            outputs = bert_model(**tokens)
        # Position 0 of every sequence is the CLS token; .cpu() is a no-op on CPU
        # and is required before .numpy() when the model sits on an accelerator.
        cls_chunks.append(outputs.last_hidden_state[:, 0, :].cpu().numpy())

    return np.concatenate(cls_chunks, axis=0)

# Embed each text column separately, first for the training rows and then for the test rows
X_train_input_bert, X_train_generated_bert = (
    get_bert_embeddings(X_train[column].tolist())
    for column in ('input_text', 'generated_text')
)
X_test_input_bert, X_test_generated_bert = (
    get_bert_embeddings(X_test[column].tolist())
    for column in ('input_text', 'generated_text')
)

# Place the input and generated embeddings side by side to form one feature row per example
X_train_combined_bert = np.concatenate(
    (X_train_input_bert, X_train_generated_bert), axis=1
)
X_test_combined_bert = np.concatenate(
    (X_test_input_bert, X_test_generated_bert), axis=1
)


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# Train a classifier (Logistic Regression in this case)
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train_combined_bert, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test_combined_bert)

# Evaluate with F1-score (weighted by per-class support)
f1 = f1_score(y_test, y_pred, average='weighted')
print(f"F1-score: {f1}")

# Generate the classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Confusion Matrix
# Without an explicit `labels`, confusion_matrix only covers classes that occur
# in y_test or y_pred, which can differ from clf.classes_ (the classes seen in
# training). One shared label list for both the matrix and the tick labels
# keeps every row/column correctly named. Predictions are always drawn from
# clf.classes_, so the union with y_test covers every possible label.
cm_labels = sorted(set(clf.classes_) | set(y_test))
cm = confusion_matrix(y_test, y_pred, labels=cm_labels)

# Plot confusion matrix
plt.figure(figsize=(10, 7))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=cm_labels, yticklabels=cm_labels)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix')
plt.show()
