# Load Data

In [None]:
import pandas as pd
from datasets import load_dataset

ds = load_dataset("Hello-SimpleAI/HC3", "all")

# Flatten every HC3 training entry into one row per answer, labelled by its
# author: 0 = ChatGPT (AI), 1 = human.
rows = []
for entry in ds["train"]:
    # ChatGPT answers first, then human answers, for each entry.
    for answers_key, label in (("chatgpt_answers", 0), ("human_answers", 1)):
        for answer in entry[answers_key]:
            # Filter here rather than with DataFrame.dropna(): a None answer
            # would raise on .strip(), and a blank answer strips to "", which
            # pandas does not count as missing.
            text = answer.strip() if isinstance(answer, str) else ""
            if text:
                rows.append({"text": text, "label": label})

# Explicit columns keep "text"/"label" present even if no row survived.
df = pd.DataFrame(rows, columns=["text", "label"])
print(f"Samples: {len(df)}")
df.head()

In [None]:
# Balance classes
# Downsample both classes to the size of the rarer one, then shuffle the
# combined frame. The fixed seed makes both the selected rows and their order
# repeatable across kernel restarts.
BALANCE_SEED = 42
min_class = df['label'].value_counts().min()
data_balanced = pd.concat([
    df[df['label'] == label].sample(min_class, random_state=BALANCE_SEED)
    for label in (0, 1)
]).sample(frac=1, random_state=BALANCE_SEED).reset_index(drop=True)

# Model Setup and Training

In [None]:
# Tokenization + Dataset Prep
from transformers import T5Tokenizer, T5ForSequenceClassification
from torch.utils.data import Dataset, DataLoader
import torch

# Tokenizer loaded from the "t5-small" checkpoint; it is handed to
# CombinedDataset below to encode the training texts.
tokenizer = T5Tokenizer.from_pretrained("t5-small")

class CombinedDataset(Dataset):
    """Map-style dataset that pairs tokenized texts with their labels.

    Every text is tokenized once in the constructor; indexing afterwards only
    converts the stored per-sample values into tensors.
    """

    def __init__(self, texts, labels, tokenizer, max_len=256):
        """Tokenize ``texts`` up front and keep ``labels`` alongside.

        Args:
            texts: Sequence of strings passed to ``tokenizer`` in one call.
            labels: Per-text labels, indexable by the same positions.
            tokenizer: Callable invoked with truncation and padding enabled
                and ``max_length=max_len``; its result must expose
                ``.items()`` yielding indexable per-field values.
            max_len: Value forwarded as ``max_length``.
        """
        tokenizer_options = {
            "truncation": True,
            "padding": True,
            "max_length": max_len,
        }
        self.encodings = tokenizer(texts, **tokenizer_options)
        self.labels = labels

    def __len__(self):
        """Return the number of labels, i.e. the number of samples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of tensors, including ``'labels'``."""
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# Build the training set from the balanced frame (CombinedDataset tokenizes it
# in its constructor) and serve it in shuffled mini-batches of 8.
train_texts = data_balanced['text'].tolist()
train_labels = data_balanced['label'].tolist()
train_dataset = CombinedDataset(train_texts, train_labels, tokenizer)
train_loader = DataLoader(
    train_dataset,
    batch_size=8,
    shuffle=True,
)

In [None]:
# Start from the pretrained "t5-small" checkpoint, configured for two labels
# (the data-loading cell uses 0 = AI and 1 = human).
model = T5ForSequenceClassification.from_pretrained("t5-small", num_labels=2)

In [None]:
# Train

# Run on the GPU when torch can see one, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

from tqdm import tqdm

# transformers' own AdamW was deprecated and then removed, so
# `from transformers import AdamW` fails on current releases. torch.optim.AdamW
# is the replacement; eps=1e-6 and weight_decay=0.0 reproduce the defaults of
# the removed class (torch's own defaults are 1e-8 and 0.01).
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=2e-5,
    eps=1e-6,
    weight_decay=0.0,
)

model.train()
for epoch in range(1):
    # Sum (not mean) of the per-batch losses over the epoch.
    total_loss = 0
    for batch in tqdm(train_loader):
        # Move every tensor of the batch to the model's device.
        batch = {k: v.to(device) for k, v in batch.items()}
        # The batch carries 'labels', so the model output includes the loss.
        outputs = model(**batch)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
        # Clear gradients so the next batch does not accumulate onto them.
        optimizer.zero_grad()
    print(f"Epoch {epoch + 1} Loss: {total_loss:.4f}")

In [None]:
# Write the fine-tuned model and its tokenizer into the same directory; the
# evaluation cells reload both from "initial-t5-ai-detector".
model.save_pretrained("initial-t5-ai-detector")
tokenizer.save_pretrained("initial-t5-ai-detector")

# Evaluation

In [None]:
from transformers import T5ForSequenceClassification, T5Tokenizer

# Use the GPU when torch can see one, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Reload the checkpoint saved after training: tokenizer files and model
# weights both live in the same directory.
# NOTE(review): the next cell repeats this load before running the evaluation.
tokenizer = T5Tokenizer.from_pretrained("initial-t5-ai-detector")
model = T5ForSequenceClassification.from_pretrained("initial-t5-ai-detector")
model = model.to(device)

# Predict
# Switch to inference mode and start with an empty prediction list.
model.eval()
preds = []

In [7]:
import os
import json
import jsonlines
import torch
from transformers import T5ForSequenceClassification, T5Tokenizer
from sklearn.metrics import classification_report
from tqdm import tqdm

# Load model
# Pick the GPU when torch can see one, otherwise the CPU, then reload the
# saved tokenizer and model from the training output directory.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
tokenizer = T5Tokenizer.from_pretrained("initial-t5-ai-detector")
model = T5ForSequenceClassification.from_pretrained("initial-t5-ai-detector")
model = model.to(device)

# Directory containing eval files
# Relative path; each name in `files` is joined onto it by the evaluation loop.
dev_dir = "cs162-final-dev-main"

# File list (add more files here if needed)
# The evaluation loop dispatches on the extension:
#   *.jsonl - rows with both a "human_text" and a "machine_text" field
#   *.json  - a list of entries with a "document" field, all treated as human
files = [
    "arxiv_chatGPT.jsonl",
    "arxiv_cohere.jsonl",
    "reddit_chatGPT.jsonl",
    "reddit_cohere.jsonl",
    "german_wikipedia.jsonl",
    "hewlett.json",
    "toefl.json"
]

# Main evaluation loop
LABEL_IDS = [0, 1]  # 0 = AI-generated, 1 = human-written
LABEL_NAMES = ["AI-generated", "Human-written"]


def _load_eval_examples(file_path):
    """Read one evaluation file and return ``(texts, true_labels)``.

    * ``.jsonl``: every row supplies a ``"human_text"`` (label 1) and a
      ``"machine_text"`` (label 0); all human texts are listed first.
    * ``.json``: a list of entries whose ``"document"`` field is the text;
      every entry is labelled 1 (human).

    Returns ``None`` for any other extension so the caller can skip the file.
    """
    if file_path.endswith(".jsonl"):
        human_texts, machine_texts = [], []
        with jsonlines.open(file_path) as reader:
            for row in reader:
                human_texts.append(row["human_text"])
                machine_texts.append(row["machine_text"])
        texts = human_texts + machine_texts
        true_labels = [1] * len(human_texts) + [0] * len(machine_texts)
        return texts, true_labels

    if file_path.endswith(".json"):
        # JSON is UTF-8 by specification; do not depend on the platform's
        # default text encoding.
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        texts = [entry["document"] for entry in data]
        return texts, [1] * len(texts)  # All human-labeled

    return None


def _predict_labels(texts, desc, batch_size=8, max_length=256):
    """Return the predicted class id (argmax over the logits) for each text.

    Uses the notebook-level ``model``, ``tokenizer`` and ``device``. Texts are
    scored ``batch_size`` at a time and truncated to ``max_length`` tokens.
    """
    predictions = []
    model.eval()
    with torch.no_grad():
        for start in tqdm(range(0, len(texts), batch_size), desc=desc):
            batch = texts[start:start + batch_size]
            encodings = tokenizer(
                batch,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=max_length,
            )
            outputs = model(
                input_ids=encodings["input_ids"].to(device),
                attention_mask=encodings["attention_mask"].to(device),
            )
            predictions.extend(torch.argmax(outputs.logits, dim=1).tolist())
    return predictions


for filename in files:
    examples = _load_eval_examples(os.path.join(dev_dir, filename))
    if examples is None:
        print(f"Skipping unsupported file type: {filename}")
        continue

    texts, true_labels = examples
    if not texts:
        # Nothing to score; classification_report has no meaningful output here.
        print(f"Skipping empty file: {filename}")
        continue

    # Inference
    preds = _predict_labels(texts, desc=f"Evaluating {filename}")

    # Print evaluation result
    print(f"\n===== Results for {filename} =====")
    # `labels` pins both classes so the two target names always line up, even
    # for human-only files where every prediction may be 1 (without it,
    # scikit-learn raises on the class-count mismatch). `zero_division=0`
    # reports the same 0.00 for classes with no samples, without the warning.
    print(classification_report(
        true_labels,
        preds,
        labels=LABEL_IDS,
        target_names=LABEL_NAMES,
        zero_division=0,
    ))


Evaluating arxiv_chatGPT.jsonl: 100%|██████████| 750/750 [00:24<00:00, 30.39it/s]



===== Results for arxiv_chatGPT.jsonl =====
               precision    recall  f1-score   support

 AI-generated       0.92      0.97      0.95      3000
Human-written       0.97      0.92      0.94      3000

     accuracy                           0.94      6000
    macro avg       0.95      0.94      0.94      6000
 weighted avg       0.95      0.94      0.94      6000



Evaluating arxiv_cohere.jsonl: 100%|██████████| 750/750 [00:24<00:00, 30.31it/s]



===== Results for arxiv_cohere.jsonl =====
               precision    recall  f1-score   support

 AI-generated       0.92      0.90      0.91      3000
Human-written       0.90      0.92      0.91      3000

     accuracy                           0.91      6000
    macro avg       0.91      0.91      0.91      6000
 weighted avg       0.91      0.91      0.91      6000



Evaluating reddit_chatGPT.jsonl: 100%|██████████| 750/750 [00:27<00:00, 27.72it/s]



===== Results for reddit_chatGPT.jsonl =====
               precision    recall  f1-score   support

 AI-generated       0.88      0.99      0.93      3000
Human-written       0.99      0.86      0.92      3000

     accuracy                           0.92      6000
    macro avg       0.93      0.92      0.92      6000
 weighted avg       0.93      0.92      0.92      6000



Evaluating reddit_cohere.jsonl: 100%|██████████| 750/750 [00:21<00:00, 34.98it/s] 



===== Results for reddit_cohere.jsonl =====
               precision    recall  f1-score   support

 AI-generated       0.88      0.98      0.93      3000
Human-written       0.98      0.86      0.92      3000

     accuracy                           0.92      6000
    macro avg       0.93      0.92      0.92      6000
 weighted avg       0.93      0.92      0.92      6000



Evaluating german_wikipedia.jsonl: 100%|██████████| 125/125 [00:04<00:00, 28.43it/s]



===== Results for german_wikipedia.jsonl =====
               precision    recall  f1-score   support

 AI-generated       0.57      0.23      0.33       500
Human-written       0.52      0.82      0.64       500

     accuracy                           0.53      1000
    macro avg       0.54      0.53      0.48      1000
 weighted avg       0.54      0.53      0.48      1000



Evaluating hewlett.json: 100%|██████████| 11/11 [00:00<00:00, 27.99it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



===== Results for hewlett.json =====
               precision    recall  f1-score   support

 AI-generated       0.00      0.00      0.00         0
Human-written       1.00      0.89      0.94        88

     accuracy                           0.89        88
    macro avg       0.50      0.44      0.47        88
 weighted avg       1.00      0.89      0.94        88



Evaluating toefl.json: 100%|██████████| 12/12 [00:00<00:00, 49.30it/s]


===== Results for toefl.json =====
               precision    recall  f1-score   support

 AI-generated       0.00      0.00      0.00         0
Human-written       1.00      0.41      0.58        91

     accuracy                           0.41        91
    macro avg       0.50      0.20      0.29        91
 weighted avg       1.00      0.41      0.58        91




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
