# Load and prepare the training data

In [1]:
import pandas as pd
from datasets import load_dataset

ds = load_dataset("Hello-SimpleAI/HC3", "all")

# Extract and label data for the Hugging Face dataset.
# Label convention: 0 = AI (chatgpt_answers), 1 = Human (human_answers).
rows = []
for entry in ds["train"]:
    # Keep the original ordering: all ChatGPT answers of an entry, then all human ones.
    for column, label in (("chatgpt_answers", 0), ("human_answers", 1)):
        for answer in entry[column] or []:
            # Filter here rather than relying on dropna(): a None answer would crash
            # on .strip(), and a whitespace-only answer becomes "" which dropna()
            # does not treat as missing, so it would end up as a training example.
            text = answer.strip() if isinstance(answer, str) else ""
            if text:
                rows.append({"text": text, "label": label})

# Explicit columns keep the frame well-formed even if no rows survive the filter.
df = pd.DataFrame(rows, columns=["text", "label"])
print(f"Samples: {len(df)}")
df.head()

  from .autonotebook import tqdm as notebook_tqdm


Samples: 85449


Unnamed: 0,text,label
0,There are many different best seller lists tha...,0
1,"Basically there are many categories of "" Best ...",1
2,"If you 're hearing about it , it 's because it...",1
3,"One reason is lots of catagories . However , h...",1
4,Salt is used on roads to help melt ice and sno...,0


In [2]:
# Balance classes by downsampling each label to the size of the rarest one.
# A fixed seed makes both the per-class subsample and the final shuffle
# reproducible across kernel restarts (unseeded .sample() draws differ per run).
BALANCE_SEED = 42
min_class = df['label'].value_counts().min()
data_balanced = pd.concat([
    df[df['label'] == 0].sample(min_class, random_state=BALANCE_SEED),
    df[df['label'] == 1].sample(min_class, random_state=BALANCE_SEED),
]).sample(frac=1, random_state=BALANCE_SEED).reset_index(drop=True)

In [3]:
# Tokenization + Dataset Prep
from transformers import T5Tokenizer, T5ForSequenceClassification
from torch.utils.data import Dataset, DataLoader
import torch

# Load the tokenizer for the "t5-large" checkpoint; the model cell loads the same checkpoint name.
tokenizer = T5Tokenizer.from_pretrained("t5-large")

class CombinedDataset(Dataset):
    """Torch dataset pairing tokenized texts with their labels.

    Every text is tokenized once in the constructor (truncation on, padding on,
    length capped at ``max_len``). ``labels`` is stored as given and indexed in
    parallel with the tokenizer output.
    """

    def __init__(self, texts, labels, tokenizer, max_len=256):
        tokenized = tokenizer(
            texts,
            truncation=True,
            padding=True,
            max_length=max_len,
        )
        self.encodings = tokenized
        self.labels = labels

    def __len__(self):
        """Return the number of examples, taken from the label sequence."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return one tensor per tokenizer field at ``idx`` plus a ``labels`` tensor."""
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# Pull the balanced texts and labels out as plain lists, tokenize them through
# CombinedDataset, and serve the result in shuffled batches of 4.
train_texts = data_balanced['text'].tolist()
train_labels = data_balanced['label'].tolist()
train_dataset = CombinedDataset(train_texts, train_labels, tokenizer)
train_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


# Model setup and training

In [4]:
# Load t5-large with a 2-class sequence-classification head (labels: 0 = AI, 1 = human).
# The classification head weights are not part of the checkpoint and are newly
# initialized (see the warning printed below), so the model must be trained before use.
model = T5ForSequenceClassification.from_pretrained("t5-large", num_labels=2)

Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at t5-large and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
# Train

from torch.optim import AdamW
from tqdm import tqdm

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

optimizer = AdamW(model.parameters(), lr=2e-5)

# Three full passes over train_loader; the printed figure is the sum of the
# per-batch losses for that epoch (not an average).
model.train()
for epoch in range(3):
    total_loss = 0
    progress = tqdm(train_loader)
    for batch in progress:
        # Move every tensor of the batch to the model's device before the forward pass.
        on_device = {name: tensor.to(device) for name, tensor in batch.items()}
        loss = model(**on_device).loss
        total_loss += loss.item()
        # Backpropagate, apply the update, then clear gradients for the next batch.
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    print(f"Epoch {epoch + 1} Loss: {total_loss:.4f}")

100%|██████████| 13452/13452 [2:22:54<00:00,  1.57it/s] 


Epoch 1 Loss: 189.2406


100%|██████████| 13452/13452 [2:22:39<00:00,  1.57it/s] 


Epoch 2 Loss: 39.0959


100%|██████████| 13452/13452 [2:22:47<00:00,  1.57it/s] 

Epoch 3 Loss: 24.9684





In [7]:
# Write the fine-tuned weights and the tokenizer files into the same directory
# so both can be reloaded from one path.
output_dir = "t5-large-ai-detector"
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)


('t5-large-ai-detector/tokenizer_config.json',
 't5-large-ai-detector/special_tokens_map.json',
 't5-large-ai-detector/spiece.model',
 't5-large-ai-detector/added_tokens.json')

# Evaluation

In [4]:
from transformers import T5ForSequenceClassification, T5Tokenizer
import torch

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Reload the tokenizer and the fine-tuned model from the saved directory.
saved_dir = "t5-large-ai-detector"
tokenizer = T5Tokenizer.from_pretrained(saved_dir)
model = T5ForSequenceClassification.from_pretrained(saved_dir)
model = model.to(device)

# Put the model in evaluation mode and start with an empty prediction list.
model.eval()
preds = []

In [5]:
import os
import json
import jsonlines
import torch
from transformers import T5ForSequenceClassification, T5Tokenizer
from sklearn.metrics import classification_report
from tqdm import tqdm

# Directory containing eval files
dev_dir = "cs162-final-dev-main"

# File list (add more files here if needed)
files = [
    "arxiv_chatGPT.jsonl",
    "arxiv_cohere.jsonl",
    "reddit_chatGPT.jsonl",
    "reddit_cohere.jsonl",
    "german_wikipedia.jsonl",
    "hewlett.json",
    "toefl.json"
]

# Label ids (same convention as training: 0 = AI, 1 = human) and their display
# names. Passing the ids explicitly to classification_report keeps it from
# inferring the class set from the data, which fails when a file contains a
# single class and the model predicts only that class.
label_ids = [0, 1]
label_names = ["AI-generated", "Human-written"]


def load_eval_file(file_path):
    """Read one evaluation file and return ``(texts, true_labels)``.

    * ``.jsonl``: each row supplies ``human_text`` and ``machine_text``; all
      human texts come first (label 1), followed by all machine texts (label 0).
    * ``.json``: a list of entries with a ``document`` field, all labelled 1
      (human).

    Returns ``None`` for any other extension so the caller can skip the file.
    """
    if file_path.endswith(".jsonl"):
        human_texts, machine_texts = [], []
        with jsonlines.open(file_path) as reader:
            for row in reader:
                human_texts.append(row["human_text"])
                machine_texts.append(row["machine_text"])
        texts = human_texts + machine_texts
        true_labels = [1] * len(human_texts) + [0] * len(machine_texts)
        return texts, true_labels

    if file_path.endswith(".json"):
        # Decode as UTF-8 explicitly; the platform default encoding may differ.
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        texts = [entry["document"] for entry in data]
        return texts, [1] * len(texts)  # All human-labeled

    return None


def predict_labels(texts, batch_size=8, max_length=256, desc=None):
    """Return the argmax class id for every text, in input order.

    Uses the notebook-level ``model``, ``tokenizer`` and ``device``. Texts are
    tokenized per batch with padding and truncation to ``max_length`` tokens.
    """
    predictions = []
    model.eval()
    with torch.no_grad():
        for start in tqdm(range(0, len(texts), batch_size), desc=desc):
            batch = texts[start:start + batch_size]
            encodings = tokenizer(
                batch,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=max_length,
            )
            outputs = model(
                input_ids=encodings["input_ids"].to(device),
                attention_mask=encodings["attention_mask"].to(device),
            )
            predictions.extend(torch.argmax(outputs.logits, dim=1).tolist())
    return predictions


# Main evaluation loop
for filename in files:
    loaded = load_eval_file(os.path.join(dev_dir, filename))
    if loaded is None:
        print(f"Skipping unsupported file type: {filename}")
        continue

    texts, true_labels = loaded
    if not texts:
        # Nothing to score; a report over zero samples is meaningless.
        print(f"No samples found in {filename}; skipping")
        continue

    # Inference
    preds = predict_labels(texts, desc=f"Evaluating {filename}")

    # Print evaluation result
    print(f"\n===== Results for {filename} =====")
    print(classification_report(
        true_labels,
        preds,
        labels=label_ids,
        target_names=label_names,
        # Human-only files have no AI samples; report 0 for the undefined
        # metrics instead of emitting a warning for each of them.
        zero_division=0,
    ))


Evaluating arxiv_chatGPT.jsonl:   0%|          | 0/750 [00:00<?, ?it/s]Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.
Evaluating arxiv_chatGPT.jsonl: 100%|██████████| 750/750 [04:33<00:00,  2.74it/s]



===== Results for arxiv_chatGPT.jsonl =====
               precision    recall  f1-score   support

 AI-generated       0.99      0.87      0.93      3000
Human-written       0.88      0.99      0.93      3000

     accuracy                           0.93      6000
    macro avg       0.94      0.93      0.93      6000
 weighted avg       0.94      0.93      0.93      6000



Evaluating arxiv_cohere.jsonl: 100%|██████████| 750/750 [04:43<00:00,  2.65it/s]



===== Results for arxiv_cohere.jsonl =====
               precision    recall  f1-score   support

 AI-generated       0.99      0.52      0.69      3000
Human-written       0.68      0.99      0.81      3000

     accuracy                           0.76      6000
    macro avg       0.83      0.76      0.75      6000
 weighted avg       0.83      0.76      0.75      6000



Evaluating reddit_chatGPT.jsonl: 100%|██████████| 750/750 [04:45<00:00,  2.63it/s]



===== Results for reddit_chatGPT.jsonl =====
               precision    recall  f1-score   support

 AI-generated       0.99      0.98      0.98      3000
Human-written       0.98      0.99      0.98      3000

     accuracy                           0.98      6000
    macro avg       0.98      0.98      0.98      6000
 weighted avg       0.98      0.98      0.98      6000



Evaluating reddit_cohere.jsonl: 100%|██████████| 750/750 [03:31<00:00,  3.54it/s]



===== Results for reddit_cohere.jsonl =====
               precision    recall  f1-score   support

 AI-generated       0.99      0.95      0.97      3000
Human-written       0.96      0.99      0.97      3000

     accuracy                           0.97      6000
    macro avg       0.97      0.97      0.97      6000
 weighted avg       0.97      0.97      0.97      6000



Evaluating german_wikipedia.jsonl: 100%|██████████| 125/125 [00:47<00:00,  2.63it/s]



===== Results for german_wikipedia.jsonl =====
               precision    recall  f1-score   support

 AI-generated       0.25      0.00      0.00       500
Human-written       0.50      0.99      0.66       500

     accuracy                           0.50      1000
    macro avg       0.37      0.50      0.33      1000
 weighted avg       0.37      0.50      0.33      1000



Evaluating hewlett.json: 100%|██████████| 11/11 [00:04<00:00,  2.62it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



===== Results for hewlett.json =====
               precision    recall  f1-score   support

 AI-generated       0.00      0.00      0.00         0
Human-written       1.00      0.95      0.98        88

     accuracy                           0.95        88
    macro avg       0.50      0.48      0.49        88
 weighted avg       1.00      0.95      0.98        88



Evaluating toefl.json: 100%|██████████| 12/12 [00:02<00:00,  4.23it/s]


===== Results for toefl.json =====
               precision    recall  f1-score   support

 AI-generated       0.00      0.00      0.00         0
Human-written       1.00      0.70      0.83        91

     accuracy                           0.70        91
    macro avg       0.50      0.35      0.41        91
 weighted avg       1.00      0.70      0.83        91




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
