In [2]:
import pandas as pd
from datasets import load_dataset
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import T5Tokenizer, T5ForSequenceClassification, AdamW
from tqdm import tqdm

In [3]:
# ------------------------------------------------------
# Load original HC3 dataset
# ------------------------------------------------------
from datasets import load_dataset

ds_hc3 = load_dataset("Hello-SimpleAI/HC3", "all")
rows = []
for entry in ds_hc3["train"]:
    for a in entry["chatgpt_answers"]:
        rows.append({"text": a.strip(), "label": 0})  # AI
    for a in entry["human_answers"]:
        rows.append({"text": a.strip(), "label": 1})  # Human
df_hc3 = pd.DataFrame(rows).dropna()

# Balance HC3 dataset
min_class = df_hc3['label'].value_counts().min()
data_balanced_hc3 = pd.concat([
    df_hc3[df_hc3['label'] == 0].sample(min_class),
    df_hc3[df_hc3['label'] == 1].sample(min_class)
]).sample(frac=1).reset_index(drop=True)

In [4]:
# ------------------------------------------------------
# Load IELTS dataset
# ------------------------------------------------------
ds_ielts = load_dataset("chillies/IELTS-writing-task-2-evaluation")
texts_ielts = [example['essay'] for example in ds_ielts['train']]
labels_ielts = [1] * len(texts_ielts)

df_ielts = pd.DataFrame({
    "text": texts_ielts,
    "label": labels_ielts
})


In [5]:
# ------------------------------------------------------
# Merge both datasets
# ------------------------------------------------------
# Combine HC3 (balanced) and IELTS (human essays)
merged_df = pd.concat([data_balanced_hc3, df_ielts]).sample(frac=1).reset_index(drop=True)

print("Total samples after merging:", len(merged_df))
print(merged_df['label'].value_counts())

Total samples after merging: 63639
label
1    36736
0    26903
Name: count, dtype: int64


In [6]:
# ------------------------------------------------------
# Tokenization & Dataset Prep
# ------------------------------------------------------
from transformers import T5Tokenizer, T5ForSequenceClassification

tokenizer = T5Tokenizer.from_pretrained("t5-small")

class CombinedDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len=256):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=max_len)
        self.labels = labels

    def __len__(self): return len(self.labels)

    def __getitem__(self, idx):
        item = {k: torch.tensor(v[idx]) for k, v in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

train_dataset = CombinedDataset(merged_df['text'].tolist(), merged_df['label'].tolist(), tokenizer)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [7]:
# ------------------------------------------------------
# Load pretrained trained model
# ------------------------------------------------------
from transformers import T5ForSequenceClassification

model = T5ForSequenceClassification.from_pretrained("t5-small", num_labels=2,)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at t5-small and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5ForSequenceClassification(
  (transformer): T5Model(
    (shared): Embedding(32128, 512)
    (encoder): T5Stack(
      (embed_tokens): Embedding(32128, 512)
      (block): ModuleList(
        (0): T5Block(
          (layer): ModuleList(
            (0): T5LayerSelfAttention(
              (SelfAttention): T5Attention(
                (q): Linear(in_features=512, out_features=512, bias=False)
                (k): Linear(in_features=512, out_features=512, bias=False)
                (v): Linear(in_features=512, out_features=512, bias=False)
                (o): Linear(in_features=512, out_features=512, bias=False)
                (relative_attention_bias): Embedding(32, 8)
              )
              (layer_norm): T5LayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (1): T5LayerFF(
              (DenseReluDense): T5DenseActDense(
                (wi): Linear(in_features=512, out_features=2048, bias=False)
                (wo): Linear(in_featu

In [8]:
# ------------------------------------------------------
# Fine-tuning
# ------------------------------------------------------
optimizer = AdamW(model.parameters(), lr=2e-5)

model.train()
for epoch in range(3):
    total_loss = 0
    for batch in tqdm(train_loader):
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    print(f"Epoch {epoch+1} Loss: {total_loss:.4f}")

100%|██████████| 7955/7955 [13:00<00:00, 10.20it/s]


Epoch 1 Loss: 479.3315


100%|██████████| 7955/7955 [13:07<00:00, 10.11it/s]


Epoch 2 Loss: 85.4250


100%|██████████| 7955/7955 [13:06<00:00, 10.11it/s]

Epoch 3 Loss: 44.6346





In [9]:
# Save model weights and tokenizer after fine-tuning
model.save_pretrained("t5-ethics-ai-human-detector")
tokenizer.save_pretrained("t5-ethics-ai-human-detector")

('t5-ethics-ai-human-detector/tokenizer_config.json',
 't5-ethics-ai-human-detector/special_tokens_map.json',
 't5-ethics-ai-human-detector/spiece.model',
 't5-ethics-ai-human-detector/added_tokens.json')

In [11]:
import os
import json
import jsonlines
import torch
from tqdm import tqdm
from sklearn.metrics import classification_report
from transformers import MT5Tokenizer, MT5ForSequenceClassification

# Load classification model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = T5ForSequenceClassification.from_pretrained("t5-ethics-ai-human-detector").to(device)
tokenizer = T5Tokenizer.from_pretrained("t5-small")

dev_dir = "cs162-final-dev-main"

# Loop over all files in your folder
for filename in os.listdir(dev_dir):
    file_path = os.path.join(dev_dir, filename)
    texts, true_labels = [], []

    if filename.endswith(".jsonl"):
        # jsonlines format (human_text / machine_text)
        human_texts, machine_texts = [], []
        with jsonlines.open(file_path) as reader:
            for row in reader:
                human_texts.append(row["human_text"])
                machine_texts.append(row["machine_text"])
        texts = human_texts + machine_texts
        true_labels = [1] * len(human_texts) + [0] * len(machine_texts)

    elif filename.endswith(".json"):
        # plain json (hewlett.json, toefl.json)
        with open(file_path, 'r') as f:
            data = json.load(f)
            texts = [entry["document"] for entry in data]
            true_labels = [1] * len(texts)

    else:
        print(f"Skipping unsupported file: {filename}")
        continue

    # Inference loop for classification
    preds = []
    model.eval()

    for i in tqdm(range(0, len(texts), 8), desc=f"Evaluating {filename}"):
        batch_texts = texts[i:i+8]
        inputs = tokenizer(batch_texts, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
        
        with torch.no_grad():
            outputs = model(**inputs)
            logits = outputs.logits
            batch_preds = torch.argmax(logits, dim=1).tolist()
            preds.extend(batch_preds)

    # Print classification report for each file
    print(f"\n===== Results for {filename} =====")
    print(classification_report(true_labels, preds, target_names=["AI-generated", "Human-written"]))


Skipping unsupported file: README.md


Evaluating reddit_chatGPT.jsonl:   0%|          | 0/750 [00:00<?, ?it/s]Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.
Evaluating reddit_chatGPT.jsonl: 100%|██████████| 750/750 [00:59<00:00, 12.52it/s]



===== Results for reddit_chatGPT.jsonl =====
               precision    recall  f1-score   support

 AI-generated       0.95      0.88      0.91      3000
Human-written       0.89      0.96      0.92      3000

     accuracy                           0.92      6000
    macro avg       0.92      0.92      0.92      6000
 weighted avg       0.92      0.92      0.92      6000



Evaluating toefl.json: 100%|██████████| 12/12 [00:00<00:00, 48.89it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



===== Results for toefl.json =====
               precision    recall  f1-score   support

 AI-generated       0.00      0.00      0.00         0
Human-written       1.00      0.79      0.88        91

     accuracy                           0.79        91
    macro avg       0.50      0.40      0.44        91
 weighted avg       1.00      0.79      0.88        91



Evaluating arxiv_chatGPT.jsonl: 100%|██████████| 750/750 [00:45<00:00, 16.55it/s]



===== Results for arxiv_chatGPT.jsonl =====
               precision    recall  f1-score   support

 AI-generated       0.97      0.76      0.85      3000
Human-written       0.80      0.97      0.88      3000

     accuracy                           0.87      6000
    macro avg       0.88      0.87      0.87      6000
 weighted avg       0.88      0.87      0.87      6000



Evaluating reddit_cohere.jsonl: 100%|██████████| 750/750 [00:44<00:00, 16.68it/s] 



===== Results for reddit_cohere.jsonl =====
               precision    recall  f1-score   support

 AI-generated       0.96      0.98      0.97      3000
Human-written       0.98      0.96      0.97      3000

     accuracy                           0.97      6000
    macro avg       0.97      0.97      0.97      6000
 weighted avg       0.97      0.97      0.97      6000



Evaluating arxiv_cohere.jsonl: 100%|██████████| 750/750 [00:50<00:00, 14.94it/s]



===== Results for arxiv_cohere.jsonl =====
               precision    recall  f1-score   support

 AI-generated       0.97      0.75      0.84      3000
Human-written       0.79      0.97      0.88      3000

     accuracy                           0.86      6000
    macro avg       0.88      0.86      0.86      6000
 weighted avg       0.88      0.86      0.86      6000



Evaluating hewlett.json: 100%|██████████| 11/11 [00:00<00:00, 11.45it/s]


===== Results for hewlett.json =====





ValueError: Number of classes, 1, does not match size of target_names, 2. Try specifying the labels parameter

In [12]:
import os
import json
import jsonlines
import torch
from tqdm import tqdm
from sklearn.metrics import classification_report
from transformers import MT5Tokenizer, MT5ForSequenceClassification

# Load your classification model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MT5ForSequenceClassification.from_pretrained("t5-ethics-ai-human-detector").to(device)
tokenizer = MT5Tokenizer.from_pretrained("t5-small")

# Directory where eval files live
dev_dir = "cs162-final-dev-main"

# Files to evaluate
files_to_evaluate = ["hewlett.json", "german_wikipedia.jsonl"]

# Loop over the selected files
for filename in files_to_evaluate:
    file_path = os.path.join(dev_dir, filename)
    texts, true_labels = [], []

    if filename.endswith(".jsonl"):
        # jsonlines format (human_text / machine_text)
        human_texts, machine_texts = [], []
        with jsonlines.open(file_path) as reader:
            for row in reader:
                human_texts.append(row["human_text"])
                machine_texts.append(row["machine_text"])
        texts = human_texts + machine_texts
        true_labels = [1] * len(human_texts) + [0] * len(machine_texts)

    elif filename.endswith(".json"):
        # plain json (hewlett.json)
        with open(file_path, 'r') as f:
            data = json.load(f)
            texts = [entry["document"] for entry in data]
            true_labels = [1] * len(texts)  # all human-written

    else:
        print(f"Skipping unsupported file: {filename}")
        continue

    # Inference loop for classification
    preds = []
    model.eval()

    batch_size = 8
    for i in tqdm(range(0, len(texts), batch_size), desc=f"Evaluating {filename}"):
        batch_texts = texts[i:i + batch_size]
        inputs = tokenizer(batch_texts, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)

        with torch.no_grad():
            outputs = model(**inputs)
            logits = outputs.logits
            batch_preds = torch.argmax(logits, dim=1).tolist()
            preds.extend(batch_preds)

    # Print classification report for each file
    print(f"\n===== Results for {filename} =====")
    print(classification_report(
        true_labels,
        preds,
        labels=[0, 1],
        target_names=["AI-generated", "Human-written"],
        zero_division=0
    ))


You are using a model of type t5 to instantiate a model of type mt5. This is not supported for all configurations of models and can yield errors.
Evaluating hewlett.json:   0%|          | 0/11 [00:00<?, ?it/s]Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.
Evaluating hewlett.json: 100%|██████████| 11/11 [00:00<00:00, 11.91it/s]



===== Results for hewlett.json =====
               precision    recall  f1-score   support

 AI-generated       0.00      0.00      0.00         0
Human-written       1.00      1.00      1.00        88

    micro avg       1.00      1.00      1.00        88
    macro avg       0.50      0.50      0.50        88
 weighted avg       1.00      1.00      1.00        88



Evaluating german_wikipedia.jsonl: 100%|██████████| 125/125 [00:10<00:00, 11.99it/s]


===== Results for german_wikipedia.jsonl =====
               precision    recall  f1-score   support

 AI-generated       0.20      0.01      0.01       500
Human-written       0.50      0.98      0.66       500

     accuracy                           0.49      1000
    macro avg       0.35      0.49      0.33      1000
 weighted avg       0.35      0.49      0.33      1000




