## Load datasets: our local CSV and the Hugging Face HC3 corpus

In [23]:
import pandas as pd

# Local CSV (columns `text` and `generated`, see the printed head):
# read it and discard incomplete rows in one chained expression.
df = pd.read_csv("data/AI_Human.csv").dropna()
print(df.head())

from datasets import load_dataset

# HC3 corpus from the Hugging Face Hub, "all" configuration.
ds = load_dataset(path="Hello-SimpleAI/HC3", name="all")

                                                text  generated
0  Cars. Cars have been around since they became ...        0.0
1  Transportation is a large necessity in most co...        0.0
2  "America's love affair with it's vehicles seem...        0.0
3  How often do you ride in a car? Do you drive a...        0.0
4  Cars are a wonderful thing. They are perhaps o...        0.0


In [24]:
# Flatten the Hugging Face split into one labelled row per answer.
# Per entry the ChatGPT answers (label 0 = AI) come first, followed by the
# human answers (label 1 = Human), matching the original row order.
rows = [
    {"text": answer.strip(), "label": label}
    for entry in ds["train"]
    for field, label in (("chatgpt_answers", 0), ("human_answers", 1))
    for answer in entry[field]
]

# NOTE(review): dropna() only removes None/NaN cells; an answer made purely of
# whitespace is stripped to "" and kept — confirm that is intended.
hdf = pd.DataFrame(rows).dropna()
print(f"Samples: {len(hdf)}")
hdf.head()

Samples: 85449


Unnamed: 0,text,label
0,There are many different best seller lists tha...,0
1,"Basically there are many categories of "" Best ...",1
2,"If you 're hearing about it , it 's because it...",1
3,"One reason is lots of catagories . However , h...",1
4,Salt is used on roads to help melt ice and sno...,0


In [25]:
# Align the CSV labels with the HC3 frame built above (0 = AI, 1 = Human).
# The CSV's `generated` column uses the opposite polarity: its 0.0 rows become
# label 1 and every other value becomes label 0.
#
# The guard keeps this cell idempotent. Without it, re-running the cell would
# skip the rename (the column is already gone) but still apply the inversion,
# silently flipping every label back.
if 'generated' in df.columns:
    # Rename the column
    df = df.rename(columns={'generated': 'label'})

    # Invert 0.0 to 1 and 1.0 to 0
    df['label'] = df['label'].apply(lambda x: 1 if x == 0.0 else 0)

In [26]:
# Sanity check on the CSV frame: report the row count, then display the first
# rows (as the cell's last expression) so the `label` column can be inspected.
print(f"Samples: {len(df)}")
df.head()

Samples: 487235


Unnamed: 0,text,label
0,Cars. Cars have been around since they became ...,1
1,Transportation is a large necessity in most co...,1
2,"""America's love affair with it's vehicles seem...",1
3,How often do you ride in a car? Do you drive a...,1
4,Cars are a wonderful thing. They are perhaps o...,1


In [27]:
# Stack the CSV frame and the HC3 frame into one table with a fresh 0..n-1 index.
frames = [df, hdf]
data = pd.concat(frames, ignore_index=True)

# Total number of rows after stacking.
print(data.shape[0])

572684


In [28]:
# Balance classes
# Downsample both classes to the size of the smaller one, then shuffle the
# result. Every sampling step is seeded so that Restart & Run All selects the
# same rows (and therefore trains on the same data) each time.
balance_seed = 42
min_class = data['label'].value_counts().min()
data_balanced = pd.concat([
    data[data['label'] == 0].sample(min_class, random_state=balance_seed),
    data[data['label'] == 1].sample(min_class, random_state=balance_seed),
]).sample(frac=1, random_state=balance_seed).reset_index(drop=True)

In [29]:
# Row count of the balanced frame (both classes combined).
print(data_balanced.shape[0])

416682


## Model: tokenization, fine-tuning, and evaluation

In [30]:
# Tokenization + Dataset Prep
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from torch.utils.data import Dataset, DataLoader
import torch

# Tokenizer for the "roberta-base" checkpoint; it is handed to CombinedDataset
# further down in this cell to encode the training texts.
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

class CombinedDataset(Dataset):
    """Map-style dataset that encodes every text once, up front.

    Args:
        texts: Sequence of raw strings to encode.
        labels: Sequence of class ids, one per entry of ``texts``. Values are
            converted to integers when an item is fetched, so ``0.0``/``1.0``
            are accepted as well as ``0``/``1``.
        tokenizer: Callable invoked as
            ``tokenizer(texts, truncation=True, padding=True, max_length=max_len)``.
            It must return a mapping from field name to one sequence per text.
        max_len: Maximum encoded length forwarded to the tokenizer.

    Raises:
        ValueError: If ``texts`` and ``labels`` have different lengths.
    """

    def __init__(self, texts, labels, tokenizer, max_len=256):
        # Materialise both inputs so positional indexing is well defined even
        # when a caller passes something other than a plain list.
        texts = list(texts)
        labels = list(labels)
        if len(texts) != len(labels):
            raise ValueError(
                f"texts and labels must have the same length, got {len(texts)} and {len(labels)}"
            )
        # padding=True pads every sample to the longest encoded text in `texts`.
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=max_len)
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of tensors, including a ``labels`` entry."""
        item = {k: torch.tensor(v[idx]) for k, v in self.encodings.items()}
        # Force an integer (long) class id: a float label would otherwise
        # produce a float tensor, which a classification head does not expect.
        item['labels'] = torch.tensor(int(self.labels[idx]), dtype=torch.long)
        return item

# Pull the two columns out as plain Python lists before encoding them.
train_texts = data_balanced['text'].tolist()
train_labels = data_balanced['label'].tolist()

# Encode the whole balanced frame, then serve it in shuffled mini-batches of 8.
train_dataset = CombinedDataset(train_texts, train_labels, tokenizer)
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=8)

In [None]:
# Start from the pretrained "roberta-base" weights with a two-class head.
model = RobertaForSequenceClassification.from_pretrained(
    "roberta-base",
    num_labels=2,
)

In [33]:
# Train the RoBERTa Classifier

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# `transformers.AdamW` is deprecated in favour of the PyTorch optimizer and is
# no longer importable from recent transformers releases, so use torch's class.
from torch.optim import AdamW
from tqdm import tqdm

# eps and weight_decay are pinned to the values the deprecated transformers
# class defaulted to (torch's own defaults are 1e-8 and 0.01), to stay close
# to the previous optimisation behaviour.
optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-6, weight_decay=0.0)

model.train()
for epoch in range(1):
    # Running SUM of the per-batch losses; the value printed below is that
    # sum, not a per-batch average.
    total_loss = 0
    for batch in tqdm(train_loader):
        # Move every tensor of the batch (inputs and labels) to the model's device.
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
        # Clear gradients so they do not accumulate into the next batch.
        optimizer.zero_grad()
    print(f"Epoch {epoch + 1} Loss: {total_loss:.4f}")

100%|██████████| 52086/52086 [2:04:27<00:00,  6.97it/s]  

Epoch 1 Loss: 1087.9315





In [34]:
# Write the fine-tuned weights and the tokenizer files into the same folder.
save_dir = "moredata-roberta-ai-detector"
model.save_pretrained(save_dir)
# Last expression on purpose: the tokenizer call's return value is the cell output.
tokenizer.save_pretrained(save_dir)

('moredata-roberta-ai-detector/tokenizer_config.json',
 'moredata-roberta-ai-detector/special_tokens_map.json',
 'moredata-roberta-ai-detector/vocab.json',
 'moredata-roberta-ai-detector/merges.txt',
 'moredata-roberta-ai-detector/added_tokens.json')

In [35]:
from transformers import RobertaForSequenceClassification

# Restore the tokenizer and the fine-tuned model from the saved directory.
# NOTE(review): `device` is not defined in this cell; it relies on an earlier
# cell having created it.
tokenizer = RobertaTokenizer.from_pretrained("moredata-roberta-ai-detector")
model = RobertaForSequenceClassification.from_pretrained("moredata-roberta-ai-detector")
model = model.to(device)

# Switch to inference mode and start with an empty prediction list.
preds = list()
model.eval()

In [36]:
import os
import jsonlines
import torch
from sklearn.metrics import classification_report
from transformers import RobertaTokenizer
from tqdm import tqdm

dev_dir = "cs162-final-dev-main"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
human_texts, machine_texts = [], []

# Every .jsonl file directly inside the dev folder, in os.listdir order.
jsonl_paths = [
    os.path.join(dev_dir, entry_name)
    for entry_name in os.listdir(dev_dir)
    if entry_name.endswith(".jsonl")
]

# Each record contributes one human text and one machine text.
for file_path in jsonl_paths:
    with jsonlines.open(file_path) as reader:
        for row in reader:
            human_texts.append(row["human_text"])
            machine_texts.append(row["machine_text"])

# Humans first (label 1), machines second (label 0); labels follow the same order.
test_texts = [*human_texts, *machine_texts]
true_labels = [1 for _ in human_texts] + [0 for _ in machine_texts]

# Load tokenizer
# Use the tokenizer files saved next to the fine-tuned weights instead of the
# base checkpoint, so evaluation always runs with the artefacts that were
# exported together and needs no lookup of "roberta-base".
tokenizer = RobertaTokenizer.from_pretrained("moredata-roberta-ai-detector")

# Predict
# `device` was re-created earlier in this cell; make sure the model sits on it.
model.to(device)
model.eval()
preds = []
eval_batch_size = 8

with torch.no_grad():  # inference only: no gradients needed
    for i in tqdm(range(0, len(test_texts), eval_batch_size)):
        batch = test_texts[i:i + eval_batch_size]
        # Same truncation length (256) as the training-time encoding.
        encodings = tokenizer(batch, return_tensors="pt", padding=True, truncation=True, max_length=256)
        input_ids = encodings["input_ids"].to(device)
        attention_mask = encodings["attention_mask"].to(device)
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        # Predicted class = index of the largest logit in each row.
        batch_preds = torch.argmax(outputs.logits, dim=1).tolist()
        preds.extend(batch_preds)

# Evaluate
from sklearn.metrics import classification_report

# Per-class precision/recall/F1; target_names are given in label order (0, then 1).
eval_report = classification_report(
    true_labels,
    preds,
    target_names=["AI-generated", "Human-written"],
)
print(eval_report)

100%|██████████| 3000/3000 [02:15<00:00, 22.11it/s] 


               precision    recall  f1-score   support

 AI-generated       0.79      0.95      0.86     12000
Human-written       0.93      0.74      0.83     12000

     accuracy                           0.84     24000
    macro avg       0.86      0.84      0.84     24000
 weighted avg       0.86      0.84      0.84     24000

