In [1]:
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer

import re, random
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

  from .autonotebook import tqdm as notebook_tqdm


**Baseline Test:** Using Naive Bayes + TF-IDF, we can establish a quick baseline that tells us whether simple lexical cues already separate human text from AI-generated text.

In [None]:
def preprocess(text, min_tokens=5):
    """Normalize one raw text sample for the TF-IDF baseline.

    The text is lower-cased, every character other than ``a-z``, ``0-9`` and
    whitespace is deleted (not replaced by a space), and runs of whitespace
    are collapsed to single spaces.

    Args:
        text: Raw sample. ``None`` or any other non-string value is treated
            as a missing sample instead of raising ``AttributeError``.
        min_tokens: Minimum number of whitespace-separated tokens the cleaned
            text must contain to be kept. Defaults to 5.

    Returns:
        The cleaned, space-joined text, or ``""`` when the sample is missing
        or has fewer than ``min_tokens`` tokens after cleaning.
    """
    # Null / non-text fields are reported as "drop me" the same way
    # too-short samples are, so callers need only one emptiness check.
    if not isinstance(text, str):
        return ""
    text = text.lower()
    # Punctuation is removed outright, so "don't" becomes "dont" and
    # "gpt-2" becomes "gpt2" rather than being split into two tokens.
    text = re.sub(r"[^a-z0-9\s]", "", text)
    toks = text.split()
    return " ".join(toks) if len(toks) >= min_tokens else ""

def get_data_from_jsonl(path, n_per_split=100, seed=None):
    """Load human / GPT-2 texts from a JSONL file as a shuffled labelled set.

    Only the first ``n_per_split`` records of the file are read. Each record
    is expected to provide a ``"real"`` field (labelled 0) and a ``"gpt2"``
    field (labelled 1). Every text is passed through ``preprocess`` and the
    samples it rejects (empty string) are dropped, so the two classes may
    end up with different sizes.

    Args:
        path: Passed to ``load_dataset`` as ``data_files``.
        n_per_split: Number of leading records to read from the file.
        seed: Optional seed for the shuffle. With ``None`` (the default) the
            global ``random`` state is used, so ``random.seed(...)`` set by
            the caller is still honoured.

    Returns:
        ``(texts, labels)``: two equal-length tuples in the same shuffled
        order. Both are empty tuples when no sample survives preprocessing
        (unpacking a bare ``zip(*[])`` would raise ``ValueError`` instead).
    """
    ds = load_dataset("json", data_files=path, split=f"train[:{n_per_split}]")

    human_raw = ds["real"]
    ai_raw = ds["gpt2"]

    # Preprocess, then drop the samples preprocess() rejected ("").
    human = [t for t in (preprocess(raw) for raw in human_raw) if t]
    ai = [t for t in (preprocess(raw) for raw in ai_raw) if t]

    # Pair each text with its label before shuffling so they stay aligned.
    texts = human + ai
    labels = [0] * len(human) + [1] * len(ai)
    data = list(zip(texts, labels))

    if seed is None:
        random.shuffle(data)
    else:
        # A private generator keeps a seeded call from disturbing the
        # global random state used elsewhere.
        random.Random(seed).shuffle(data)

    if not data:
        return (), ()
    shuffled_texts, shuffled_labels = zip(*data)
    return shuffled_texts, shuffled_labels

if __name__ == "__main__":
    # One seed for both the data shuffle and the train/dev split. Without
    # seeding the global `random` module the shuffle order, and therefore
    # the split and the reported metrics, changed on every run even though
    # train_test_split itself had a fixed random_state.
    SEED = 42
    random.seed(SEED)

    texts, labels = get_data_from_jsonl("data/train.jsonl", n_per_split=10000)

    # split: 90% train / 10% dev, keeping the class ratio equal in both parts
    X_train, X_dev, y_train, y_dev = train_test_split(
        texts, labels,
        test_size=0.1,
        random_state=SEED,
        stratify=labels
    )

    # vectorize: unigram + bigram TF-IDF capped at 20000 features; the
    # vocabulary is fitted on the training part only and reused for dev
    vec = TfidfVectorizer(ngram_range=(1,2), max_features=20000)
    Xtr_tfidf = vec.fit_transform(X_train)
    Xdv_tfidf = vec.transform(X_dev)

    # train NB
    clf = MultinomialNB()
    clf.fit(Xtr_tfidf, y_train)

    # evaluate: with average="binary" precision/recall/F1 are reported for
    # label 1, i.e. the AI-generated class
    preds = clf.predict(Xdv_tfidf)
    acc = accuracy_score(y_dev, preds)
    prec, rec, f1, _ = precision_recall_fscore_support(
        y_dev, preds, average="binary"
    )

    print(f"Baseline → Acc: {acc:.4f}, Prec: {prec:.4f}, Rec: {rec:.4f}, F1: {f1:.4f}")

Generating train split: 300784 examples [00:00, 1281004.69 examples/s]


Baseline → Acc: 0.6287, Prec: 0.6316, Rec: 0.6113, F1: 0.6213
