# AI-Generated Text Detection

Stacked ensemble (DeBERTa-v3 + LightGBM + TF-IDF/SGD) on `merged_ai_human_multisocial_features.csv` (0=Human, 1=AI). Flow: setup -> data check -> Model A/B/C CV (OOF) -> stacking -> inference. Text column: `text`; label column: `label`; numerical features like `burstiness`, `perplexity_score`, `lexical_diversity`, `gunning_fog_index`.


In [1]:
# Optional: install dependencies if running in a fresh environment
# !pip install -q pandas numpy torch torchvision torchaudio transformers datasets lightgbm scikit-learn
# For CUDA builds of PyTorch: https://pytorch.org/get-started/locally/


In [None]:
import os
import random
from pathlib import Path

import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset

import lightgbm as lgb
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
    set_seed,
)
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.pipeline import Pipeline

SEED = 42
set_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)
torch.manual_seed(SEED)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device: {DEVICE}")

BASE_MODEL = "microsoft/deberta-v3-base"
MAX_LENGTH = 256
N_SPLITS = 5

CWD = Path.cwd()
candidate_paths = [
    CWD / "merged_ai_human_multisocial_features.csv",
    CWD / "src/ai_vs_human/merged_ai_human_multisocial_features.csv",
]
for cp in candidate_paths:
    if cp.exists():
        DATA_PATH = cp
        break
else:
    raise FileNotFoundError("merged_ai_human_multisocial_features.csv not found")

WORK_DIR = DATA_PATH.parent
MODEL_DIR = WORK_DIR / "models/deberta_v3_base"
MODEL_DIR.mkdir(parents=True, exist_ok=True)
OOF_DIR = WORK_DIR / "oof"
OOF_DIR.mkdir(exist_ok=True)
test_candidates = [
    WORK_DIR / "merged_ai_human_multisocial_features_test.csv",
    WORK_DIR / "ai_human_content_detection_test.csv",
]
TEST_PATH = next((p for p in test_candidates if p.exists()), test_candidates[0])  # optional


Device: cuda


In [None]:
# Load data and inspect
train_df = pd.read_csv(DATA_PATH)
test_df = pd.read_csv(TEST_PATH) if TEST_PATH.exists() else None

text_col = "text"
label_col = "label"
meta_prefixes = ("src_",)  # exclude metadata to avoid leakage

num_cols = [
    c
    for c in train_df.columns
    if c not in [text_col, label_col]
    and pd.api.types.is_numeric_dtype(train_df[c])
    and not any(c.startswith(pref) for pref in meta_prefixes)
]

train_df[num_cols] = train_df[num_cols].apply(pd.to_numeric, errors="coerce")
num_medians = train_df[num_cols].median()
train_df[num_cols] = train_df[num_cols].fillna(num_medians)
if test_df is not None:
    test_df[num_cols] = test_df[num_cols].apply(pd.to_numeric, errors="coerce")
    test_df[num_cols] = test_df[num_cols].fillna(num_medians)

print(f"Train shape: {train_df.shape}")
print(f"Numeric features ({len(num_cols)}): {num_cols[:10]}{'...' if len(num_cols) > 10 else ''}")
print(train_df[[text_col, label_col]].head(2))
print(train_df[num_cols].describe().T.head())

y = train_df[label_col].astype(int).values
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)
folds = list(skf.split(train_df[text_col], y))


In [4]:
# Helper classes and metrics
class HFTextDataset(Dataset):
    def __init__(self, df, tokenizer, text_col, label_col=None, max_length=256):
        self.df = df.reset_index(drop=True)
        self.tokenizer = tokenizer
        self.text_col = text_col
        self.label_col = label_col
        self.max_length = max_length

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        text = str(self.df.loc[idx, self.text_col])
        enc = self.tokenizer(
            text,
            truncation=True,
            max_length=self.max_length,
            padding=False,
        )
        if self.label_col is not None:
            enc["labels"] = int(self.df.loc[idx, self.label_col])
        return enc


def softmax_logits(logits):
    logits = torch.tensor(logits)
    probs = torch.softmax(logits, dim=1).cpu().numpy()
    return probs[:, 1]


def compute_metrics(eval_pred):
    logits, labels = eval_pred
    probs = softmax_logits(logits)
    return {"roc_auc": roc_auc_score(labels, probs)}


In [None]:
# Model A: DeBERTa-v3-base with Stratified K-Fold OOF
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)
collator = DataCollatorWithPadding(tokenizer=tokenizer, padding=True)

oof_deberta = np.zeros(len(train_df))
deberta_model_paths = []

for fold, (train_idx, val_idx) in enumerate(folds):
    print(f"\n[Model A] Fold {fold+1}/{N_SPLITS}")
    fold_dir = MODEL_DIR / f"fold_{fold}"
    fold_dir.mkdir(parents=True, exist_ok=True)

    train_ds = HFTextDataset(train_df.iloc[train_idx], tokenizer, text_col, label_col, MAX_LENGTH)
    val_ds = HFTextDataset(train_df.iloc[val_idx], tokenizer, text_col, label_col, MAX_LENGTH)

    model = AutoModelForSequenceClassification.from_pretrained(
        BASE_MODEL, num_labels=2
    )

    training_kwargs = dict(
        output_dir=str(fold_dir),
        per_device_train_batch_size=8,
        per_device_eval_batch_size=16,
        gradient_accumulation_steps=2,
        num_train_epochs=2,
        learning_rate=2e-5,
        weight_decay=0.01,
        warmup_ratio=0.1,
        fp16=torch.cuda.is_available(),
        logging_steps=50,
        save_total_limit=1,
        load_best_model_at_end=True,
        metric_for_best_model="roc_auc",
        greater_is_better=True,
        dataloader_num_workers=2,
        report_to="none",
    )

    # Handle TrainingArguments API differences across transformers versions
    try:
        args = TrainingArguments(
            **training_kwargs,
            evaluation_strategy="epoch",
            save_strategy="epoch",
        )
    except TypeError:
        try:
            args = TrainingArguments(
                **training_kwargs,
                evaluate_during_training=True,
                eval_steps=500,
                save_steps=500,
            )
        except TypeError:
            fallback_kwargs = training_kwargs.copy()
            for key in ("load_best_model_at_end", "metric_for_best_model", "greater_is_better"):
                fallback_kwargs.pop(key, None)
            args = TrainingArguments(**fallback_kwargs)

    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_ds,
        eval_dataset=val_ds,
        tokenizer=tokenizer,
        data_collator=collator,
        compute_metrics=compute_metrics,
    )

    trainer.train()

    preds = trainer.predict(val_ds).predictions
    oof_deberta[val_idx] = softmax_logits(preds)

    best_dir = trainer.state.best_model_checkpoint or str(fold_dir / "best")
    if trainer.state.best_model_checkpoint is None:
        trainer.save_model(best_dir)
    deberta_model_paths.append(best_dir)

    torch.cuda.empty_cache()

deberta_oof_auc = roc_auc_score(y, oof_deberta)
print(f"Model A OOF ROC-AUC: {deberta_oof_auc:.5f}")
pd.DataFrame({"oof_deberta": oof_deberta}).to_csv(OOF_DIR / "oof_deberta.csv", index=False)



[Model A] Fold 1/5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 2, 'bos_token_id': 1}.


Step,Training Loss





[Model A] Fold 2/5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 2, 'bos_token_id': 1}.


Step,Training Loss





[Model A] Fold 3/5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 2, 'bos_token_id': 1}.


Step,Training Loss





[Model A] Fold 4/5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 2, 'bos_token_id': 1}.


Step,Training Loss





[Model A] Fold 5/5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 2, 'bos_token_id': 1}.


Step,Training Loss


In [None]:
# Model B: LightGBM on numerical features only
oof_lgb = np.zeros(len(train_df))
lgb_models = []

for fold, (train_idx, val_idx) in enumerate(folds):
    print(f"\n[Model B] Fold {fold+1}/{N_SPLITS}")
    X_train = train_df.iloc[train_idx][num_cols]
    y_train = y[train_idx]
    X_val = train_df.iloc[val_idx][num_cols]
    y_val = y[val_idx]

    model = lgb.LGBMClassifier(
        n_estimators=600,
        learning_rate=0.03,
        num_leaves=128,
        subsample=0.8,
        colsample_bytree=0.8,
        objective="binary",
        random_state=SEED + fold,
        n_jobs=-1,
        reg_lambda=1.0,
        min_child_samples=40,
    )

    model.fit(
        X_train,
        y_train,
        eval_set=[(X_val, y_val)],
        eval_metric="auc",
        callbacks=[lgb.early_stopping(50, verbose=False)],
    )

    oof_lgb[val_idx] = model.predict_proba(X_val)[:, 1]
    lgb_models.append(model)

lgb_oof_auc = roc_auc_score(y, oof_lgb)
print(f"Model B OOF ROC-AUC: {lgb_oof_auc:.5f}")
pd.DataFrame({"oof_lgb": oof_lgb}).to_csv(OOF_DIR / "oof_lgb.csv", index=False)


In [None]:
# Model C: TF-IDF (3-5 char-grams) + SGDClassifier(log_loss)
oof_sgd = np.zeros(len(train_df))
sgd_models = []

for fold, (train_idx, val_idx) in enumerate(folds):
    print(f"\n[Model C] Fold {fold+1}/{N_SPLITS}")
    X_train = train_df.iloc[train_idx][text_col].astype(str)
    y_train = y[train_idx]
    X_val = train_df.iloc[val_idx][text_col].astype(str)
    y_val = y[val_idx]

    clf = Pipeline(
        [
            (
                "tfidf",
                TfidfVectorizer(
                    analyzer="char",
                    ngram_range=(3, 5),
                    sublinear_tf=True,
                    min_df=2,
                    max_features=600000,
                ),
            ),
            (
                "clf",
                SGDClassifier(
                    loss="log_loss",
                    alpha=1e-4,
                    max_iter=1000,
                    tol=1e-3,
                    random_state=SEED + fold,
                ),
            ),
        ]
    )

    clf.fit(X_train, y_train)
    oof_sgd[val_idx] = clf.predict_proba(X_val)[:, 1]
    sgd_models.append(clf)

sgd_oof_auc = roc_auc_score(y, oof_sgd)
print(f"Model C OOF ROC-AUC: {sgd_oof_auc:.5f}")
pd.DataFrame({"oof_sgd": oof_sgd}).to_csv(OOF_DIR / "oof_sgd.csv", index=False)


In [None]:
# Stacked Ensemble: Logistic Regression meta-learner on OOF probabilities
stack_train = np.vstack([oof_deberta, oof_lgb, oof_sgd]).T

meta_learner = LogisticRegression(
    max_iter=1000,
    n_jobs=-1,
    C=2.0,
    solver="lbfgs",
)
meta_learner.fit(stack_train, y)
stack_auc = roc_auc_score(y, meta_learner.predict_proba(stack_train)[:, 1])
print(f"Meta-learner OOF ROC-AUC: {stack_auc:.5f}")

pd.DataFrame(
    {
        "oof_deberta": oof_deberta,
        "oof_lgb": oof_lgb,
        "oof_sgd": oof_sgd,
        "oof_stack": meta_learner.predict_proba(stack_train)[:, 1],
        label_col: y,
    }
).to_csv(OOF_DIR / "oof_stack.csv", index=False)


In [None]:
# Inference on test data (averaging fold predictions for each base model)
def predict_deberta(df):
    test_ds = HFTextDataset(df, tokenizer, text_col, None, MAX_LENGTH)
    fold_preds = []
    for path in deberta_model_paths:
        model = AutoModelForSequenceClassification.from_pretrained(path).to(DEVICE)
        infer_trainer = Trainer(
            model=model,
            tokenizer=tokenizer,
            data_collator=collator,
        )
        preds = infer_trainer.predict(test_ds).predictions
        fold_preds.append(softmax_logits(preds))
        torch.cuda.empty_cache()
    return np.mean(fold_preds, axis=0)


def predict_lgb(df):
    feats = df[num_cols]
    fold_preds = [m.predict_proba(feats)[:, 1] for m in lgb_models]
    return np.mean(fold_preds, axis=0)


def predict_sgd(df):
    texts = df[text_col].astype(str)
    fold_preds = [m.predict_proba(texts)[:, 1] for m in sgd_models]
    return np.mean(fold_preds, axis=0)


if test_df is not None:
    print("\nRunning inference on test set...")
    test_deberta = predict_deberta(test_df)
    test_lgb = predict_lgb(test_df)
    test_sgd = predict_sgd(test_df)

    stack_test = np.vstack([test_deberta, test_lgb, test_sgd]).T
    test_pred = meta_learner.predict_proba(stack_test)[:, 1]

    submission = pd.DataFrame({"id": test_df.index, "prediction": test_pred})
    submission.to_csv(WORK_DIR / "submission.csv", index=False)
    print("Saved submission.csv")
else:
    print("No test file found; set TEST_PATH to run inference.")
