In [1]:
import torch
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, AutoModel
from tqdm.notebook import tqdm
import joblib
import os
import re
from datasets import load_dataset
import textstat
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.neural_network import MLPClassifier

In [2]:
def preprocess_text_for_bert(text):
    """Normalise a raw string before it is handed to the tokenizer.

    The text is lower-cased, every ``<...>`` tag-like span is replaced by a
    single space, and each run of whitespace is collapsed to one space with
    leading and trailing whitespace removed.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string.
    """
    # Tags become a space (not an empty string) so adjacent words stay separated.
    without_tags = re.sub(r"<[^>]+>", " ", text.lower())
    return re.sub(r"\s+", " ", without_tags).strip()

def load_and_process_data(filepath, human_key, ai_key, n_samples_per_class=1000):
    """Collect up to ``n_samples_per_class`` human and AI texts from a JSON file.

    Each record may store its texts under ``human_key`` / ``ai_key`` either as
    a single string or as a list of strings. Every text is cleaned with
    ``preprocess_text_for_bert`` and kept only if the cleaned text has at
    least five whitespace-separated tokens.

    Args:
        filepath: Path forwarded to ``load_dataset("json", data_files=...)``.
        human_key: Record field holding human-written text (label 0).
        ai_key: Record field holding AI-generated text (label 1).
        n_samples_per_class: Maximum number of texts kept per label.

    Returns:
        A ``(texts, labels)`` tuple of parallel lists. Within each record the
        human texts come before the AI texts.
    """
    # Request only the first n*10 records; if that sliced load raises for any
    # reason, fall back to loading the whole train split.
    try:
        raw_ds = load_dataset("json", data_files=filepath, split=f"train[:{n_samples_per_class*10}]")
    except Exception:
        raw_ds = load_dataset("json", data_files=filepath, split="train")

    texts, labels = [], []
    counts = {0: 0, 1: 0}  # label -> number of samples accepted so far
    sources = ((0, human_key), (1, ai_key))

    for item in raw_ds:
        for label, key in sources:
            candidates = item.get(key)
            if candidates is None:
                # Field missing or explicitly null in this record: nothing to
                # add (iterating over None would raise TypeError).
                candidates = []
            elif isinstance(candidates, str):
                candidates = [candidates]

            for text in candidates:
                if counts[label] >= n_samples_per_class:
                    break
                processed_text = preprocess_text_for_bert(str(text))
                if len(processed_text.split()) >= 5:
                    texts.append(processed_text)
                    labels.append(label)
                    counts[label] += 1

        # Stop scanning once both classes have reached their quota.
        if all(count >= n_samples_per_class for count in counts.values()):
            break

    print(f"From {os.path.basename(filepath)}: Loaded {counts[0]} human, {counts[1]} AI samples.")
    return texts, labels

In [3]:
# Order of the stylometric features; the vector returned by
# extract_stylometric_features follows this order.
FEATURE_NAMES = [
    'flesch_reading_ease',
    'flesch_kincaid_grade',
    'gunning_fog',
    'smog_index',
    'automated_readability_index',
    'coleman_liau_index',
    'lexicon_count',
    'sentence_count',
    'avg_sentence_length',
    'avg_word_length',
    'type_token_ratio',
]

def extract_stylometric_features(text: str) -> np.ndarray:
    """Compute the stylometric feature vector for one text.

    Args:
        text: The text to analyse.

    Returns:
        A float32 array of length ``len(FEATURE_NAMES)``. An all-zero vector
        is returned when ``text`` is not a string, has fewer than three
        whitespace-separated tokens, or when any feature computation raises.
    """
    fallback = np.zeros(len(FEATURE_NAMES), dtype=np.float32)

    # Non-string input (None, NaN, ...) has no .split(); treat it like text
    # that is too short instead of raising.
    if not isinstance(text, str):
        return fallback

    words = text.lower().split()
    if len(words) < 3:
        return fallback

    try:
        features = [
            textstat.flesch_reading_ease(text),
            textstat.flesch_kincaid_grade(text),
            textstat.gunning_fog(text),
            textstat.smog_index(text),
            textstat.automated_readability_index(text),
            textstat.coleman_liau_index(text),
            textstat.lexicon_count(text),
            textstat.sentence_count(text),
            # NOTE(review): a recorded run of this notebook printed a warning
            # pointing at this call; check the installed textstat version's
            # documentation for a preferred replacement.
            textstat.avg_sentence_length(text),
            textstat.avg_word_length(text),
            # Type-token ratio: distinct lower-cased tokens over all tokens.
            # `words` has at least three entries here, so no zero division.
            len(set(words)) / len(words),
        ]
        return np.array(features, dtype=np.float32)
    except Exception:
        # Best-effort: a failing metric yields the neutral vector rather than
        # aborting feature extraction for the whole corpus.
        return fallback

In [4]:
# Where the fine-tuned BERT checkpoint is read from, and where the hybrid
# classifier and its feature scaler are written.
BERT_MODEL_PATH = "./bert_ai_detector_final"
HYBRID_MODEL_SAVE_DIR = "hybrid_model"
HYBRID_CLASSIFIER_SAVE_PATH = os.path.join(
    HYBRID_MODEL_SAVE_DIR, "hybrid_classifier.joblib"
)
SCALER_SAVE_PATH = os.path.join(
    HYBRID_MODEL_SAVE_DIR, "feature_scaler.joblib"
)
# Create the output directory up front; exist_ok makes re-running the cell safe.
os.makedirs(HYBRID_MODEL_SAVE_DIR, exist_ok=True)

# Use the GPU when torch reports one, otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print(f"Using device: {DEVICE}")

# Load tokenizer and encoder from the same checkpoint directory.
tokenizer = AutoTokenizer.from_pretrained(BERT_MODEL_PATH)
bert_model = AutoModel.from_pretrained(BERT_MODEL_PATH)
bert_model = bert_model.to(DEVICE)
# Put the encoder in evaluation mode; as the last expression of the cell its
# return value (the model) is what the notebook displays.
bert_model.eval()

Using device: cuda


BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False

In [5]:
# Draw the same per-class quota from each corpus, then join them into one
# dataset (GPT-2 corpus first, HC3 second).
texts_orig, labels_orig = load_and_process_data(
    "data/real-vs-gpt2-sentences.jsonl",
    human_key="real",
    ai_key="gpt2",
    n_samples_per_class=1000,
)
texts_hc3, labels_hc3 = load_and_process_data(
    "data/HC3.jsonl",
    human_key="human_answers",
    ai_key="chatgpt_answers",
    n_samples_per_class=1000,
)
all_texts = [*texts_orig, *texts_hc3]
all_labels = [*labels_orig, *labels_hc3]

def create_hybrid_features(texts, base_model, tokenizer, device, batch_size=32, max_length=256):
    """Compute encoder embeddings and stylometric features for a list of texts.

    Args:
        texts: Iterable of strings; it is materialised into a list.
        base_model: Encoder whose output exposes ``last_hidden_state``. The
            caller is expected to have moved it to ``device`` and put it in
            evaluation mode (this function does neither).
        tokenizer: Tokenizer matching ``base_model``.
        device: Device the tokenized inputs are moved to.
        batch_size: Number of texts per forward pass. Batching avoids one
            forward pass per text; values may differ from one-at-a-time
            inference at floating-point rounding level. Pass ``1`` to encode
            each text on its own.
        max_length: Token length at which inputs are truncated.

    Returns:
        ``(bert_features, stylometry_features)`` as NumPy arrays, one row per
        input text, in input order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    texts = list(texts)
    bert_features = []

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        with tqdm(total=len(texts), desc="Extracting BERT Features") as progress:
            for start in range(0, len(texts), batch_size):
                batch = texts[start:start + batch_size]
                # padding=True pads each batch to its longest member.
                inputs = tokenizer(
                    batch,
                    return_tensors="pt",
                    truncation=True,
                    padding=True,
                    max_length=max_length,
                ).to(device)
                outputs = base_model(**inputs)
                # Position 0 is taken as the per-sequence summary slot. This
                # assumes the tokenizer pads on the right — TODO confirm if a
                # left-padding tokenizer is ever passed in.
                cls_embeddings = outputs.last_hidden_state[:, 0, :].cpu().numpy()
                bert_features.extend(cls_embeddings)  # one row per text
                progress.update(len(batch))

    stylometry_features = [
        extract_stylometric_features(text)
        for text in tqdm(texts, desc="Extracting Stylometry Features")
    ]

    return np.array(bert_features), np.array(stylometry_features)

# Extract both feature families for the whole corpus, then place the
# embedding columns and the stylometric columns side by side.
bert_feats, style_feats = create_hybrid_features(
    all_texts,
    base_model=bert_model,
    tokenizer=tokenizer,
    device=DEVICE,
)
X_hybrid = np.concatenate((bert_feats, style_feats), axis=1)
y = np.asarray(all_labels)

print(f"Hybrid feature matrix shape: {X_hybrid.shape}")

From real-vs-gpt2-sentences.jsonl: Loaded 1000 human, 1000 AI samples.
From HC3.jsonl: Loaded 1000 human, 1000 AI samples.


Extracting BERT Features:   0%|          | 0/4000 [00:00<?, ?it/s]

Extracting Stylometry Features:   0%|          | 0/4000 [00:00<?, ?it/s]

  textstat.avg_sentence_length(text),


Hybrid feature matrix shape: (4000, 779)


In [6]:
# Stratified 80/20 split so both classes keep the same ratio in train and test.
X_train, X_test, y_train, y_test = train_test_split(
    X_hybrid,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply it to both splits so
# no test-set statistics reach the model.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

hybrid_classifier = MLPClassifier(
    # architecture
    hidden_layer_sizes=(128, 64),
    activation='relu',
    # optimisation
    solver='adam',
    learning_rate_init=0.001,
    batch_size='auto',
    alpha=0.0001,
    # stopping: up to 1000 epochs, cut short when the validation score stalls
    max_iter=1000,
    early_stopping=True,
    # reproducibility / logging
    random_state=42,
    verbose=True,
)

# Kept as the cell's last expression so the fitted estimator is displayed.
hybrid_classifier.fit(X_train_scaled, y_train)

Iteration 1, loss = 0.20246838
Validation score: 0.943750
Iteration 2, loss = 0.10982140
Validation score: 0.956250
Iteration 3, loss = 0.09600164
Validation score: 0.953125
Iteration 4, loss = 0.08687994
Validation score: 0.956250
Iteration 5, loss = 0.07907500
Validation score: 0.943750
Iteration 6, loss = 0.07310477
Validation score: 0.946875
Iteration 7, loss = 0.06745590
Validation score: 0.943750
Iteration 8, loss = 0.06255022
Validation score: 0.953125
Iteration 9, loss = 0.05813540
Validation score: 0.946875
Iteration 10, loss = 0.05581217
Validation score: 0.940625
Iteration 11, loss = 0.05054624
Validation score: 0.943750
Iteration 12, loss = 0.04682006
Validation score: 0.953125
Iteration 13, loss = 0.04645658
Validation score: 0.950000
Validation score did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.


In [None]:
# Score the held-out split, then persist the classifier together with the
# scaler it was trained with.
print("\nLocal Test Set Evaluation:")
y_pred = hybrid_classifier.predict(X_test_scaled)
report = classification_report(y_test, y_pred, target_names=["Human", "AI"])
print(report)

for artefact, destination in (
    (hybrid_classifier, HYBRID_CLASSIFIER_SAVE_PATH),
    (scaler, SCALER_SAVE_PATH),
):
    joblib.dump(artefact, destination)
print("Hybrid model and scaler saved successfully")


Local Test Set Evaluation:
              precision    recall  f1-score   support

       Human       0.97      0.97      0.97       400
          AI       0.97      0.97      0.97       400

    accuracy                           0.97       800
   macro avg       0.97      0.97      0.97       800
weighted avg       0.97      0.97      0.97       800

Hybrid model and scaler saved successfully
