In [None]:
"""
AI Text Classification Setup Script
Author: Zubair Elliot
Date: 2025-09-27
Description:
    This script sets up the Python environment for training and evaluating
    a transformer-based AI text classification model. It includes package
    installations, library imports, and Google Drive mounting for dataset access.
"""


!pip install -q transformers datasets scikit-learn torch accelerate evaluate


import json          # For working with JSON data
import os            # For interacting with the operating system
import pandas as pd  # For data manipulation and analysis
import numpy as np   # For numerical computations


import torch                       # Core PyTorch library for deep learning
from datasets import Dataset       # Hugging Face dataset class

# Transformers imports for model and tokenizer handling
from transformers import (
    AutoTokenizer,                 # Pre-trained tokenizer loader
    AutoModelForSequenceClassification,  # Pre-trained model for sequence classification
    Trainer,                       # Trainer class for model training
    TrainingArguments,             # Configuration for training process
    DataCollatorWithPadding        # Handles dynamic padding of input sequences
)


from sklearn.metrics import (
    accuracy_score,                # Measures model accuracy
    precision_recall_fscore_support, # Computes precision, recall, and F1-score
    confusion_matrix               # Generates confusion matrix for classification results
)


from google.colab import drive     # Allows mounting Google Drive to access datasets


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:

# Mount Google Drive at /content/drive so files stored in Drive are reachable
# through the local filesystem.
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:

# Path to the JSONL dataset stored in Google Drive
file_path = "/content/drive/MyDrive/fm_shuffled.jsonl"  # replace fm_shuffled.jsonl with your own JSONL file

# Parse the file one JSON object per line. Blank lines (for example a trailing
# newline at the end of the file) are skipped, because json.loads raises on
# an empty string.
data = []
with open(file_path, "r", encoding="utf-8") as f:
    for line in f:
        record = line.strip()
        if record:
            data.append(json.loads(record))


df = pd.DataFrame(data)  # One row per JSON object

# Drop rows that are missing the text or its label: the integer cast below
# raises if any 'generated' value is NaN.
df = df.dropna(subset=["text", "generated"])

# Ensure the 'generated' label column is of integer type
df["generated"] = df["generated"].astype(int)


print(f"Total records: {len(df)}")  # Number of records after cleaning
print(df["generated"].value_counts())  # Count of each label (0=human, 1=AI)


Total records: 622054
generated
0    377687
1    244367
Name: count, dtype: int64


In [None]:

# Convert the pandas DataFrame to a Hugging Face Dataset.
# preserve_index=False keeps the pandas index out of the dataset: a non-default
# index (e.g. after dropna removed rows) would otherwise be stored as an extra
# "__index_level_0__" column, which is not a model input.
dataset = Dataset.from_pandas(df, preserve_index=False)


# First, split into training (80%) and a held-out portion (20%)
train_test = dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = train_test["train"]
test_dataset  = train_test["test"]

# Split the held-out portion in half: validation (50%) and final test (50%)
val_test = test_dataset.train_test_split(test_size=0.5, seed=42)
val_dataset  = val_test["train"]   # Validation set
test_dataset = val_test["test"]    # Test set


print(f"Train: {len(train_dataset)}, Val: {len(val_dataset)}, Test: {len(test_dataset)}")


Train: 497643, Val: 62205, Test: 62206


In [None]:

# Name of the pre-trained checkpoint whose tokenizer is loaded below
model_name = "google/electra-base-discriminator"

# Load the tokenizer that belongs to the checkpoint named by model_name
tokenizer = AutoTokenizer.from_pretrained(model_name)

# ----------------------------
# Function to tokenize a batch of examples
# ----------------------------
def tokenize_fn(batch):
    """
    Encode the "text" entries of a batch with the module-level tokenizer.

    Sequences are truncated to at most 512 tokens and are not padded here;
    padding is left to whatever collates the examples into batches.

    Parameters:
        batch (dict): A batch of examples; its "text" entry is handed to the tokenizer.

    Returns:
        The tokenizer's output for the given texts.
    """
    encode_options = {
        "truncation": True,   # Cut sequences that exceed max_length
        "padding": False,     # No padding at tokenization time
        "max_length": 512,    # Upper bound on sequence length
    }
    texts = batch["text"]
    return tokenizer(texts, **encode_options)

# For every split: tokenize in batches (dropping the raw "text" column), then
# rename the "generated" label column to "labels", the column name the
# Hugging Face Trainer is given for training targets in this notebook.
train_dataset, val_dataset, test_dataset = (
    split.map(tokenize_fn, batched=True, remove_columns=["text"]).rename_column("generated", "labels")
    for split in (train_dataset, val_dataset, test_dataset)
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/666 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Map:   0%|          | 0/497643 [00:00<?, ? examples/s]

Map:   0%|          | 0/62205 [00:00<?, ? examples/s]

Map:   0%|          | 0/62206 [00:00<?, ? examples/s]

In [None]:

# Load the pre-trained checkpoint with a two-class classification head.
# model_name (defined together with the tokenizer) is reused here so the model
# and the tokenizer always come from the same checkpoint.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,                           # Same checkpoint the tokenizer was loaded from
    num_labels=2,                         # Binary classification: Human (0) vs AI (1)
    id2label={0: "Human", 1: "AI"},       # Mapping from label IDs to human-readable labels
    label2id={"Human": 0, "AI": 1}        # Mapping from human-readable labels to label IDs
)




pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:

# Collator that pads the examples of each batch with the tokenizer when the
# batch is assembled (tokenization itself was done without padding).
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)


In [None]:
# define metrics for evaluation
def compute_metrics(eval_pred):
    """
    Turn raw evaluation outputs into classification metrics.

    Parameters:
        eval_pred (tuple): A (logits, labels) pair.
            - logits: Raw model scores; the class is chosen by argmax over the last axis
            - labels: Reference labels to compare the predictions against

    Returns:
        dict: "accuracy", "f1", "precision" and "recall"; the last three are
        computed with average="weighted".
    """
    scores, references = eval_pred
    predictions = np.argmax(scores, axis=-1)  # Index of the highest-scoring class

    precision, recall, f1, _support = precision_recall_fscore_support(
        references, predictions, average="weighted"
    )

    return dict(
        accuracy=accuracy_score(references, predictions),
        f1=f1,
        precision=precision,
        recall=recall,
    )




In [None]:

# Define training configuration for Hugging Face Trainer
training_args = TrainingArguments(
    output_dir="./electra-ai-human-classifier",  # Directory to save model checkpoints and outputs
    learning_rate=2e-5,                          # Learning rate for optimizer
    per_device_train_batch_size=32,              # Batch size per device for training
    per_device_eval_batch_size=64,               # Batch size per device for evaluation
    num_train_epochs=2,                          # Number of training epochs
    weight_decay=0.01,                           # Weight decay for regularization
    eval_strategy="epoch",                       # Evaluate at the end of each epoch
    save_strategy="epoch",                       # Save checkpoint at the end of each epoch
    logging_strategy="epoch",                    # Log training metrics at the end of each epoch
    load_best_model_at_end=True,                 # Reload the best checkpoint when training finishes
    metric_for_best_model="accuracy",            # Metric used to select the best checkpoint
    greater_is_better=True,                      # Higher accuracy is better
    # Mixed precision needs a CUDA device; enable it only when one is present
    # so the notebook also runs (slowly) on a CPU-only runtime.
    fp16=torch.cuda.is_available(),
    save_total_limit=2,                          # Keep at most 2 checkpoints on disk
    report_to="none",                            # Disable integration with logging platforms (e.g., WandB)
    seed=42,                                     # Random seed for reproducibility
    # remove_unused_columns is left at its default (True) so the Trainer drops
    # dataset columns that the model's forward() does not accept instead of
    # passing them through to the model.
)




In [None]:

# Initialize Hugging Face Trainer

# The Trainer class handles training, evaluation, and prediction
# using the specified model, datasets, tokenizer, and training arguments.
trainer = Trainer(
    model=model,                     # Pre-trained model with classification head
    args=training_args,              # Training configuration defined above
    train_dataset=train_dataset,     # Tokenized training dataset
    eval_dataset=val_dataset,        # Tokenized validation dataset
    processing_class=tokenizer,      # Replaces the deprecated `tokenizer=` argument of Trainer
    data_collator=data_collator,     # Handles dynamic padding for batches
    compute_metrics=compute_metrics  # Evaluation metrics function for validation
)




  trainer = Trainer(


In [None]:
# Train the detector with the Trainer built in the previous cell and keep the
# value returned by trainer.train() in train_result.
train_result = trainer.train()


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0444,0.035808,0.988441,0.988461,0.988598,0.988441
2,0.0138,0.054546,0.987589,0.987616,0.987846,0.987589


In [None]:

# 1️⃣2️⃣ Save model to Google Drive

drive_path = "/content/drive/MyDrive/electra_ai_human"
os.makedirs(drive_path, exist_ok=True)  # Create the target folder if it is missing

# Write the model first, then the tokenizer files, into the same folder
for persist in (trainer.save_model, tokenizer.save_pretrained):
    persist(drive_path)

print(f"Model saved to Google Drive: {drive_path}")


Model saved to Google Drive: /content/drive/MyDrive/electra_ai_human


In [3]:
# === Debug + Inference helper (copy into Colab) ===
import os

import torch
import torch.nn.functional as F
from google.colab import drive
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Make the Drive folder holding the saved checkpoint reachable
drive.mount('/content/drive')

# Checkpoint folder (config, model weights and tokenizer files)
model_path = "/content/drive/MyDrive/electra_ai_human"

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the tokenizer and the model from the checkpoint folder
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

# Place the model on the chosen device and switch it to evaluation mode
model.to(device)
model.eval()

# Derive an index -> label-name mapping from the model config, coping with
# keys that were stored as strings as well as keys that are already ints.
cfg = model.config
id2label = {}
raw_mapping = getattr(cfg, "id2label", None)
if raw_mapping:
    try:
        # Normalise every key to int
        id2label = {int(key): name for key, name in raw_mapping.items()}
    except Exception:
        # Conversion failed: keep the keys exactly as stored
        id2label = dict(raw_mapping.items())

# Nothing usable in the config: assume 0 = Human, 1 = AI (override if needed)
if not id2label:
    id2label = {0: "Human", 1: "AI"}

# Show the label mapping together with basic sanity information, one
# "caption value" pair per line, followed by an empty line.
for caption, value in (
    ("Model path:", model_path),
    ("Device:", device),
    ("Model config id2label:", id2label),
    ("Model config label2id:", getattr(cfg, "label2id", None)),
    ("Tokenizer type:", type(tokenizer)),
):
    print(caption, value)
print()

# prediction function that returns label, confidence, and per-label probs
def predict_with_confidence(text, max_length=512):
    """
    Classify `text` with the loaded model and report how confident it is.

    Parameters:
        text: Input handed to the tokenizer.
        max_length (int): Truncation limit passed to the tokenizer.

    Returns:
        dict with the keys
            - "pred_idx": index of the most probable class
            - "pred_label": its name from id2label (the index as a string if unmapped)
            - "confidence": softmax probability of the predicted class
            - "probs": label name -> probability for every class
            - "logits": raw logits of the first (only) row, as a list
    """
    # Tokenize, then move every tensor to the device the model lives on
    encoded = tokenizer(text, truncation=True, padding=True, max_length=max_length, return_tensors="pt")
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    with torch.no_grad():
        logits = model(**encoded).logits

    # Probabilities of the first row as a numpy array
    probs = torch.softmax(logits, dim=-1).cpu().numpy()[0]

    def name_of(index):
        # Fall back to the stringified index when no label name is known
        return id2label.get(index, str(index))

    best = int(probs.argmax())
    return {
        "pred_idx": best,
        "pred_label": name_of(best),
        "confidence": float(probs[best]),
        "probs": {name_of(i): float(p) for i, p in enumerate(probs)},
        # Raw logits are included for debugging
        "logits": logits.cpu().numpy().tolist()[0],
    }

# Smoke-test sentences - swap in anything you want to try
examples = [
    "This is a short, casual sentence written by a real human.",
    "As an AI language model, I can compose coherent text on almost any topic with ease."
]
for sample in examples:
    out = predict_with_confidence(sample)
    # Each tuple becomes one printed line (fields separated by a space)
    report = (
        ("Text:", sample),
        ("Predicted:", out["pred_label"], f"({out['confidence']*100:.2f}%)"),
        ("Per-label probabilities:", out["probs"]),
        ("Raw logits:", out["logits"]),
        ("-----",),
    )
    for fields in report:
        print(*fields)

# Interactive loop: classify whatever the user types until they enter
# 'exit' or 'quit', close stdin, or interrupt the cell.
print("Interactive test — type text, or 'exit' to quit")
while True:
    try:
        text = input("Enter text: ")
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupted cell ends the session without a traceback
        print()
        break

    query = text.strip()
    if query.lower() in ("exit", "quit"):
        break
    if not query:
        # Nothing to classify; ask again rather than scoring an empty string
        continue

    out = predict_with_confidence(text)
    print(f"Predicted: {out['pred_label']}  ({out['confidence']*100:.2f}%)")
    for lbl, p in out["probs"].items():
        print(f"  {lbl}: {p*100:.2f}%")
    print()


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Model path: /content/drive/MyDrive/electra_ai_human
Device: cuda
Model config id2label: {0: 'Human', 1: 'AI'}
Model config label2id: {'AI': 1, 'Human': 0}
Tokenizer type: <class 'transformers.models.electra.tokenization_electra_fast.ElectraTokenizerFast'>

Text: This is a short, casual sentence written by a real human.
Predicted: Human (99.00%)
Per-label probabilities: {'Human': 0.9900312423706055, 'AI': 0.009968753904104233}
Raw logits: [2.169701099395752, -2.4285800457000732]
-----
Text: As an AI language model, I can compose coherent text on almost any topic with ease.
Predicted: Human (90.07%)
Per-label probabilities: {'Human': 0.9006745219230652, 'AI': 0.09932541847229004}
Raw logits: [1.0441290140151978, -1.1606135368347168]
-----
Interactive test — type text, or 'exit' to quit
Enter text: The rise of large language models such as ChatGPT, Claude, Grok 