In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np
import re

from tensorflow.keras.models import load_model
from tensorflow.keras import layers, models, callbacks, optimizers, losses, metrics
from tensorflow.keras.preprocessing.sequence import pad_sequences

from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

2025-09-12 19:13:31.761564: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1757704411.779235  157828 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1757704411.785158  157828 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-09-12 19:13:31.803125: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE3 SSE4.1 SSE4.2, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [14]:
df = pd.read_csv('AI_Human.csv')

# only keep normal characters
cleaning_pattern = r"[^a-zA-Z0-9\s.,;:!?'\"()\{\}\-—]"

def clean_text(text):
    text = text.replace('\n', ' ')  # replace newline characters with space
    text = re.sub(cleaning_pattern, ' ', text)  # remove unwanted characters
    text = re.sub(r'\s+', ' ', text)  # collapse multiple spaces into one
    text = text.strip()  # remove leading/trailing spaces
    return text
    
df['text'] = df['text'].apply(clean_text)
df = df.drop_duplicates().reset_index(drop=True) # drop duplicates

In [15]:
# Compute the number of words in each text 
df['length'] = df['text'].str.split().apply(len) 

print('df size before cutoff: ' + str(df.shape))
# Keep only essays with more than 3 words 
df = df[df['length'] >= 140].reset_index(drop=True) 
print('df size after cutoff: ' + str(df.shape))

# Print basic length statistics 
print("Min length:", df['length'].min()) 
print("Max length:", df['length'].max()) 
print("Mean length:", df['length'].mean()) 

# Compute 95th percentile to determine max sequence length 
percentile_95 = int(np.percentile(df['length'], 95)) 
print("95th percentile length:", percentile_95) 

# Set maximum sequence length for vectorization 
max_len = percentile_95

df size before cutoff: (464671, 3)
df size after cutoff: (459120, 3)
Min length: 140
Max length: 1704
Mean length: 396.951550792821
95th percentile length: 725


In [16]:
texts = df['text']
labels = df['generated']

# split out train/test to test final model on
train_texts, test_texts, train_labels, test_labels = train_test_split(
    texts, labels, test_size=0.1, stratify=labels, random_state=42)

# split val set out from train to train model on
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts, train_labels, test_size=0.2, stratify=train_labels, random_state=42)

In [17]:
max_tokens = 30000   # vocabulary size, counts tokens/words by frequency and any token not in the top max_tokens all become marked as the same

vectorizer = layers.TextVectorization(
    max_tokens=max_tokens,
    output_mode="int",
    output_sequence_length=max_len
)

vectorizer.adapt(train_texts)

I0000 00:00:1757705335.364647  157828 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13294 MB memory:  -> device: 0, name: NVIDIA A2, pci bus id: 0000:00:10.0, compute capability: 8.6
2025-09-12 19:29:07.967974: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 3146574456 exceeds 10% of free system memory.


In [24]:
# --- configurable hyperparams ---
max_tokens = max_tokens      # vocab size for vectorizer, how many words it can know. 30000 is arbitrary, a real measure would be better.
maxlen = max_len             # inputs are padded/truncated to this length, in this case 722 or the 95th percentile of word length balances too much truncation or large amounts of padding
embed_dim = 128              # takes the vectorized data and maps it into this many dimensions to represent different features of a given word
rnn_units = 128              # neurons in LSTM layer
dense_units = 128            # neurons in dense layer
dropout_rate = 0.3           # rate at which to drop features with dropout layers
learning_rate = 1e-3         # steps at which to optimize loss

# --- small attention pooling layer ---
class AttentionPool(layers.Layer):
    def __init__(self, units=64, **kwargs):
        super().__init__(**kwargs)
        # tell Keras this layer understands masks
        self.supports_masking = True

        self.dense = layers.Dense(units, activation="tanh")
        self.score = layers.Dense(1, use_bias=False)

    def compute_mask(self, inputs, mask=None):
        # This layer reduces the time dimension to a vector, so it produces no mask.
        return None

    def call(self, inputs, mask=None):
        # inputs: [B, T, F], mask: [B, T] or None
        x = self.dense(inputs)            # [B, T, units]
        scores = self.score(x)            # [B, T, 1]

        if mask is not None:
            # Broadcast mask to scores' shape and make masked positions very negative
            mask = tf.cast(mask, scores.dtype)           # [B, T]
            mask = tf.expand_dims(mask, axis=-1)         # [B, T, 1]
            scores = scores + (mask - 1.0) * 1e9         # masked positions -> ~ -inf

        weights = tf.nn.softmax(scores, axis=1)          # [B, T, 1]
        weighted = tf.reduce_sum(weights * inputs, axis=1)  # [B, F]
        return weighted

    def get_config(self):
        cfg = super().get_config()
        cfg.update({"units": self.dense.units if hasattr(self.dense, "units") else None})
        return cfg

# --- model architecture ---
# raw text input
# -> int sequence with max_len and max_tokens
# -> Embedding layer embeds based on max_tokens, embed_dim, and sees 0 values from the vectorizer as padding
# -> The Dropout layer will randomly 0 out values at the dropout rate to prevent over reliance on them, 
#    the 1d dropout layer does the same but with whole columns/dimensions from the embedding
# -> Not Equal layer from mask_zero=True, creates a boolean mask of x != 0 to ensure the model isnt trying to learn from the padded layers
# -> BiLSTM: more intensive than LSTM, might want to try just lstm
#            Runs a rnn through the text backwards and forwards with the ability to weight how much of past information is forgot, remebered, and what is output
# -> Attention Pooling: BiLSTM outputs data in the shape of [batch, timestamp, features] and outputs [batch, features], size 64 in this case, with the features being weighted based on learned importance of past words  
#                       If all you do is take the last lstm state or word in the sequence you will lose potentially important past data
#                       If you pool across mean/avg/min important information can be lost especially because of the length of the texts
#                       AttentionPool class lets the model learn to weight various words in the seqeunce differently ex. the is less important than delve which ai loves
# -> Batch Normalization takes in the [batch, weighted features] matrix from Attention pooling and normalizes the features in the given batch with mean ~0, sd ~1 
#                        helps with speed and accuracy, allowing models to converge more efficently due to removing any changes between features and can reduce overfitting by adding a bit of noise
# -> Dense layer: takes in the reduced size from the activation layer and transforms it into a larger(128) sized dimension, 
#                 uses a linear method to handle the still encoded, dense data, and a relu function to turn negative values to 0 removing linearity and allowing the model to learn more complex relationships
# -> Dropout layer pt.2 but not 1d this time
# -> final Dense layer with one node to give 0/1 predictions and a sigmoid to bring it between 0/1 for probabilities

inp = layers.Input(shape=(1,), dtype=tf.string)         
x = vectorizer(inp)                                     
x = layers.Embedding(input_dim=max_tokens, output_dim=embed_dim, mask_zero=True)(x)
x = layers.SpatialDropout1D(dropout_rate)(x)     
x = layers.Bidirectional(layers.LSTM(rnn_units, return_sequences=True))(x)
x = AttentionPool(units=64)(x)
x = layers.BatchNormalization()(x)
x = layers.Dense(dense_units, activation="relu")(x)
x = layers.Dropout(dropout_rate)(x)
out = layers.Dense(1, activation="sigmoid")(x)

model = models.Model(inputs=inp, outputs=out)

# --- model training ---
# Adaptive moment estimation optimizer
# momentum allows more efficent gradent descent by allwoing the updates to gain 'momentum' down the loss gradient 
# this accounts for gradients oscilating as they descend by canceling out oscilating gradients meaning smoother convergance and faster updates as the updates gain momentum allowing for bigger steps
# Binary cross entropy: loss function for binary predictions that penalizes high predicted probabilities for true 0 and vice versa
# accuracy just gives proportion of predictions matching true label
# Area under curve: made for imbalanced data and measures area under true positve vs false positve ROC curve
# allows for better accuracy measure on imbalanced data, .5 for random guesses and 1 for perfect predictions

model.compile(
    optimizer=optimizers.Adam(learning_rate),
    loss=losses.BinaryCrossentropy(),
    metrics=["accuracy", metrics.AUC(name="auc")]
)
model.summary()

In [None]:
callbacks_list = [
    # stops training early if max auc on val set hasnt improved for 6 epochs and returns to model with the best auc
    callbacks.EarlyStopping(monitor="val_auc", mode="max", patience=6, restore_best_weights=True),
    # reduces learning rate by half if max val auc hasnt improved for 3 epochs
    callbacks.ReduceLROnPlateau(monitor="val_auc", mode="max", patience=3, factor=0.5),
    # every epoch if there is a new max val auc, save the model to best.keras
    callbacks.ModelCheckpoint("best.keras", monitor="val_auc", mode="max", save_best_only=True)
]


train_ds = (tf.data.Dataset.from_tensor_slices((train_texts, train_labels))
            .shuffle(10000) # shuffles data before passing it to the model to make sure the model isnt seeing it in the same order every time
            .batch(64) # passes data to model in batches for gpu multi processing
            .prefetch(tf.data.AUTOTUNE)) # speeds up model processing by multi threading data fetching and model training

val_ds = (tf.data.Dataset.from_tensor_slices((test_texts, test_labels))
          .batch(64)
          .prefetch(tf.data.AUTOTUNE))

# Train on GPU )
with tf.device('/GPU:0'):
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=3,
        callbacks=callbacks_list
    )

Epoch 1/3


In [20]:
test_ds = (tf.data.Dataset.from_tensor_slices((test_texts, test_labels))
           .batch(64)
           .prefetch(tf.data.AUTOTUNE))

# load model in
# model = tf.keras.models.load_model("best.keras", custom_objects={"AttentionPool": AttentionPool})


test_loss, test_accuracy, test_auc = model.evaluate(test_ds)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test AUC: {test_auc:.4f}")

[1m718/718[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 33ms/step - accuracy: 0.9976 - auc: 0.9997 - loss: 0.0075
Test Loss: 0.0075
Test Accuracy: 0.9976
Test AUC: 0.9997


In [21]:
# Get predictions for the test set
pred_probs = model.predict(test_ds)
pred_labels = (pred_probs >= 0.5).astype(int).flatten()  # convert to 0/1 labels

# Get true labels (flatten if necessary)
true_labels = np.array(test_labels)

cm = confusion_matrix(true_labels, pred_labels)
print("Confusion Matrix:\n", cm)

[1m718/718[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 31ms/step
Confusion Matrix:
 [[28458    12]
 [  100 17342]]


In [22]:
# test string
text_input = """a
"""
text_input = clean_text(text_input)
text_input = text_input.lower()

# Convert to a batch of 1 element
input_ds = tf.data.Dataset.from_tensor_slices([text_input]).batch(1)

# Get predicted probability (between 0 and 1)
pred_prob = model.predict(input_ds)[0][0]

# Convert to class label (0 or 1)
pred_label = int(pred_prob >= 0.5)

print(f"Predicted probability: {pred_prob:.4f}")
print(f"Predicted label: {pred_label}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 254ms/step
Predicted probability: 1.0000
Predicted label: 1


In [23]:
# I want to check predictions on data that isnt in this dataset to see if it can handle different data
# found https://www.kaggle.com/datasets/prajwaldongre/llm-detect-ai-generated-vs-student-generated-text/data
new_test_df = pd.read_csv('LLM.csv')
new_test_df['Label'] = new_test_df['Label'].map({'ai':1, 'student':0})

new_test_df['Text'] = new_test_df['Text'].apply(clean_text)
new_test_df = new_test_df.drop_duplicates().reset_index(drop=True) # drop duplicates

new_text = new_test_df['Text']
new_label = new_test_df['Label']



new_test_ds = (tf.data.Dataset.from_tensor_slices((new_text, new_label))
           .batch(64)
           .prefetch(tf.data.AUTOTUNE))

new_test_loss, new_test_accuracy, new_test_auc = model.evaluate(new_test_ds)
print(f"Test Loss: {new_test_loss:.4f}")
print(f"Test Accuracy: {new_test_accuracy:.4f}")
print(f"Test AUC: {new_test_auc:.4f}")

# Get predictions for the test set
new_pred_probs = model.predict(new_test_ds)
new_pred_labels = (new_pred_probs >= 0.5).astype(int).flatten()  # convert to 0/1 labels

# Get true labels (flatten if necessary)
new_true_labels = np.array(new_label)

# remove nan for bad predictions
mask = ~np.isnan(new_label)
new_true_labels = np.array(new_label)[mask]
new_pred_labels = new_pred_labels[mask]

# confusion matrix
new_cm = confusion_matrix(new_true_labels, new_pred_labels)
print("Confusion Matrix:\n", new_cm)

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.5654 - auc: 0.5000 - loss: nan
Test Loss: nan
Test Accuracy: 0.5654
Test AUC: 0.5000
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Confusion Matrix:
 [[  0 288]
 [  0 376]]
