In [79]:
import pandas as pd
import tensorflow as tf
import numpy as np
import re

from tensorflow.keras import layers, models, callbacks, optimizers, losses, metrics
from tensorflow.keras.preprocessing.sequence import pad_sequences

from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

In [93]:
df = pd.read_csv('AI_Human.csv')

# only keep normal characters
cleaning_pattern = r"[^a-zA-Z0-9\s.,;:!?'\"()\{\}\-—]"

def clean_text(text):
    text = text.replace('\n', ' ')  # replace newline characters with space
    text = re.sub(cleaning_pattern, '', text)  # remove unwanted characters
    text = re.sub(r'\s+', ' ', text)  # collapse multiple spaces into one
    text = text.strip()  # remove leading/trailing spaces
    return text
    
df['text'] = df['text'].apply(clean_text)
df = df.drop_duplicates().reset_index(drop=True) # drop duplicates

In [94]:
# Compute the number of words in each text
df['length'] = df['text'].str.split().apply(len)

# Keep only sentences with more than 3 words
df = df[df['length'] > 3].reset_index(drop=True)

# Print basic length statistics
print("Min length:", df['length'].min())
print("Max length:", df['length'].max())
print("Mean length:", df['length'].mean())

# Compute 95th percentile to determine max sequence length
percentile_95 = int(np.percentile(df['length'], 95))
print("95th percentile length:", percentile_95)

# Set maximum sequence length for vectorization
max_len = percentile_95

Min length: 4
Max length: 1668
Mean length: 393.05384024395755
95th percentile length: 722


In [95]:
texts = df['text']
labels = df['generated']

# split out train/test to test final model on
train_texts, test_texts, train_labels, test_labels = train_test_split(
    texts, labels, test_size=0.1, stratify=labels, random_state=42)

# split val set out from train to train model on
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts, train_labels, test_size=0.2, stratify=train_labels, random_state=42)

In [96]:
max_tokens = 30000   # vocabulary size, counts tokens/words by frequency and any token not in the top max_tokens all become marked as the same

vectorizer = layers.TextVectorization(
    max_tokens=max_tokens,
    output_mode="int",
    output_sequence_length=max_len
)

vectorizer.adapt(train_texts)

In [97]:
# --- configurable hyperparams ---
max_tokens = max_tokens      # vocab size
maxlen = max_len            # max sequence length
embed_dim = 128
rnn_units = 128
dense_units = 128
dropout_rate = 0.3
learning_rate = 1e-4

# --- small attention layer ---
class AttentionPool(layers.Layer):
    def __init__(self, units=64, **kwargs):
        super().__init__(**kwargs)
        # tell Keras this layer understands masks
        self.supports_masking = True

        self.dense = layers.Dense(units, activation="tanh")
        self.score = layers.Dense(1, use_bias=False)

    def compute_mask(self, inputs, mask=None):
        # This layer reduces the time dimension to a vector, so it produces no mask.
        return None

    def call(self, inputs, mask=None):
        # inputs: [B, T, F], mask: [B, T] or None
        x = self.dense(inputs)            # [B, T, units]
        scores = self.score(x)            # [B, T, 1]

        if mask is not None:
            # Broadcast mask to scores' shape and make masked positions very negative
            mask = tf.cast(mask, scores.dtype)           # [B, T]
            mask = tf.expand_dims(mask, axis=-1)         # [B, T, 1]
            scores = scores + (mask - 1.0) * 1e9         # masked positions -> ~ -inf

        weights = tf.nn.softmax(scores, axis=1)          # [B, T, 1]
        weighted = tf.reduce_sum(weights * inputs, axis=1)  # [B, F]
        return weighted

    def get_config(self):
        cfg = super().get_config()
        cfg.update({"units": self.dense.units if hasattr(self.dense, "units") else None})
        return cfg

# --- model ---
inp = layers.Input(shape=(1,), dtype=tf.string)         # raw text
x = vectorizer(inp)                                     # -> int sequence [B, T]
x = layers.Embedding(input_dim=max_tokens, output_dim=embed_dim, mask_zero=True)(x)
x = layers.SpatialDropout1D(dropout_rate)(x)
x = layers.Bidirectional(layers.LSTM(rnn_units, return_sequences=True))(x)
x = AttentionPool(units=64)(x)
x = layers.BatchNormalization()(x)
x = layers.Dense(dense_units, activation="relu")(x)
x = layers.Dropout(dropout_rate)(x)
out = layers.Dense(1, activation="sigmoid")(x)

model = models.Model(inputs=inp, outputs=out)

model = models.Model(inputs=inp, outputs=out)
model.compile(
    optimizer=optimizers.Adam(learning_rate),
    loss=losses.BinaryCrossentropy(),
    metrics=["accuracy", metrics.AUC(name="auc")]
)
model.summary()

In [109]:
callbacks_list = [
    callbacks.EarlyStopping(monitor="val_auc", mode="max", patience=6, restore_best_weights=True),
    callbacks.ReduceLROnPlateau(monitor="val_auc", mode="max", patience=3, factor=0.5),
    callbacks.ModelCheckpoint("best.h5", monitor="val_auc", mode="max", save_best_only=True)
]


train_ds = (tf.data.Dataset.from_tensor_slices((train_texts, train_labels))
            .shuffle(10000)
            .batch(64)
            .prefetch(tf.data.AUTOTUNE))

val_ds = (tf.data.Dataset.from_tensor_slices((test_texts, test_labels))
          .batch(64)
          .prefetch(tf.data.AUTOTUNE))

# Train on GPU (ops created here will use GPU)
with tf.device('/GPU:0'):
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=5,
        callbacks=callbacks_list
    )


Epoch 1/5
[1m5228/5228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step - accuracy: 0.9716 - auc: 0.9917 - loss: 0.0791



[1m5228/5228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m431s[0m 82ms/step - accuracy: 0.9880 - auc: 0.9984 - loss: 0.0363 - val_accuracy: 0.9947 - val_auc: 0.9992 - val_loss: 0.0172 - learning_rate: 1.0000e-04
Epoch 2/5
[1m5228/5228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m427s[0m 82ms/step - accuracy: 0.9958 - auc: 0.9996 - loss: 0.0129 - val_accuracy: 0.9961 - val_auc: 0.9988 - val_loss: 0.0159 - learning_rate: 1.0000e-04
Epoch 3/5
[1m5228/5228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step - accuracy: 0.9970 - auc: 0.9998 - loss: 0.0095



[1m5228/5228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m428s[0m 82ms/step - accuracy: 0.9973 - auc: 0.9998 - loss: 0.0082 - val_accuracy: 0.9980 - val_auc: 0.9998 - val_loss: 0.0053 - learning_rate: 1.0000e-04
Epoch 4/5
[1m5228/5228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m428s[0m 82ms/step - accuracy: 0.9980 - auc: 0.9999 - loss: 0.0059 - val_accuracy: 0.9985 - val_auc: 0.9997 - val_loss: 0.0048 - learning_rate: 1.0000e-04
Epoch 5/5
[1m5228/5228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step - accuracy: 0.9983 - auc: 0.9999 - loss: 0.0048



[1m5228/5228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m427s[0m 82ms/step - accuracy: 0.9985 - auc: 0.9999 - loss: 0.0042 - val_accuracy: 0.9985 - val_auc: 1.0000 - val_loss: 0.0039 - learning_rate: 1.0000e-04


In [110]:
test_ds = (tf.data.Dataset.from_tensor_slices((test_texts, test_labels))
           .batch(64)
           .prefetch(tf.data.AUTOTUNE))

test_loss, test_accuracy, test_auc = model.evaluate(test_ds)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test AUC: {test_auc:.4f}")

[1m727/727[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 33ms/step - accuracy: 0.9985 - auc: 1.0000 - loss: 0.0039
Test Loss: 0.0039
Test Accuracy: 0.9985
Test AUC: 1.0000


In [111]:
# Get predictions for the test set
pred_probs = model.predict(test_ds)
pred_labels = (pred_probs >= 0.5).astype(int).flatten()  # convert to 0/1 labels

# Get true labels (flatten if necessary)
true_labels = np.array(test_labels)

# confusion matrixcm = confusion_matrix(true_labels, pred_labels)
print("Confusion Matrix:\n", cm)

[1m727/727[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 31ms/step
Confusion Matrix:
 [[198  90]
 [156 220]]


In [112]:
# test string
text_input = """a
"""
text_input = clean_text(text_input)
text_input = text_input.lower()

# Convert to a batch of 1 element
input_ds = tf.data.Dataset.from_tensor_slices([text_input]).batch(1)

# Get predicted probability (between 0 and 1)
pred_prob = model.predict(input_ds)[0][0]

# Convert to class label (0 or 1)
pred_label = int(pred_prob >= 0.5)

print(f"Predicted probability: {pred_prob:.4f}")
print(f"Predicted label: {pred_label}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 296ms/step
Predicted probability: 1.0000
Predicted label: 1


In [113]:
# I want to check predictions on data that isnt in this dataset to see if it can handle different data
# found https://www.kaggle.com/datasets/prajwaldongre/llm-detect-ai-generated-vs-student-generated-text/data
new_test_df = pd.read_csv('LLM.csv')
new_test_df['Label'] = new_test_df['Label'].map({'ai':1, 'student':0})

new_test_df['Text'] = new_test_df['Text'].apply(clean_text)
new_test_df = new_test_df.drop_duplicates().reset_index(drop=True) # drop duplicates

new_text = new_test_df['Text']
new_label = new_test_df['Label']



new_test_ds = (tf.data.Dataset.from_tensor_slices((new_text, new_label))
           .batch(64)
           .prefetch(tf.data.AUTOTUNE))

new_test_loss, new_test_accuracy, new_test_auc = model.evaluate(new_test_ds)
print(f"Test Loss: {new_test_loss:.4f}")
print(f"Test Accuracy: {new_test_accuracy:.4f}")
print(f"Test AUC: {new_test_auc:.4f}")

# Get predictions for the test set
new_pred_probs = model.predict(new_test_ds)
new_pred_labels = (new_pred_probs >= 0.5).astype(int).flatten()  # convert to 0/1 labels

# Get true labels (flatten if necessary)
new_true_labels = np.array(new_label)

# remove nan for bad predictions
mask = ~np.isnan(new_label)
new_true_labels = np.array(new_label)[mask]
new_pred_labels = new_pred_labels[mask]

# confusion matrix
cm = confusion_matrix(new_true_labels, new_pred_labels)
print("Confusion Matrix:\n", cm)

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.5654 - auc: 0.5000 - loss: nan
Test Loss: nan
Test Accuracy: 0.5654
Test AUC: 0.5000
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Confusion Matrix:
 [[  0 288]
 [  0 376]]
