Imports

In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

import numpy as np
import random
import pandas as pd
from sklearn.model_selection import train_test_split

Load and Split Data

In [2]:
# Pin every random number generator used in this notebook (NumPy, the Python
# stdlib and TensorFlow) so that re-running it yields the same results.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)

# Read the full corpus; the "Text" and "Label" columns are the only ones used.
dataset = pd.read_csv("../../datasets/final_dataset.csv")
all_texts = dataset["Text"]
all_labels = dataset["Label"]

# Split 1: hold out 20% of the corpus as the test set, stratified on the label
# so the class proportions are preserved.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    all_texts,
    all_labels,
    test_size=0.2,
    random_state=SEED,
    stratify=all_labels,
)

# Split 2: carve 20% of the remaining training data off as the validation set
# (roughly 64% train / 16% validation / 20% test of the original corpus).
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts,
    train_labels,
    test_size=0.2,
    random_state=SEED,
    stratify=train_labels,
)

Process Data

In [3]:
# Text-encoding hyperparameters.
VOCAB_SIZE = 10000  # upper bound on the word indices the tokenizer emits
MAX_LEN = 100       # every sequence is padded/truncated to this many tokens

# The vocabulary is learned from the training split only, so no information
# from the validation or test texts leaks into the word index.
tokenizer = Tokenizer(num_words=VOCAB_SIZE)
tokenizer.fit_on_texts(train_texts)

# Map each split's texts to lists of integer word indices.
X_train_seq, X_val_seq, X_test_seq = [
    tokenizer.texts_to_sequences(split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
]

# Bring every sequence to a fixed length. pad_sequences is used with its
# default padding/truncating behaviour (both act on the front of the sequence).
X_train, X_val, X_test = [
    pad_sequences(split_seqs, maxlen=MAX_LEN)
    for split_seqs in (X_train_seq, X_val_seq, X_test_seq)
]

# Labels as plain NumPy arrays for Keras.
y_train, y_val, y_test = [
    np.array(split_labels)
    for split_labels in (train_labels, val_labels, test_labels)
]


Neural Network

In [4]:
# Architecture: embedding -> two stacked GRUs -> batch norm -> small dense head.
# Every trainable block carries L2 weight decay and is followed by dropout.
layer_stack = [
    # Learn a 64-dimensional vector for each of the 10000 possible word indices.
    Embedding(input_dim=10000, output_dim=64),

    # First recurrent layer emits the full sequence so it can feed the next GRU.
    GRU(
        16,
        activation="tanh",
        return_sequences=True,
        kernel_regularizer=l2(0.003),
        recurrent_dropout=0.5,
    ),
    Dropout(0.6),

    # Second recurrent layer collapses the sequence to its final state only.
    GRU(
        8,
        kernel_regularizer=l2(0.003),
        recurrent_dropout=0.5,
        return_sequences=False,
    ),
    Dropout(0.6),

    BatchNormalization(),

    Dense(16, activation="relu", kernel_regularizer=l2(0.003)),
    Dropout(0.7),

    # Single sigmoid unit: output is a probability for the positive class.
    Dense(1, activation="sigmoid"),
]
model = Sequential(layer_stack)

# Adam with a small step size; binary cross-entropy matches the sigmoid output.
optimizer = Adam(learning_rate=0.0002)
model.compile(
    loss="binary_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

# Stop once validation loss has failed to improve for 2 consecutive epochs and
# roll the model back to the weights of the best epoch seen.
early_stopping = EarlyStopping(
    monitor='val_loss', patience=2, restore_best_weights=True
)

# After a single epoch without validation-loss improvement, multiply the
# learning rate by 0.3, never going below 5e-5.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss', factor=0.3, patience=1, min_lr=0.00005
)

# Fit for at most 15 epochs, scoring the validation split after each epoch so
# both callbacks have a val_loss to monitor.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=15,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping, reduce_lr],
)


Epoch 1/15
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 58ms/step - accuracy: 0.5172 - loss: 1.0151 - val_accuracy: 0.5424 - val_loss: 0.9222 - learning_rate: 2.0000e-04
Epoch 2/15
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 52ms/step - accuracy: 0.5263 - loss: 0.9919 - val_accuracy: 0.5686 - val_loss: 0.8983 - learning_rate: 2.0000e-04
Epoch 3/15
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 50ms/step - accuracy: 0.5378 - loss: 0.9139 - val_accuracy: 0.5855 - val_loss: 0.8746 - learning_rate: 2.0000e-04
Epoch 4/15
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 50ms/step - accuracy: 0.5764 - loss: 0.8709 - val_accuracy: 0.7334 - val_loss: 0.8512 - learning_rate: 2.0000e-04
Epoch 5/15
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 52ms/step - accuracy: 0.5861 - loss: 0.8202 - val_accuracy: 0.7982 - val_loss: 0.8204 - learning_rate: 2.0000e-04
Epoch 6/15
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

Results

In [5]:
# Score the held-out test split. evaluate() returns the loss followed by the
# compiled metrics, which here is just accuracy.
test_loss, test_acc = model.evaluate(x=X_test, y=y_test)
print(f"Test accuracy: {test_acc:.4f}\n")

# Re-score the validation split with the final weights, for comparison against
# the test figure above.
val_loss, val_acc = model.evaluate(x=X_val, y=y_val)
print(f"Validation accuracy: {val_acc:.4f}")

[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.8726 - loss: 0.4983
Test accuracy: 0.8841

[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.8865 - loss: 0.4845
Validation accuracy: 0.8891


Benchmarking

In [6]:
# Read the separate benchmark file; unlike the training CSV it is
# semicolon-delimited.
new_data = pd.read_csv("../../datasets/validation_dataset.csv", sep=";")

# Encode the benchmark texts with the tokenizer fitted on the training split
# and pad to the same length (100) that the model was trained on.
X_new_seq = tokenizer.texts_to_sequences(new_data["Text"])
X_new = pad_sequences(X_new_seq, maxlen=100)

# One sigmoid score per sample, flattened to a 1-D array.
predictions = model.predict(X_new).flatten()

# Threshold the scores at 0.5 into string labels.
# NOTE(review): this assumes the positive class (1) in the training labels
# means "AI" - confirm against final_dataset.csv.
labels = np.where(predictions > 0.5, "AI", "Human").tolist()

# Predicted label next to the raw score it was derived from.
output_df = pd.DataFrame({"Label": labels, "Prediction": predictions})

# The benchmark file ships its own ground truth in the "Label" column.
ground_truth = new_data["Label"]

# Accuracy is the fraction of rows whose predicted label equals the true one.
accuracy = (output_df["Label"] == ground_truth).mean()
print(f"Accuracy: {accuracy:.4f}")

# Put the true label alongside the prediction (output_df itself is untouched).
comparison_df = output_df.assign(Label_actual=ground_truth)

# Keep only the rows where prediction and ground truth disagree.
misclassified = comparison_df.loc[
    comparison_df["Label"].ne(comparison_df["Label_actual"])
]
print("\nMisclassified Samples:")
print(misclassified)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 301ms/step
Accuracy: 0.7125

Misclassified Samples:
    Label  Prediction Label_actual
5   Human    0.101738           AI
6   Human    0.361020           AI
9   Human    0.257932           AI
12  Human    0.332061           AI
13  Human    0.258429           AI
15     AI    0.612618        Human
22  Human    0.269079           AI
24  Human    0.216066           AI
28  Human    0.441210           AI
33  Human    0.476455           AI
36     AI    0.591288        Human
38  Human    0.402031           AI
39  Human    0.321272           AI
42  Human    0.300596           AI
43  Human    0.441112           AI
46     AI    0.655020        Human
48     AI    0.720089        Human
51  Human    0.278061           AI
57  Human    0.463407           AI
59  Human    0.342313           AI
69  Human    0.226644           AI
71  Human    0.429430           AI
79  Human    0.256309           AI
