Imports

In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers import Adam

import numpy as np
import random
import pandas as pd
from sklearn.model_selection import train_test_split

Read CSV

In [2]:
# Seed Python, NumPy and TensorFlow with the same value so that the random
# number generators used below start from a known state.
random.seed(144)
np.random.seed(144)
tf.random.set_seed(144)

# Read the labelled corpus; only the "Text" and "Label" columns are used below.
dataset = pd.read_csv("../datasets/final_dataset.csv")

# Split 1: hold out 20% of all rows as the test set, stratified on the label
# so the class proportions are preserved in both parts.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    dataset["Text"],
    dataset["Label"],
    test_size=0.2,
    random_state=144,
    stratify=dataset["Label"],
)

# Split 2: take 20% of the remaining training rows (16% of the full dataset)
# as the validation set, again stratified; 64% is left for training.
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts,
    train_labels,
    test_size=0.2,
    random_state=144,
    stratify=train_labels,
)

Process Data

In [3]:
# Build the vocabulary from the training texts only, so nothing from the
# validation/test splits influences the word index. num_words=10000 caps the
# vocabulary used when converting texts to integer sequences.
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(train_texts)

# Convert every split to sequences of word indices with the same tokenizer.
X_train_seq, X_val_seq, X_test_seq = (
    tokenizer.texts_to_sequences(split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
)

# Pad/truncate every sequence to a fixed length of 100 tokens.
X_train, X_val, X_test = (
    pad_sequences(split_seq, maxlen=100)
    for split_seq in (X_train_seq, X_val_seq, X_test_seq)
)

# Labels as plain NumPy arrays, in the same order as the texts of each split.
y_train, y_val, y_test = (
    np.array(split_labels)
    for split_labels in (train_labels, val_labels, test_labels)
)



Neural Network

In [None]:
# Define the RNN model:
#   - Embedding: maps each of the 10000 vocabulary indices (same cap as the
#     tokenizer's num_words) to a 50-dimensional vector; inputs are the
#     100-token padded sequences.
#   - SimpleRNN: a single recurrent layer with 64 units and ReLU activation.
#   - Dense: one sigmoid unit producing a score in (0, 1) for the binary label.
model = Sequential([
    Embedding(input_dim=10000, output_dim=50, input_length=100),
    SimpleRNN(64, activation="relu"),

    Dense(1, activation="sigmoid")
])

# Compile the model.
# Pass the configured optimizer object itself: previously the string "adam"
# was passed, which made Keras build a fresh default Adam and silently ignored
# this instance (and therefore any change made to learning_rate here).
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])

# Train the model for 10 epochs in mini-batches of 32, reporting loss and
# accuracy on the validation split after every epoch.
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val))

Epoch 1/10




[1m39/82[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1m0s[0m 12ms/step - accuracy: 0.5227 - loss: 0.6906

Results

In [None]:
# Score the trained model on the held-out test split. evaluate() returns the
# loss followed by the metrics given to compile() (here: accuracy).
test_loss, test_acc = model.evaluate(x=X_test, y=y_test)
print(f"Test accuracy: {test_acc:.4f}\n")

# Same measurement on the validation split, for comparison with the test score.
val_loss, val_acc = model.evaluate(x=X_val, y=y_val)
print(f"Validation accuracy: {val_acc:.4f}")

[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8593 - loss: 0.3401
Test accuracy: 0.8705

[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8876 - loss: 0.2670
Validation accuracy: 0.8860


Benchmarking

In [None]:
# Read the benchmark inputs (tab-separated; the "ID" and "Text" columns are used).
new_data = pd.read_csv("../datasets/dataset1_inputs.csv", sep="\t")

# Encode the benchmark texts exactly like the training data: same fitted
# tokenizer, same fixed length of 100 tokens.
X_new_seq = tokenizer.texts_to_sequences(new_data["Text"])
X_new = pad_sequences(X_new_seq, maxlen=100)

# One sigmoid score per text, flattened to a 1-D array.
predictions = model.predict(X_new).flatten()

# Scores strictly above 0.5 are labelled "AI", everything else "Human".
labels = ["AI" if score > 0.5 else "Human" for score in predictions]

# Collect ID, predicted label and raw score side by side.
output_df = pd.DataFrame(
    {
        "ID": new_data["ID"],
        "Label": labels,
        "Prediction": predictions,
    }
)

# Read the expected labels (also tab-separated) and line them up with the
# predictions by ID; the clashing "Label" columns get the suffixes below.
ground_truth = pd.read_csv("../datasets/dataset1_outputs.csv", sep="\t")
comparison_df = output_df.merge(
    ground_truth,
    on="ID",
    suffixes=("_predicted", "_actual"),
)

# Row-wise agreement between predicted and expected label; its mean is the accuracy.
is_match = comparison_df["Label_predicted"] == comparison_df["Label_actual"]
accuracy = is_match.mean()

print(f"Accuracy: {accuracy:.4f}")

# List every row where the prediction disagrees with the expected label.
misclassified = comparison_df[~is_match]
print("\nMisclassified Samples:")
print(misclassified)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Accuracy: 0.7333

Misclassified Samples:
       ID Label_predicted  Prediction Label_actual
1    D1-2           Human    0.013412           AI
7    D1-8           Human    0.143792           AI
9   D1-10           Human    0.466122           AI
11  D1-12           Human    0.399194           AI
19  D1-20           Human    0.135811           AI
21  D1-22           Human    0.171110           AI
25  D1-26           Human    0.453890           AI
29  D1-30           Human    0.031945           AI


Test Sentences Manually

In [None]:
# Text to classify by hand; edit this literal to try another passage.
hardcoded_sentence = "The question is meaningless because it assumes the existence of “creation scientists”. So-called creation scientists are not scientists. They are people pretending to be scientists to give themselves credibility with rubes who prefer a belief in magic to an acceptance of reality. Creation scientists are con men. Fraudsters. Liars. Some of them even have degrees in some scientific discipline, but it’s not likely to be biology. They have never published their findings in peer-reviewed scientific journals because they’re peer-reviewed, and that’s a filter they can’t pass through."

# Encode the passage with the tokenizer fitted on the training texts and
# pad/truncate it to the same 100-token length the model was trained on.
# The passage is wrapped in a list because the tokenizer expects a batch of texts.
X_hardcoded_seq = tokenizer.texts_to_sequences([hardcoded_sentence])
X_hardcoded = pad_sequences(X_hardcoded_seq, maxlen=100)

# The batch holds a single text, so take the only score from the flattened output.
prediction = model.predict(X_hardcoded).flatten()[0]

# Same decision rule as the benchmark: strictly above 0.5 means "AI".
if prediction > 0.5:
    label = "AI"
else:
    label = "Human"

# Report the input together with the decision and the raw score.
print(f"Sentence: {hardcoded_sentence}")
print(f"Predicted Label: {label}")
print(f"Prediction Score: {prediction:.4f}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 128ms/step
Sentence: The question is meaningless because it assumes the existence of “creation scientists”. So-called creation scientists are not scientists. They are people pretending to be scientists to give themselves credibility with rubes who prefer a belief in magic to an acceptance of reality. Creation scientists are con men. Fraudsters. Liars. Some of them even have degrees in some scientific discipline, but it’s not likely to be biology. They have never published their findings in peer-reviewed scientific journals because they’re peer-reviewed, and that’s a filter they can’t pass through.
Predicted Label: Human
Prediction Score: 0.0415
