In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

# Load the URL dataset; the 'title' and 'distraction' columns are used below.
df = pd.read_csv('urls_dataset.csv')

# Discard incomplete rows before casting. Without this, astype(str) turns a
# missing title into the literal text "nan" (which would then be learned as a
# token), and astype(int) raises on a missing label. `df` itself is left
# untouched so the raw data stays available for inspection.
labelled_rows = df.dropna(subset=['title', 'distraction'])

# Preprocess the data: convert titles to strings and distractions to integers
texts = labelled_rows['title'].astype(str).tolist()
labels = labelled_rows['distraction'].astype(int).tolist()

# Split into train and test sets (20% held out, fixed seed for a repeatable split)
x_train, x_test, y_train, y_test = train_test_split(
    texts, labels, test_size=0.2, random_state=42
)

# Convert labels to NumPy arrays
y_train = np.array(y_train)
y_test = np.array(y_test)

# Text vectorisation settings: token cap and output sequence length
max_features = 10000
sequence_length = 100

# Integer-encoding layer, adapted on the training titles only
vectorizer = layers.TextVectorization(
    output_sequence_length=sequence_length,
    output_mode='int',
    max_tokens=max_features,
)
vectorizer.adapt(x_train)


def _encode_titles(titles):
    """Run *titles* through the adapted vectorizer and return the result as a NumPy array."""
    # Each title is wrapped in its own single-element row before encoding.
    column = np.array([[title] for title in titles])
    return vectorizer(column).numpy()


# Vectorize the training and testing data
x_train_vec = _encode_titles(x_train)
x_test_vec = _encode_titles(x_test)

# Assemble the classifier one layer at a time:
# embedding -> average over the sequence axis -> dense ReLU -> sigmoid output
model = keras.Sequential()
model.add(layers.Embedding(input_dim=max_features, output_dim=64))
model.add(layers.GlobalAveragePooling1D())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Fit for 10 epochs, with 20% of the training data reserved for validation
model.fit(
    x=x_train_vec,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)

# Score the trained model on the held-out test set
test_metrics = model.evaluate(x_test_vec, y_test)
loss, accuracy = test_metrics
print(f'Test accuracy: {accuracy:.3f}')

# Export the model as a TensorFlow SavedModel.
# NOTE(review): only `model` is saved here; the adapted `vectorizer` is not
# part of it, so whoever loads this artifact must supply already-vectorized
# input - confirm how the vocabulary is shipped to consumers.
export_dir = 'distraction_detector_model_tf'
tf.saved_model.save(obj=model, export_dir=export_dir)


Epoch 1/10
551/551 ━━━━━━━━━━━━━━━━━━━━ 4s 6ms/step - accuracy: 0.7477 - loss: 0.5618 - val_accuracy: 0.7541 - val_loss: 0.5397
Epoch 2/10
551/551 ━━━━━━━━━━━━━━━━━━━━ 3s 5ms/step - accuracy: 0.7705 - loss: 0.4977 - val_accuracy: 0.8785 - val_loss: 0.3379
Epoch 3/10
551/551 ━━━━━━━━━━━━━━━━━━━━ 3s 5ms/step - accuracy: 0.8706 - loss: 0.3230 - val_accuracy: 0.8890 - val_loss: 0.3126
Epoch 4/10
551/551 ━━━━━━━━━━━━━━━━━━━━ 3s 5ms/step - accuracy: 0.8980 - loss: 0.2663 - val_accuracy: 0.8874 - val_loss: 0.3124
Epoch 5/10
551/551 ━━━━━━━━━━━━━━━━━━━━ 3s 5ms/step - accuracy: 0.9001 - loss: 0.2513 - val_accuracy: 0.8758 - val_loss: 0.3079
Epoch 6/10
551/551 ━━━━━━━━━━━━━━━━━━━━ 3s 5ms/step - accuracy: 0.9040 - loss: 0.2442 - val_accuracy: 0.8881 - val_loss: 0.2680
Epoch 7/10
551/551

INFO:tensorflow:Assets written to: distraction_detector_model_tf/assets
