In [2]:
import numpy as np
import tensorflow as tf
import keras
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os

print(tf.__version__)
print(os.getcwd())

2.16.1
/Users/torbenkoehler/Developer/code_uni/clickbait-nn


In [3]:
def plot_graphs(history, metric):
  plt.plot(history.history[metric])
  plt.plot(history.history['val_'+metric], '')
  plt.xlabel("Epochs")
  plt.ylabel(metric)
  plt.legend([metric, 'val_'+metric])

raw_data = pd.read_csv(
    "./clickbait_data.csv",
    names=["headline", "clickbait"]
)
# drop first line
raw_data = raw_data.iloc[1:]

raw_data_features = raw_data.copy()
raw_data_labels = raw_data_features.pop("clickbait").astype(int)
raw_data_features = np.array(raw_data_features)

TEST_SPLIT = 0.2
SEED = 29

train_features, test_features, train_labels, test_labels = train_test_split(
    raw_data_features,
    raw_data_labels,
    test_size=TEST_SPLIT,
    random_state=SEED
)

In [4]:
# text encoding
VOCAB_SIZE = 10000
encoder = keras.layers.TextVectorization(
    max_tokens=VOCAB_SIZE,
    output_sequence_length=64,
)
encoder.adapt(train_features)
example = train_features[0]
encoded_example = encoder(example)[:3]
encoded_example

2024-04-23 11:34:00.183271: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2024-04-23 11:34:00.183296: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2024-04-23 11:34:00.183302: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2024-04-23 11:34:00.183320: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-04-23 11:34:00.183332: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


<tf.Tensor: shape=(1, 64), dtype=int64, numpy=
array([[ 116,  111, 3427,   40,   46,    3,  831,  833,   29,    6,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0]])>

In [5]:
model = keras.models.Sequential([
    keras.layers.Input(shape=(1,), dtype='string'),
    encoder,
    keras.layers.Embedding(
        input_dim=len(encoder.get_vocabulary()),
        output_dim=64,
        mask_zero=True
    ),
    keras.layers.Bidirectional(keras.layers.LSTM(64)),
    keras.layers.Dense(64, activation="relu"),
    keras.layers.Dense(1, activation="sigmoid")
])

In [6]:
train_ds = tf.data.Dataset.from_tensor_slices((train_features, np.expand_dims(train_labels, -1)))
test_ds = tf.data.Dataset.from_tensor_slices((test_features, np.expand_dims(test_labels, -1)))

print(len(train_features))
print(len(test_features))

25600
6400


In [7]:
model.compile(
    loss=keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(1e-4),
    metrics=["accuracy"]
)

In [8]:
model.summary()

In [9]:
history = model.fit(
    train_ds.repeat(),
    epochs=10,
    batch_size=128,
    validation_data=test_ds.repeat(),
    validation_steps=1000
)

Epoch 1/10


  output, from_logits = _get_logits(
2024-04-23 11:34:01.027181: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.
2024-04-23 11:34:01.030291: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[1m  350/25600[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m11:49[0m 28ms/step - accuracy: 0.5276 - loss: 0.6931

KeyboardInterrupt: 

In [None]:
plot_graphs(history, 'accuracy')

In [None]:
plot_graphs(history, 'loss')

In [None]:
test_loss, test_acc = model.evaluate(test_ds)

print('Test Loss:', test_loss)
print('Test Accuracy:', test_acc)

In [None]:
model.save("./model.keras")

In [10]:
model.load_weights("./model.keras")


  trackable.load_own_variables(weights_store.get(inner_path))


In [None]:
from google.colab import drive
drive.mount('/content/drive')