In [1]:
import os

os.environ["KERAS_BACKEND"] = "tensorflow"

import keras
import tensorflow as tf
import numpy as np
from keras import layers
import pandas as pd
import time
from sklearn.model_selection import train_test_split
import string
import re

In [2]:
# df_fake = pd.read_csv('../data/DataSet_Misinfo_FAKE.csv')
# df_fake = df_fake.dropna()
# df_real = pd.read_csv('../data/DataSet_Misinfo_TRUE.csv')
# df_real = df_real.dropna()
# df_fake['truth'] = 0
# df_real['truth'] = 1
# df = pd.concat([df_real, df_fake])
# df = df.drop('Unnamed: 0', axis=1)
# df = df.sample(frac=1, random_state=42).reset_index(drop=True)
# Load WELFake and fold the headline into the article body so the model sees
# a single text field per row. Only `text` and `label` survive the chain.
#
# NOTE(review): NaN + str is NaN in pandas, so any row with a missing title
# OR a missing body ends up with NaN text and is removed by dropna(). If rows
# with a body but no title should be kept, fill the title with "" first.
df = (
    pd.read_csv("../data/WELFake_Dataset.csv")
    .assign(text=lambda frame: frame['title'] + " " + frame['text'])
    .drop(columns=['title', 'Unnamed: 0'])
    .dropna()
)
df.head()

Unnamed: 0,text,label
0,LAW ENFORCEMENT ON HIGH ALERT Following Threat...,1
2,UNBELIEVABLE! OBAMA’S ATTORNEY GENERAL SAYS MO...,1
3,"Bobby Jindal, raised Hindu, uses story of Chri...",0
4,SATAN 2: Russia unvelis an image of its terrif...,1
5,About Time! Christian Group Sues Amazon and SP...,1


In [3]:
# Plain Python lists of article texts and their labels, in row order.
X = df['text'].to_list()
y = df['label'].to_list()

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [4]:
# Hyperparameters shared by the vectorizer and the model
max_features = 20000     # vocabulary size cap (also the Embedding input_dim)
embedding_dim = 128      # width of each token embedding
sequence_length = 500    # every text is padded/truncated to this many tokens

batch_size = 32

# Batched (text, label) datasets built from the in-memory split.
train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_ds = train_ds.batch(batch_size)

val_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test))
val_ds = val_ds.batch(batch_size)

# Maps raw strings to fixed-length sequences of integer token ids.
vectorize_layer = layers.TextVectorization(
    max_tokens=max_features,
    output_mode="int",
    output_sequence_length=sequence_length,
)

# The vocabulary is learned from the training texts only (labels stripped).
text_ds = train_ds.map(lambda text, _label: text)
vectorize_layer.adapt(text_ds)


In [5]:
def vectorize_text(text, label):
    """Turn a batch of raw strings into token ids, passing the label through.

    A trailing axis is added to `text` before it is handed to the
    module-level `vectorize_layer`.

    Returns:
        A `(token_ids, label)` tuple; `label` is returned unchanged.
    """
    text_column = tf.expand_dims(text, axis=-1)
    token_ids = vectorize_layer(text_column)
    return token_ids, label

# Tokenize each batch once, cache the result, and keep up to 10 batches
# prefetched so input preparation overlaps with training.
#
# NOTE(review): this rebinds train_ds/val_ds, so the cell is not idempotent —
# running it a second time would feed already-vectorized integers back into
# vectorize_text. Re-run the cell that builds the raw datasets first.
train_ds = train_ds.map(vectorize_text).cache().prefetch(buffer_size=10)
val_ds = val_ds.map(vectorize_text).cache().prefetch(buffer_size=10)

In [6]:
# Classifier: embedding -> two strided 1D convolutions -> global max pool ->
# dense head with a single sigmoid output.
inputs = keras.Input(shape=(None,), dtype="int64")  # variable-length token ids

x = layers.Embedding(input_dim=max_features, output_dim=embedding_dim)(inputs)
x = layers.Dropout(rate=0.5)(x)

# Both convolution layers share the same configuration.
conv_kwargs = dict(
    filters=128,
    kernel_size=7,
    strides=3,
    padding="valid",
    activation="relu",
)
x = layers.Conv1D(**conv_kwargs)(x)
x = layers.Conv1D(**conv_kwargs)(x)
x = layers.GlobalMaxPooling1D()(x)

x = layers.Dense(units=128, activation="relu")(x)
x = layers.Dropout(rate=0.5)(x)

predictions = layers.Dense(units=1, activation="sigmoid", name="predictions")(x)

model = keras.Model(inputs=inputs, outputs=predictions)
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)


In [7]:
# Number of full passes over the training set.
# NOTE(review): in the recorded run val_loss went 0.0592 -> 0.0552 -> 0.1071,
# i.e. it rose in the last epoch while training loss kept falling; consider
# fewer epochs or early stopping.
epochs = 3

model.fit(
    x=train_ds,
    epochs=epochs,
    validation_data=val_ds,
)

Epoch 1/3
[1m1789/1789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 44ms/step - accuracy: 0.8663 - loss: 0.2479 - val_accuracy: 0.9781 - val_loss: 0.0592
Epoch 2/3
[1m1789/1789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 44ms/step - accuracy: 0.9836 - loss: 0.0461 - val_accuracy: 0.9829 - val_loss: 0.0552
Epoch 3/3
[1m1789/1789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 44ms/step - accuracy: 0.9935 - loss: 0.0211 - val_accuracy: 0.9750 - val_loss: 0.1071


<keras.src.callbacks.history.History at 0x15880060040>

In [24]:
# Wrap the trained classifier so it accepts raw strings directly:
#   string input -> vectorize_layer (token ids) -> model (probability)
inputs = keras.Input(shape=(1,), dtype="string")
indices = vectorize_layer(inputs)
outputs = model(indices)

end_to_end_model = keras.Model(inputs=inputs, outputs=outputs)

# Compiled so that evaluate() can report loss and accuracy on raw-text data.
end_to_end_model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [9]:
# External evaluation set: keep only the headline and its label, then drop
# any row that is missing either of them.
df_test = (
    pd.read_csv("../data/FakeNewsNet.csv")
    .drop(columns=['news_url', 'tweet_num', 'source_domain'])
    .dropna()
)
df_test.head()

Unnamed: 0,title,real
0,Kandi Burruss Explodes Over Rape Accusation on...,1
1,People's Choice Awards 2018: The best red carp...,1
2,Sophia Bush Sends Sweet Birthday Message to 'O...,1
3,Colombian singer Maluma sparks rumours of inap...,1
4,Gossip Girl 10 Years Later: How Upper East Sid...,1


In [29]:
# Headlines and labels of the external evaluation set as plain lists.
test_text = df_test['title'].to_list()
test_label = df_test['real'].to_list()

# Persist both lists for reuse outside the notebook. Despite the file names,
# these hold the FakeNewsNet data, not the X_test / y_test split made earlier.
np.save('X_test.npy', arr=test_text)
np.save('y_test.npy', arr=test_label)

# NOTE(review): the label column here is called `real`, while the training
# labels come from WELFake's `label` column — confirm that 1 means the same
# thing in both datasets before trusting any metric computed on test_ds.
# NOTE(review): these inputs are titles only, whereas the training text was
# title + body; expect a distribution shift.
test_ds = tf.data.Dataset.from_tensor_slices((test_text, test_label))
test_ds = test_ds.batch(batch_size)

In [33]:
# Score the raw-text model on the external evaluation set.
# NOTE(review): the recorded run of this cell ended with
# "AttributeError: 'NoneType' object has no attribute 'items'". The cause is
# not visible from this notebook; the execution counts are out of order, so
# re-check after Restart Kernel -> Run All before investigating further.
end_to_end_model.evaluate(x=test_ds)


AttributeError: 'NoneType' object has no attribute 'items'

In [12]:
# Smoke test: run one raw string through the end-to-end model and show the
# predicted probability for that sample.
single_string_array = tf.convert_to_tensor(["This is a string"])
result = end_to_end_model.predict(x=single_string_array)
result[0]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 143ms/step


array([0.9748548], dtype=float32)

In [26]:
# Persist the end-to-end model (TextVectorization + classifier) in the native
# Keras format.
#
# Why not the previous call:
#   * `custom_objects` is an argument of load_model(), not of save_model();
#     it has no role when writing a model to disk.
#   * The legacy HDF5 (.h5) format is not the recommended format for Keras 3
#     models and is not guaranteed to round-trip preprocessing-layer state
#     such as the vocabulary learned by vectorize_layer.adapt(). The `.keras`
#     archive stores config, weights and layer assets together.
#
# NOTE: the output file is now ./neural_network.keras — any code that loaded
# ./neural_network.h5 must be pointed at the new path, e.g.
#   keras.saving.load_model("./neural_network.keras")
end_to_end_model.save("./neural_network.keras")

