In [None]:
import pandas as pd
# Read the two source CSV files into separate frames (fake articles, real articles).
fake, real = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/fake-and-real-news-dataset/Fake.csv",
        "../input/fake-and-real-news-dataset/True.csv",
    )
)

In [None]:
# Remove the 'date' and 'subject' columns from both frames, in place.
for frame in (fake, real):
    frame.drop(columns=['date', 'subject'], inplace=True)

In [None]:
# Attach the target label to every row: 0 for the fake frame, 1 for the real frame.
for frame, label in ((fake, 0), (real, 1)):
    frame['class'] = label

In [None]:
# Stack the fake and real frames into one frame with a fresh 0..n-1 index.
frames_to_stack = [fake, real]
news_df = pd.concat(frames_to_stack, sort=False, ignore_index=True)

In [None]:
# Merge headline and body into a single text field. A space is inserted
# between them so the last word of the title and the first word of the body
# remain separate tokens instead of being fused into one.
news_df['text'] = news_df['title'] + ' ' + news_df['text']
# The title now lives inside 'text', so the standalone column is dropped.
news_df.drop('title', axis=1, inplace=True)

In [None]:
# Model input is the 'text' column; the value to predict is the 'class' column.
features, targets = news_df['text'], news_df['class']

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
split_parts = train_test_split(features, targets, random_state=18, test_size=0.20)
X_train, X_test, y_train, y_test = split_parts

In [None]:
import re

In [None]:
# Patterns are compiled once and written as raw strings so that escapes such
# as \S and \W reach the regex engine without invalid-escape warnings.
_URL_PATTERN = re.compile(r'https?://\S+|www\.\S+')
_NON_WORD_RUN = re.compile(r'\W+')


def normalize(data):
    """Lower-case and clean every string in ``data``.

    For each item the following steps are applied, in order:

    1. convert to lower case;
    2. delete URLs (``http://...``, ``https://...`` and ``www....``);
    3. replace every run of non-word characters (punctuation, whitespace,
       newlines) with a single space;
    4. strip leading and trailing spaces.

    Args:
        data: an iterable of ``str`` (a list, a pandas Series, ...).

    Returns:
        A new ``list`` of cleaned strings, in the same order as ``data``.
    """
    normalized = []
    for text in data:
        text = text.lower()
        # get rid of urls
        text = _URL_PATTERN.sub('', text)
        # \W also matches whitespace and newlines, so collapsing each run of
        # non-word characters into one space covers punctuation removal,
        # newline removal and repeated-space squeezing in a single pass.
        text = _NON_WORD_RUN.sub(' ', text)
        normalized.append(text.strip())
    return normalized

# Run the same text clean-up over the training and the test split.
X_train, X_test = normalize(X_train), normalize(X_test)

In [None]:
from keras.preprocessing.text import Tokenizer

In [None]:
# Vocabulary size limit handed to the tokenizer (and reused by the embedding layer).
max_vocab = 10_000
tokenizer = Tokenizer(num_words=max_vocab)
# The word index is learned from the training texts only.
tokenizer.fit_on_texts(texts=X_train)

In [None]:
# Convert every text of both splits into its sequence of integer word indices.
X_train, X_test = (
    tokenizer.texts_to_sequences(split_texts)
    for split_texts in (X_train, X_test)
)

In [None]:
import tensorflow as tf

In [None]:
# Bring every sequence to length 256, adding padding at the end of short ones.
pad_options = dict(padding='post', maxlen=256)
X_train = tf.keras.preprocessing.sequence.pad_sequences(X_train, **pad_options)
X_test = tf.keras.preprocessing.sequence.pad_sequences(X_test, **pad_options)

In [None]:
# Build the classifier layer by layer: embedding -> two stacked bidirectional
# LSTMs -> dense hidden layer with dropout -> single-unit output.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(max_vocab, 32))
# The first recurrent layer returns the full sequence so the second one can consume it.
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(16)))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))
# No activation is set on the output layer.
model.add(tf.keras.layers.Dense(1))

model.summary()

In [None]:
# The output layer has no activation, so the model emits raw logits and the
# loss is configured with from_logits=True. The plain 'accuracy' string would
# threshold those raw outputs at 0.5; BinaryAccuracy(threshold=0.0) places the
# decision boundary at logit 0, i.e. probability 0.5. The metric keeps the
# name 'accuracy', so the history keys 'accuracy' / 'val_accuracy' are unchanged.
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=[tf.keras.metrics.BinaryAccuracy(threshold=0.0, name='accuracy')])

# Train for 10 epochs, holding out 10% of the training data for validation.
history = model.fit(X_train, y_train, epochs=10, validation_split=0.1, batch_size=30, shuffle=True)

In [None]:
# Pull the per-epoch training and validation curves out of the fit history.
history_dict = history.history

acc, val_acc, loss, val_loss = (
    history_dict[metric_key]
    for metric_key in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
)
epochs = history.epoch

In [None]:
# Score the trained model on the held-out test split; the result is the cell output.
model.evaluate(x=X_test, y=y_test)

In [None]:
# The model's output layer has no activation (the loss uses from_logits=True),
# so predict() returns raw logits rather than probabilities.
pred = model.predict(X_test)

# A logit of 0 corresponds to a probability of 0.5, so the class-1 decision
# boundary on the raw outputs is 0.0, not 0.5. ravel() flattens the
# (n_samples, 1) prediction array so each element is a plain scalar.
binary_predictions = [1 if logit >= 0.0 else 0 for logit in pred.ravel()]

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

In [None]:
# scikit-learn metric functions take (y_true, y_pred) in that order. Accuracy
# is symmetric, but precision and recall are not: passing the predictions
# first reports recall under the "Precision" label and vice versa.
print('Accuracy on testing set:', accuracy_score(y_test, binary_predictions))
print('Precision on testing set:', precision_score(y_test, binary_predictions))
print('Recall on testing set:', recall_score(y_test, binary_predictions))

TensorFlow Embedding Projector (a tool for visualizing learned embeddings): http://projector.tensorflow.org/