In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily walk the Kaggle input tree and print the full path of every file found.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import tensorflow as tf
from tensorflow import keras

In [None]:
# Read the two CSV files of the dataset: True.csv into `real`, Fake.csv into `fake`.
real, fake = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/fake-and-real-news-dataset/True.csv',
        '/kaggle/input/fake-and-real-news-dataset/Fake.csv',
    )
)

In [None]:
# Preview the first five rows of the frame loaded from True.csv.
real.head(5)

In [None]:
# Print every field of the row at position 5 of the True.csv frame.
print(real.iloc[5])

In [None]:
# Keep only the 'text' column of the Fake.csv frame; the double brackets keep
# the result a DataFrame rather than a Series.
fake = fake[['text']]

In [None]:
# Print the row at position 9 of the reduced Fake.csv frame.
print(fake.iloc[9])

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
from nltk.corpus import stopwords
# English stop words from the NLTK corpus, stored as a set; process_news checks
# each word of an article against it.
stop_words = set(stopwords.words("english"))
# Echo the set as the cell output.
stop_words

In [None]:
# Show the raw, uncleaned text of the row labelled 5 in the True.csv frame.
real.text[5]

In [None]:
import re
import tqdm
def process_news(text, stop_word_set=None):
    """Normalise one news article into a lower-case, single-spaced string.

    Steps, in order: lower-case the text, blank out ``http(s)://`` URLs and
    ``@mentions``, split on any run of whitespace, drop stop words, and
    re-join the surviving tokens with single spaces.

    Args:
        text: Raw article text (``str``).
        stop_word_set: Optional container of lower-case words to discard.
            When omitted, the notebook-level ``stop_words`` set is used.

    Returns:
        The cleaned text, with no leading, trailing or repeated whitespace.
    """
    if stop_word_set is None:
        stop_word_set = stop_words

    # Lower case
    text = text.lower()
    # Remove URLs (raw string: "\S" is not a valid escape in a plain literal;
    # the "://" keeps ordinary words that merely start with "http")
    text = re.sub(r"https?://\S+", " ", text)
    # Remove mentions
    text = re.sub(r"@\S+", " ", text)
    # Remove stop words. split() with no argument breaks on every whitespace
    # run, so words next to newlines or tabs are filtered too, and joining on
    # a single space also collapses repeated spaces and trims both ends.
    return ' '.join(word for word in text.split() if word not in stop_word_set)
    
# Clean every article of both frames, with a tqdm progress bar per frame.
# str() turns any non-string cell into text before it reaches process_news.
processed_news_real = [
    process_news(str(real.text[idx])) for idx in tqdm.tqdm(range(len(real)))
]

processed_news_fake = [
    process_news(str(fake.text[idx])) for idx in tqdm.tqdm(range(len(fake)))
]
    

In [None]:
# Show the first cleaned article from the True.csv frame.
processed_news_real[0]

In [None]:
# Show the first cleaned article from the Fake.csv frame.
processed_news_fake[0]

In [None]:
# Fit ONE tokenizer on the combined corpus so that a given integer id refers to
# the same word in both classes. Two separately fitted vocabularies would each
# number their own words, so the same id would stand for different words in
# real and fake rows even though both are fed to a single embedding layer.
tokenizer = Tokenizer(oov_token='<OOV>')
tokenizer.fit_on_texts(processed_news_real + processed_news_fake)

# Keep the per-class names as aliases of the shared tokenizer so the cells
# that call tokenizer_real / tokenizer_fake keep working unchanged.
tokenizer_real = tokenizer
tokenizer_fake = tokenizer

In [None]:
# Map each cleaned True.csv article to a list of integer word ids.
sequences_real = tokenizer_real.texts_to_sequences(processed_news_real)
# Bring every sequence to length 600: shorter ones are padded at the end
# (padding='post'); how longer ones are truncated is left at the library default.
padded_real = pad_sequences(sequences_real, padding='post', maxlen=600)

In [None]:
# Map each cleaned Fake.csv article to a list of integer word ids.
sequences_fake = tokenizer_fake.texts_to_sequences(processed_news_fake)
# Bring every sequence to length 600: shorter ones are padded at the end
# (padding='post'); how longer ones are truncated is left at the library default.
padded_fake = pad_sequences(sequences_fake, padding='post', maxlen=600)

In [None]:
# Show the padded id sequence of the first True.csv article.
padded_real[0]

In [None]:
# Collect the padded rows into one feature list, True.csv rows first, and build
# the matching label list: 0 for a True.csv row, 1 for a Fake.csv row.
X = [*padded_real, *padded_fake]
y = [0] * len(padded_real) + [1] * len(padded_fake)

In [None]:
# Show the feature row at position 5.
X[5]

In [None]:
# Show the label at position 5 (0 = True.csv row, 1 = Fake.csv row).
y[5]

In [None]:
# Convert the feature rows and the labels to NumPy arrays for splitting and training.
X = np.array(X)
y = np.array(y)

In [None]:
# Dimensions of the full feature array.
X.shape

In [None]:
# Dimensions of the full label array.
y.shape

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 33% of the rows. A fixed seed makes the split, and every metric
# computed from it, reproducible across kernel restarts; stratify=y keeps the
# class ratio the same in both partitions.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=SPLIT_SEED, stratify=y
)

In [None]:
# Dimensions of the training feature array.
X_train.shape

In [None]:
# Dimensions of the training label array.
y_train.shape

In [None]:
from tensorflow.keras import layers
from keras.regularizers import l2
# Size the embedding table from the fitted vocabularies instead of a hard-coded
# guess: the tokenizers emit ids from 1 up to len(word_index), so the larger of
# the two vocabularies is the highest id the model can be asked to embed.
max_features = max(len(tokenizer_real.word_index), len(tokenizer_fake.word_index))
embedding_dim = 50

model = tf.keras.Sequential([
  # +1 row because id 0 is the padding value and is not part of word_index.
  layers.Embedding(max_features + 1, embedding_dim),
  layers.Dropout(0.2),
  # Average the word vectors over the sequence axis into one vector per article.
  layers.GlobalAveragePooling1D(),
  layers.Dropout(0.2),
  # Regularizer taken from tf.keras, like every other layer in this model.
  layers.Dense(16, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),
  # Single unit with no activation: the model outputs a raw logit.
  layers.Dense(1)])

model.summary()

In [None]:
# Stop once the validation loss has not improved for 2 consecutive epochs and
# roll back to the best weights seen. Monitoring the training metric instead
# would keep training while the model overfits, because training accuracy
# keeps rising even when held-out performance degrades.
esc = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                patience=2,
                                verbose=0,
                                mode='auto',
                                restore_best_weights=True)

# The final Dense layer emits logits, so the loss is told from_logits=True and
# the accuracy threshold is 0.0 (logit 0 corresponds to probability 0.5).
model.compile(optimizer='adam',
              loss=tf.losses.BinaryCrossentropy(from_logits=True),
              metrics=[tf.metrics.BinaryAccuracy(threshold=0.0, name='accuracy')],
             )

In [None]:
# Train on the first GPU when one is visible and fall back to the CPU
# otherwise, so the cell does not depend on an accelerator being attached.
# Device-placement logging is left off: it prints a line per op and buries
# the per-epoch progress output.
train_device = "GPU:0" if tf.config.list_physical_devices("GPU") else "CPU:0"
with tf.device(train_device):
    # NOTE(review): the held-out split is also used as validation data during
    # training, so it is not an untouched test set for the final metrics.
    history = model.fit(tf.convert_to_tensor(X_train),
                        tf.convert_to_tensor(y_train),
                        epochs=15,
                        batch_size=512,
                        validation_data=(X_test, y_test),
                        callbacks=[esc]
                        )

In [None]:
import seaborn as sns
# One column per tracked metric, one row per epoch; draw each metric as a line
# and label the figure so it can be read on its own.
history_df = pd.DataFrame(history.history)
ax = sns.lineplot(data=history_df)
ax.set(title="Training history", xlabel="Epoch", ylabel="Metric value");