## Importing required libraries

In [1]:
import re

import nltk
import numpy as np
import pandas as pd
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import hashing_trick, one_hot
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.models import Sequential

## Reading input files

In [2]:
# Load the labelled training split of the fake-news dataset into a DataFrame.
train_data = pd.read_csv(filepath_or_buffer="../input/fake-news/train.csv")

In [3]:
# Discard every row that has a missing value in any column.
train_data = train_data.dropna(axis=0, how="any")

## Initializing objects and constants

In [4]:
# Size of the hashed vocabulary: upper bound for the encoded word indices and
# the input dimension of the embedding layer.
sentence_length = 5_000
# Number of word indices every title is padded to.
padding_length = 1_000
# Width of each embedding vector.
dimension = 100
# Stemmer shared by the text-cleaning step.
ps = PorterStemmer()

## Text preprocessing

In [5]:
def pre_process_data(data, stemmer=None, stop_words=None):
    """Return the cleaned, stemmed titles of ``data`` as a list of strings.

    Each title is lower-cased, every non-letter character is replaced by a
    space, the result is split into words, stop words are removed and the
    remaining words are stemmed and re-joined with single spaces.

    Parameters
    ----------
    data : mapping or DataFrame
        Anything that supports ``data['title']`` and yields an iterable of
        title strings.
    stemmer : object with a ``stem(word)`` method, optional
        Defaults to the notebook-level ``ps`` stemmer.
    stop_words : iterable of str, optional
        Words to drop. Defaults to ``stopwords.words("english")``.

    Returns
    -------
    list of str
        One cleaned string per title, in the original order.

    Raises
    ------
    KeyError
        If ``data`` has no ``'title'`` entry.
    """
    if stemmer is None:
        stemmer = ps
    if stop_words is None:
        stop_words = stopwords.words("english")
    # A set is built once so the membership test inside the loop is cheap.
    stop_words = set(stop_words)

    cleaned_titles = []
    for title in data['title']:
        # .split() is essential: iterating the string itself would yield
        # single characters instead of words.
        words = re.sub("[^A-Za-z]", " ", title.lower()).split()
        cleaned_titles.append(
            " ".join(stemmer.stem(word) for word in words if word not in stop_words)
        )
    return cleaned_titles

In [6]:
# The headline column is the only text feature used by this notebook.
x = train_data.loc[:, 'title']

In [7]:
# Clean every title: lower-case it, replace non-letters with spaces, split it
# into words, drop English stop words, stem what is left and re-join the words
# with single spaces.
# The stop-word list is turned into a set once, instead of being re-read for
# every token.
english_stop_words = set(stopwords.words("english"))
x_list = [
    " ".join(
        ps.stem(word)
        # .split() is required: iterating the string directly would yield
        # single characters, so neither the stop-word filter nor the stemmer
        # would ever see a whole word.
        for word in re.sub("[^A-Za-z]", " ", title.lower()).split()
        if word not in english_stop_words
    )
    for title in x.values
]

In [8]:
# Hash every word of each cleaned title to an integer index in
# [1, sentence_length). The md5 hash keeps the word -> index mapping identical
# across kernel restarts; one_hot() uses Python's built-in hash(), which is
# salted per process, so its encoding is not reproducible between runs.
encoded_sentences = [
    hashing_trick(sentence, sentence_length, hash_function='md5')
    for sentence in x_list
]

In [9]:
# Bring every encoded title to padding_length entries; padding='pre' places
# the filler in front of the word indices.
padded_sentences = pad_sequences(
    sequences=encoded_sentences,
    maxlen=padding_length,
    padding='pre',
)

In [10]:
# Architecture: embedding lookup -> one LSTM layer -> a single sigmoid unit.
model = Sequential([
    Embedding(sentence_length, dimension, input_length=padding_length),
    LSTM(500),
    Dense(1, activation='sigmoid'),
])

2022-03-03 17:08:45.726752: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [11]:
# Binary cross-entropy pairs with the single sigmoid output unit.
# `metrics` is given as a list, the form documented for Model.compile; a bare
# string is rejected by newer Keras releases (Keras 3).
model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='adam')

In [12]:
# Print the layer-by-layer table of output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 1000, 100)         500000    
_________________________________________________________________
lstm (LSTM)                  (None, 500)               1202000   
_________________________________________________________________
dense (Dense)                (None, 1)                 501       
Total params: 1,702,501
Trainable params: 1,702,501
Non-trainable params: 0
_________________________________________________________________


In [13]:
# Labels and padded index sequences as NumPy arrays for scikit-learn and Keras.
y_input = np.array(train_data.loc[:, 'label'].values)
x_input = np.array(padded_sentences)

In [14]:
# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x_input,
    y_input,
    test_size=0.2,
    random_state=1,
)

In [15]:
# Train for two epochs, evaluating on the held-out split after each epoch.
model.fit(
    x=x_train,
    y=y_train,
    epochs=2,
    validation_data=(x_test, y_test),
)

2022-03-03 17:08:46.948172: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f686dcbf690>

In [16]:
# (number of titles, padded sequence length)
np.shape(x_input)

(18285, 1000)