<a href="https://www.kaggle.com/sanjayanbu/fake-news-lstm?scriptVersionId=89272612" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

## Importing required libraries

In [1]:
import nltk
import re
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import one_hot
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.models import Sequential
import numpy as np
from sklearn.model_selection import train_test_split

## Reading input files

In [2]:
# Load the labelled training set from the Kaggle input directory.
train_data = pd.read_csv("../input/fake-news/train.csv")

In [3]:
# Discard every row that has at least one missing value, rebinding the name
# rather than mutating the frame in place.
train_data = train_data.dropna()

## Initializing objects and constants

In [4]:
# Shared stemmer instance used by pre_process_data.
ps = PorterStemmer()
# Despite the name, this is the size of the integer hashing space passed to
# one_hot() and the input dimension of the Embedding layer (i.e. the
# vocabulary size), not the length of a sentence.
sentence_length = 5000
# Length every encoded title is padded (or truncated) to; used as maxlen for
# pad_sequences and as input_length of the Embedding layer.
# NOTE(review): only the 'title' column is encoded, which is presumably far
# shorter than 1000 tokens -- confirm this length is intended.
padding_length = 1000
# Size of each embedding vector (output dimension of the Embedding layer).
dimension = 100

## Text preprocessing

In [5]:
def pre_process_data(data):
    """Turn the 'title' column of ``data`` into padded integer sequences.

    Each title is lower-cased, stripped of every non-letter character, split
    into words, filtered against the NLTK English stop-word list and stemmed
    with the module-level Porter stemmer ``ps``. The cleaned titles are then
    hashed word-by-word with ``one_hot`` (hash space ``sentence_length``) and
    padded at the front to ``padding_length`` entries.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing a 'title' column.

    Returns
    -------
    The array returned by ``pad_sequences``: one row per title, each
    ``padding_length`` entries long.
    """
    # Build the stop-word set once; looking the list up for every token is
    # slow and a set gives constant-time membership tests.
    stop_words = set(stopwords.words("english"))

    def _clean(title):
        # A missing title (e.g. NaN left over after filling) has no words.
        if not isinstance(title, str):
            return ""
        # Split into words before filtering/stemming: iterating directly over
        # the string would visit single characters, so stop-word removal would
        # merely delete letters such as 'a', 'i', 's' or 't'.
        words = re.sub("[^A-Za-z]", " ", title.lower()).split()
        # Re-join with spaces so one_hot can tokenise the words again.
        return " ".join(ps.stem(word) for word in words if word not in stop_words)

    sentence_list = [_clean(title) for title in data['title'].values]
    encoded_sentences = [one_hot(sentence, sentence_length) for sentence in sentence_list]
    padded_sentences = pad_sequences(encoded_sentences, maxlen=padding_length, padding='pre')
    return padded_sentences

In [6]:
# Encode the training titles into padded integer sequences.
sen_list = pre_process_data(train_data)

## Constructing LSTM Model

In [7]:
# Binary classifier: embedding lookup -> single LSTM layer -> sigmoid unit.
model = Sequential()
# Maps each hashed word index (< sentence_length) to a `dimension`-sized vector.
model.add(Embedding(sentence_length, dimension, input_length=padding_length))
model.add(LSTM(500))
# One sigmoid output, thresholded later to obtain the predicted label.
model.add(Dense(1, activation='sigmoid'))
# `metrics` is documented as a list; a bare string is only tolerated by some
# Keras versions and rejected by others.
model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='adam')

2022-03-04 11:33:24.567799: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-04 11:33:24.742588: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-04 11:33:24.743644: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-04 11:33:24.745342: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

In [8]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 1000, 100)         500000    
_________________________________________________________________
lstm (LSTM)                  (None, 500)               1202000   
_________________________________________________________________
dense (Dense)                (None, 1)                 501       
Total params: 1,702,501
Trainable params: 1,702,501
Non-trainable params: 0
_________________________________________________________________


## Converting the inputs to NumPy arrays

In [9]:
# Features: the padded title sequences. Targets: the 'label' column.
x_input = np.array(sen_list)
y_input = np.array(train_data['label'].to_numpy())

## Training the model

In [10]:
# Hold out 20% of the labelled rows; the split is seeded so it is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x_input,
    y_input,
    test_size=0.2,
    random_state=1,
)
# The held-out part is used as validation data while fitting.
model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=2,
    batch_size=64,
)

2022-03-04 11:33:28.099677: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/2


2022-03-04 11:33:30.954371: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


Epoch 2/2


<keras.callbacks.History at 0x7f84960d7910>

## Predicting on the test data

In [11]:
# Load the unlabelled test set from the Kaggle input directory.
test_data = pd.read_csv("../input/fake-news/test.csv")
# Forward-fill missing values so every row has a value to encode. ffill() is
# the replacement for the deprecated fillna(method='pad'); rebinding the name
# avoids mutating the frame in place.
# NOTE(review): forward-filling copies the previous row's text into rows with
# a missing title -- confirm this is preferable to an empty string.
test_data = test_data.ffill()

In [12]:
# Encode the test titles with the same preprocessing used for training.
test_sentence_list = pre_process_data(test_data)
test_input = np.array(test_sentence_list)
# One sigmoid output per test row; thresholded when the submission is built.
test_output = model.predict(test_input)

## Submitting the output

In [13]:
# Flatten the model outputs to 1-D and threshold at 0.5 to get 0/1 labels,
# paired with the ids of the test rows.
submission_df = pd.DataFrame({
    'id': test_data['id'],
    'label': (test_output.ravel() > 0.5).astype('int'),
})

In [14]:
# Write the submission file; index=False keeps the row index out of the CSV so
# it contains only the 'id' and 'label' columns.
submission_df.to_csv("submission.csv", index=False)

In [None]:
# Preview the first rows only instead of rendering the whole frame.
submission_df.head()