In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/fake-news/fake_train.csv


## Read Data

In [2]:
# Load the training CSV from the Kaggle input directory into a DataFrame.
data = pd.read_csv("/kaggle/input/fake-news/fake_train.csv")

In [3]:
# Preview the first rows of the loaded frame.
data.head()

Unnamed: 0,id,title,author,text,label
0,0,House Dem Aide: We Didn’t Even See Comey’s Let...,Darrell Lucus,House Dem Aide: We Didn’t Even See Comey’s Let...,1
1,1,"FLYNN: Hillary Clinton, Big Woman on Campus - ...",Daniel J. Flynn,Ever get the feeling your life circles the rou...,0
2,2,Why the Truth Might Get You Fired,Consortiumnews.com,"Why the Truth Might Get You Fired October 29, ...",1
3,3,15 Civilians Killed In Single US Airstrike Hav...,Jessica Purkiss,Videos 15 Civilians Killed In Single US Airstr...,1
4,4,Iranian woman jailed for fictional unpublished...,Howard Portnoy,Print \nAn Iranian woman has been sentenced to...,1


## Check for null values

In [4]:
# Show every row that has a missing value in at least one column.
rows_with_nulls = data.isna().any(axis="columns")
data[rows_with_nulls]

Unnamed: 0,id,title,author,text,label
6,6,Life: Life Of Luxury: Elton John’s 6 Favorite ...,,Ever wonder how Britain’s most iconic pop pian...,1
8,8,Excerpts From a Draft Script for Donald Trump’...,,Donald J. Trump is scheduled to make a highly ...,0
20,20,News: Hope For The GOP: A Nude Paul Ryan Has J...,,Email \nSince Donald Trump entered the electio...,1
23,23,Massachusetts Cop’s Wife Busted for Pinning Fa...,,Massachusetts Cop’s Wife Busted for Pinning Fa...,1
31,31,Israel is Becoming Pivotal to China’s Mid-East...,,Country: Israel While China is silently playin...,1
...,...,...,...,...,...
20745,20745,Thomas Frank Explores Whether Hillary Clinton ...,,Thomas Frank Explores Whether Hillary Clinton ...,1
20768,20768,Osama bin Laden’s older brother rents out luxu...,,Osama bin Laden’s older brother rents out luxu...,1
20771,20771,,Letsbereal,DYN's Statement on Last Week's Botnet Attack h...,1
20772,20772,,beersession,Kinda reminds me of when Carter gave away the ...,1


## Remove Nulls

In [5]:
# Drop every row that contains at least one missing value.
# The result is rebound to `data` instead of using inplace=True: the in-place
# form brings no performance benefit and prevents method chaining.
data = data.dropna()

In [6]:
# (rows, columns) remaining after the rows with missing values were dropped.
data.shape

(18285, 5)

In [7]:
# Separate the target column from the predictor columns.
Y = data['label']
X = data.drop(columns=['label'])

In [8]:
# (rows, columns) of the feature frame, i.e. `data` without the 'label' column.
X.shape

(18285, 4)

## Import Required Libraries

In [9]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
from tensorflow.keras.preprocessing.text import hashing_trick
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [10]:
# Work on a copy of the feature frame so later changes to `messages` do not affect X.
messages = X.copy()

In [11]:
# Renumber the rows 0..n-1 (dropping null rows left gaps in the index); the old
# labels are kept in an 'index' column, exactly as before.
# The frame is rebuilt from X instead of calling reset_index(inplace=True) on
# `messages`, so re-running this cell always yields the same frame rather than
# inserting one more index column on every execution.
messages = X.reset_index()

In [12]:
import nltk
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

## Data Preprocessing

In [13]:
# Build `corpus`: one cleaned, stemmed, stop-word-free string per article title.
ps = PorterStemmer()

# Fetch the English stop-word list once and keep it as a set. The previous code
# called stopwords.words('english') inside the comprehension, i.e. once per
# token, and then scanned the returned list linearly for every membership test.
english_stopwords = set(stopwords.words('english'))

corpus = []
for raw_title in messages['title']:
    # Replace everything that is not an ASCII letter with a space, lowercase,
    # and split on whitespace.
    tokens = re.sub('[^a-zA-Z]', ' ', raw_title).lower().split()
    # Stem each remaining token and join them back into a single string.
    title = ' '.join(ps.stem(word) for word in tokens if word not in english_stopwords)
    corpus.append(title)

## One Hot Representation

In [14]:
# Encode every title as a list of integer word indices in [1, vocab_size).
#
# one_hot() is hashing_trick() with Python's built-in hash(), which is
# randomised per interpreter session for strings, so the same word would map
# to a different index after a kernel restart. Using the 'md5' hash keeps the
# encoding stable across runs; everything else (filters, lowercasing, split
# on spaces) is unchanged.
vocab_size = 5000
one_hot_rep = [hashing_trick(sent, vocab_size, hash_function='md5') for sent in corpus]

## Embedding Representation

### Create Padded Sequence

In [15]:
# Bring every encoded title to a fixed length of `sent_length` entries,
# padding at the front of the sequence.
sent_length = 20
padded_docs = pad_sequences(one_hot_rep, maxlen=sent_length, padding='pre')

In [16]:
# Build the classifier: embedding -> LSTM -> single sigmoid output.
embedding_vector_features = 40  # size of each learned word vector

model = Sequential()
model.add(Embedding(vocab_size, embedding_vector_features, input_length=sent_length))
model.add(LSTM(100))  # one LSTM layer with 100 units
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in print()
# only appended a stray "None" line to the cell output.
model.summary()

2023-02-04 13:29:18.608221: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-02-04 13:29:18.609143: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-02-04 13:29:18.727691: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-02-04 13:29:18.728551: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-02-04 13:29:18.729531: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from S

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 20, 40)            200000    
_________________________________________________________________
lstm (LSTM)                  (None, 100)               56400     
_________________________________________________________________
dense (Dense)                (None, 1)                 101       
Total params: 256,501
Trainable params: 256,501
Non-trainable params: 0
_________________________________________________________________
None


In [17]:
# Materialise the labels and the padded sequences as NumPy arrays.
Y_final = np.array(Y)
X_final = np.array(padded_docs)

In [18]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for evaluation. random_state is fixed so the same
# rows land in the train and test sets on every run; without it the split
# changed each time the cell was executed.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_final, Y_final, test_size=0.3, random_state=42
)

## Model Training

In [19]:
# Train for 10 epochs in mini-batches of 64; the held-out split is passed as
# validation data so its loss/accuracy are reported alongside the training ones.
model.fit(
    X_train,
    Y_train,
    batch_size=64,
    epochs=10,
    validation_data=(X_test, Y_test),
)

Epoch 1/10


2023-02-04 13:29:22.952958: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2023-02-04 13:29:26.330095: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f4c20155a10>

## Performance Metrics and Accuracy

In [20]:
# Turn the model's sigmoid outputs into hard 0/1 labels with a 0.5 cut-off.
probabilities = model.predict(X_test)
predictions = (probabilities > 0.5).astype("int32")

In [21]:
from sklearn.metrics import confusion_matrix
# Confusion matrix of the held-out labels against the thresholded predictions.
confusion_matrix(y_true=Y_test, y_pred=predictions)

array([[2860,  290],
       [ 194, 2142]])

In [22]:
from sklearn.metrics import accuracy_score
# Fraction of held-out rows whose predicted label matches the true label.
accuracy_score(y_true=Y_test, y_pred=predictions)

0.9117754283631061