# IMPORTING THE LIBRARIES

In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense

# IMPORTING THE DATASET

In [1]:
# Read the fake-news CSV into a DataFrame (path is relative to the notebook's working directory).
data = pd.read_csv('../input/fake-news/fake_train.csv')

In [1]:
# Preview the first rows to check that the file parsed as expected.
data.head()

In [1]:
# Dropping the NaN values: remove every row that has a missing value in any column.
data = data.dropna()

In [1]:
# Independent features: every column of `data` except the target column.
X = data.drop(columns='label')

In [1]:
# Dependent feature (target): the 'label' column of `data`.
y = data['label']

In [1]:
# (rows, columns) of the feature frame.
X.shape

In [1]:
# Length of the target vector; it comes from the same frame as X, so the row counts match.
y.shape

In [1]:
# Vocabulary size: upper bound on the integer word ids produced by the encoding step,
# and the input dimension of the Embedding layer.
voc_size = 5000

# ONE HOT REPRESENTATION

In [1]:
# Work on a copy so that later changes to `messages` do not touch X.
messages = X.copy()

In [1]:
# Give `messages` a fresh 0..n-1 index (rows were removed by dropna, so the
# original index has gaps); the old index is kept as a regular column.
# The frame is rebuilt from X instead of being mutated in place so the cell can
# be re-run safely: a repeated in-place reset keeps inserting extra index
# columns and eventually raises.
messages = X.reset_index()

In [1]:
import nltk
import re
from nltk.corpus import stopwords

In [1]:
# Fetch the NLTK stop-word corpus required by stopwords.words('english').
nltk.download('stopwords')

In [1]:
# Dataset Preprocessing
# For every title: keep letters only, lowercase, drop English stop words,
# stem the remaining tokens, and store the cleaned sentence in `corpus`.
from nltk.stem.porter import PorterStemmer
ps = PorterStemmer()
# Build the stop-word set once. Calling stopwords.words() for every token
# rebuilds the list each time and makes the membership test linear.
english_stopwords = set(stopwords.words('english'))
corpus = []
# Iterate over the titles directly; no per-row progress print, which would
# flood the cell output with one line per document.
for title in messages['title']:
    # Replace every non-letter character with a space.
    review = re.sub('[^a-zA-Z]', ' ', title)
    # Lowercase the sentence and split it into tokens.
    review = review.lower().split()
    review = [ps.stem(word) for word in review if word not in english_stopwords]
    review = ' '.join(review)
    corpus.append(review)

In [1]:
# Show only the first few cleaned titles; displaying the whole list would
# produce one output line per document.
corpus[:5]

In [1]:
# Encode every cleaned title as a list of integer word ids below voc_size.
# one_hot() hashes words with Python's built-in hash(), which is salted per
# process, so the ids would change on every kernel restart. hashing_trick with
# 'md5' performs the same encoding with a hash that is stable across runs.
onehot_repr = [
    tf.keras.preprocessing.text.hashing_trick(words, voc_size, hash_function='md5')
    for words in corpus
]

In [1]:
# Show only the first few encoded titles instead of the entire list.
onehot_repr[:5]

# EMBEDDING REPRESENTATION

In [1]:
# Fixed sequence length per title; shorter sequences are padded at the front.
sent_length = 20
embedded_docs = pad_sequences(
    onehot_repr,
    maxlen=sent_length,
    padding='pre',
)
print(embedded_docs)

In [1]:
# Padded id sequence of the first title.
embedded_docs[0]

In [1]:
# Creating the model
# Seed TensorFlow's global RNG so that weight initialisation is repeatable
# between runs (same seed value as the train/test split).
tf.random.set_seed(42)
# Size of the vector learned for each word id.
embedding_vector_features = 40
model = Sequential()
# Map each of the sent_length word ids to a dense vector.
model.add(Embedding(voc_size,embedding_vector_features,input_length = sent_length))
# Single LSTM layer with 100 units reading the embedded sequence.
model.add(LSTM(100))
# One sigmoid unit: outputs a probability for the binary label.
model.add(Dense(1,activation='sigmoid'))
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

In [1]:
# Print the layer-by-layer overview of the model.
model.summary()

In [1]:
import numpy as np

In [1]:
# Convert the padded sequences and the labels to NumPy arrays for splitting and training.
X_final = np.array(embedded_docs)
y_final = np.array(y)

In [1]:
# Shape of the model input array.
X_final.shape

In [1]:
# Shape of the label array.
y_final.shape

In [1]:
from sklearn.model_selection import train_test_split

In [1]:
# Hold out 33% of the samples for evaluation; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_final,
    y_final,
    test_size=0.33,
    random_state=42,
)

# MODEL TRAINING

In [1]:
# Train for 10 epochs in batches of 64, reporting metrics on the held-out split after each epoch.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=64,
)

# PERFORMANCE METRICS AND ACCURACY

In [1]:
# Sequential.predict_classes() was removed in TensorFlow 2.6. Threshold the
# sigmoid probabilities at 0.5 instead, which is what predict_classes did for a
# single-unit sigmoid output.
y_pred = (model.predict(X_test) > 0.5).astype("int32")

In [1]:
from sklearn.metrics import confusion_matrix

In [1]:
# Confusion matrix on the held-out split (rows: true labels, columns: predicted labels).
confusion_matrix(y_test,y_pred)

In [1]:
from sklearn.metrics import accuracy_score

In [1]:
# Fraction of held-out samples whose predicted label matches the true label.
accuracy_score(y_test,y_pred)