In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout


In [None]:
# Read both halves of the dataset (fake articles, then true articles) in one pass.
fake, true = (
    pd.read_csv(csv_path)
    for csv_path in (
        r'../input/fake-and-real-news-dataset/Fake.csv',
        r'../input/fake-and-real-news-dataset/True.csv',
    )
)

In [None]:
# Show up to 300 characters per cell so long headlines are not truncated in displays.
pd.options.display.max_colwidth = 300

In [None]:
# (rows, columns) of the fake frame followed by the true frame.
(fake.shape, true.shape)

In [None]:
# Peek at the first five fake-news rows.
fake.head(n=5)

In [None]:
# Compare the distribution of headline lengths (in characters) for the two classes.
length_fake = fake['title'].str.len()
length_true = true['title'].str.len()

fig, ax = plt.subplots()
# Semi-transparent bars keep both distributions visible where they overlap;
# with opaque bars the second histogram hides the first.
ax.hist(length_fake, bins=20, alpha=0.6, label='fake_title')
ax.hist(length_true, bins=20, alpha=0.6, label='true_title')
ax.set(
    xlabel='title length (characters)',
    ylabel='number of articles',
    title='Title length by class',
)
ax.legend()
plt.show()

In [None]:
# Tag each source frame with its class (1 = fake, 0 = true), then stack them
# into a single frame with a fresh 0..n-1 index.
for frame, class_id in ((fake, 1), (true, 0)):
    frame['label'] = class_id
data = pd.concat([fake, true], ignore_index=True)

In [None]:
# First five rows of the combined frame.
data.head(n=5)

In [None]:
# First five rows labelled 1 (fake).
data.loc[data['label'].eq(1)].head()

In [None]:
# First five rows labelled 0 (true).
data.loc[data['label'].eq(0)].head()

In [None]:
# Number of distinct values in each column.
data.nunique(axis=0)

In [None]:
# Remove the date column (the subject column is kept).
# Rebinding `data` with errors='ignore' keeps this cell safe to re-run:
# the in-place drop raised KeyError the second time because 'date' was gone.
data = data.drop(columns=['date'], errors='ignore')

In [None]:
# Confirm the column was removed.
data.head(n=5)

In [None]:
# Features are every column except the target; the target is the 'label' column.
X, y = data.drop(columns=['label']), data['label']

In [None]:
# Vocabulary size: number of hash buckets for the one-hot word indices,
# and the input dimension of the Embedding layer.
voc_size = 10_000

#### **One-hot Representation**

In [None]:
# Work on an independent copy so the feature frame X stays untouched.
messages = X.copy(deep=True)
# Show one sample headline (row label 1).
messages.loc[1, 'title']

In [None]:
# Make sure rows are labelled 0..n-1 for the positional loop that follows.
# drop=True discards the old index instead of inserting it as a column, so the
# cell can be re-run safely (the in-place form added 'index', then 'level_0',
# and raised ValueError on the third run).
messages = messages.reset_index(drop=True)

In [None]:
import nltk
import re
from nltk.corpus import stopwords

### Dataset Preprocessing
from nltk.stem.porter import PorterStemmer

ps = PorterStemmer()

# Load the stop-word list once and keep it in a set. Calling
# stopwords.words('english') for every word rebuilds the list each time and
# scans it linearly, which dominates the runtime of this cell.
english_stopwords = set(stopwords.words('english'))
non_letters = re.compile('[^a-zA-Z]')

# For each title: keep letters only, lowercase, split on whitespace,
# drop stop words, stem what remains and re-join into one string.
corpus = []
# fillna('') turns a missing title into an empty document instead of a TypeError.
for title in messages['title'].fillna(''):
    tokens = non_letters.sub(' ', title).lower().split()
    stems = [ps.stem(token) for token in tokens if token not in english_stopwords]
    corpus.append(' '.join(stems))

In [None]:
# Map every cleaned title to a list of integer word indices in [1, voc_size).
# NOTE(review): one_hot appears to hash with Python's built-in hash by default,
# so indices may differ between interpreter sessions — confirm if reproducibility matters.
onehot_repr = [one_hot(sentence, voc_size) for sentence in corpus]

#### **Embedding Representation**

In [None]:
# Fixed sequence length fed to the network; shorter titles are zero-padded at
# the front ('pre'), longer ones are cut down to this length.
sent_length = 20
embedded_docs = pad_sequences(
    onehot_repr,
    maxlen=sent_length,
    padding='pre',
)
print(embedded_docs)

In [None]:
# Padded index sequence of the first title.
embedded_docs[0, :]

In [None]:
## Creating model
# Embedding -> LSTM -> sigmoid unit, with dropout after the embedding and after
# the LSTM, trained as a binary classifier.
embedding_vector_features = 40  # dimensionality of each word embedding

model = Sequential([
    Embedding(voc_size, embedding_vector_features, input_length=sent_length),
    Dropout(0.3),
    LSTM(100),
    Dropout(0.3),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in print()
# only appended a stray "None" line to the output.
model.summary()

In [None]:
# Sanity check: number of padded documents next to the shape of the label vector.
(len(embedded_docs), y.shape)

In [None]:
# Materialise features and labels as NumPy arrays for scikit-learn / Keras.
X_final, y_final = np.array(embedded_docs), np.array(y)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the samples; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_final,
    y_final,
    test_size=0.33,
    random_state=42,
)

#### **Model Training**

In [None]:
### Finally Training
# 15 epochs in mini-batches of 128; the held-out split is passed as validation
# data so per-epoch validation metrics are recorded in `history`.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=15,
    batch_size=128,
)

#### **Performance Metrics And Accuracy**

In [None]:
# Sequential.predict_classes was removed in TensorFlow 2.6. For a single sigmoid
# output the documented replacement is to threshold the predicted probability
# at 0.5, which gives the same (n_samples, 1) int32 array of 0/1 labels.
y_pred = (model.predict(X_test) > 0.5).astype('int32')

In [None]:
from sklearn.metrics import confusion_matrix

# Rows are the actual classes, columns the predicted classes.
cm = confusion_matrix(y_test, y_pred)
print(f'confusion_matrix: {cm}')

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of held-out samples whose predicted label matches the true label.
acc = accuracy_score(y_test, y_pred)
print(f'accuracy_score: {acc}')