In [None]:
import tensorflow as tf
# Show the installed TensorFlow version as the cell output.
tf.__version__

In [None]:
import csv
import sys

import pandas as pd

# Raise the csv module's per-field size cap before parsing; presumably some
# fields in this dataset exceed the default cap (confirm against the data).
csv.field_size_limit(sys.maxsize)

# The python engine parses through the csv module, so the cap set above applies.
df = pd.read_csv('/content/WELFake_Dataset.csv', skip_blank_lines=True, engine='python')

# Discard every row that has at least one missing value.
df = df.dropna()

# Preview the cleaned frame. This must be the last expression of the cell,
# otherwise the notebook does not render it.
df.head()

In [None]:
# Features are every column except the target; the target is the 'label' column.
X = df.drop(columns='label')
y = df['label']

# Only the shapes are displayed: a bare `X, y` expression in the middle of a
# cell is never rendered, and dumping the full frame would not be useful anyway.
X.shape, y.shape

Importing required dependencies

In [None]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense

In [None]:
# Work on a copy of the feature frame; X itself is left as-is.
messages = X.copy()

# Upper bound for the integer word indices; also the Embedding layer's input size.
voc_size = 5000

# Preview the title column (the text the model is trained on).
messages['title']

In [None]:
import re

import nltk
from nltk.corpus import stopwords

# Fetch the NLTK stop-word corpus used when cleaning the titles.
nltk.download('stopwords')

# dropna() left gaps in the row labels; renumber them 0..n-1 so that
# positional-style lookups such as messages['title'][i] work.
messages = messages.reset_index(drop=True)

Preprocessing Text

In [None]:
from nltk.stem.porter import PorterStemmer

stop_words = set(stopwords.words('english'))
ps = PorterStemmer()

# Compiled once instead of on every row: matches any character that is not
# an ASCII letter or digit.
NON_ALNUM_RE = re.compile('[^a-zA-Z0-9]')


def clean_title(title):
    """Normalise one title into a space-joined string of stemmed tokens.

    Steps: replace non-alphanumeric characters with spaces, lowercase,
    split on whitespace, drop English stop words, Porter-stem what remains.

    Args:
        title: Raw title text. Non-string values are converted with str().

    Returns:
        The cleaned title as a single string (may be empty).
    """
    tokens = NON_ALNUM_RE.sub(' ', str(title)).lower().split()
    return ' '.join(ps.stem(token) for token in tokens if token not in stop_words)


# Iterate over the column values directly rather than looking rows up by
# label, so this cell does not depend on the index having been reset to 0..n-1.
corpus = [clean_title(title) for title in messages['title']]

One-hot (hashed integer) representation and padding so that all sequences have equal length

In [None]:
from tensorflow.keras.preprocessing.text import hashing_trick

# Map each token to an integer index below voc_size with a *stable* hash.
# Keras' one_hot() relies on Python's built-in hash(), which is salted per
# process for strings: the same word would get a different index after every
# kernel restart, making a saved model unusable in a later session.
onehot_repr = [hashing_trick(words, voc_size, hash_function='md5') for words in corpus]

# Every title is represented by exactly max_length indices: shorter titles are
# left-padded with zeros (padding='pre'); longer ones are truncated by
# pad_sequences.
max_length = 30
padded_title = pad_sequences(onehot_repr, padding='pre', maxlen=max_length)

# Inspect the first encoded title.
padded_title[0]

Creating Embedding layer and LSTM-RNN model

In [None]:
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Dropout

# Width of each learned word vector.
embedded_features = 40

# Embedding -> two stacked LSTMs (each followed by dropout) -> sigmoid unit.
model = Sequential([
    Embedding(voc_size, embedded_features, input_length=max_length),
    # return_sequences=True hands the full sequence to the second LSTM.
    LSTM(64, return_sequences=True),
    Dropout(0.3),
    LSTM(32),
    Dropout(0.3),
    # Single probability output with an L2 penalty on the kernel weights.
    Dense(1, activation='sigmoid', kernel_regularizer=regularizers.l2(0.01)),
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


Implementing early stopping and ReduceLROnPlateau to limit overfitting

In [None]:
import tensorflow as tf
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Halve the learning rate once val_loss has not improved for 5 epochs,
# never going below 1e-6.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6, verbose=1)

# Stop training once val_loss has not improved by at least 1e-4 for 20 epochs,
# then roll the model back to the best weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    mode="auto",
    min_delta=0.0001,
    patience=20,
    baseline=None,
    start_from_epoch=0,
    restore_best_weights=True,
    verbose=1,
)

Converting X and y to NumPy arrays and splitting them into train and test sets with scikit-learn

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

# Model inputs (padded index sequences) and targets as plain NumPy arrays.
X_final = np.array(padded_title)
y_final = np.array(y)

# Hold out 33% of the rows for the final evaluation; the fixed seed keeps the
# split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X_final,
    y_final,
    test_size=0.33,
    random_state=42,
)

Model training 

In [None]:
# model_1 is the History object returned by fit(); its .history dict is what
# the plotting cell reads. 20% of the training data is held out for validation,
# which is what the val_loss-based callbacks monitor.
model_1 = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=40,
    batch_size=64,
    callbacks=[early_stopping, reduce_lr],
)

Plots for model1

In [None]:
import matplotlib.pyplot as plt

# Per-epoch accuracy on the training data and on the validation split.
# The second curve comes from validation_split during fit(), not from the
# held-out test set, so it is labelled 'validation'.
fig, ax = plt.subplots()
ax.plot(model_1.history['accuracy'], label='train')
ax.plot(model_1.history['val_accuracy'], label='validation')
ax.set_title('accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(loc='upper left')
plt.show()

Metrics for evaluating model performance

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# The network ends in a single sigmoid unit, so predict() yields one
# probability per row. Threshold at 0.5 and flatten to a 1-D array of 0/1
# so the predictions have the same form as y_test.
y_prob = model.predict(X_test)
y_pred = (y_prob >= 0.5).astype(int).ravel()

# scikit-learn metrics take (y_true, y_pred) in that order.
score = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

# Print explicitly: only the last bare expression of a cell is rendered, so
# `score` and `cm` on their own lines would be silently discarded.
print(f'accuracy: {score:.4f}')
print('confusion matrix:')
print(cm)
print(classification_report(y_test, y_pred))

Saving the model for further use and transfer learning

In [None]:
# Persist the trained model in the native Keras format. Reusing it later
# requires feeding it titles preprocessed and encoded the same way as here.
model_path = 'fake_news_model1.keras'
model.save(model_path)