# LSTM Implementation for fake news detection

In [82]:
import pandas as pd
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
from keras.callbacks import EarlyStopping

# Read the dataset
data = pd.read_csv("news_data.csv")

# Drop rows with missing values
data = data.dropna()

# Combine 'Headline' and 'Body'
data['Text'] = data['Headline'] + " " + data['Body']

# Filter out samples with empty text
data = data[data['Text'].apply(lambda x: len(x.split()) > 0)]

# Prepare input and target variables
X = data['Text']
y = data['Label']

# Tokenization and padding
max_sequence_length = 1000
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X)
X_seq = tokenizer.texts_to_sequences(X)
X_pad = pad_sequences(X_seq, maxlen=max_sequence_length)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_pad, y, test_size=0.2, random_state=42)

# Build LSTM model
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=100, input_length=max_sequence_length))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Define early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model with early stopping
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=64, callbacks=[early_stopping])


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


<keras.src.callbacks.History at 0x7dbfe36f7d30>

## Importing Libraries


In [83]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score


## Initializing Models

In [84]:
logistic_regression_model = LogisticRegression(max_iter=1000)
random_forest_model = RandomForestClassifier()
svm_model = SVC()
naive_bayes_model = MultinomialNB()
gradient_boosting_model = GradientBoostingClassifier()


## Training Models

In [85]:
random_forest_model.fit(X_train, y_train)
svm_model.fit(X_train, y_train)
naive_bayes_model.fit(X_train, y_train)
gradient_boosting_model.fit(X_train, y_train)


## Evaluating Models

In [91]:

random_forest_accuracy = accuracy_score(y_test, random_forest_model.predict(X_test))
svm_accuracy = accuracy_score(y_test, svm_model.predict(X_test))
naive_bayes_accuracy = accuracy_score(y_test, naive_bayes_model.predict(X_test))
gradient_boosting_accuracy = accuracy_score(y_test, gradient_boosting_model.predict(X_test))
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)



## Comparitive Study

In [92]:
print("LSTM Model Accuracy:", accuracy)
print("Random Forest Accuracy:", random_forest_accuracy)
print("SVM Accuracy:", svm_accuracy)
print("Naive Bayes Accuracy:", naive_bayes_accuracy)
print("Gradient Boosting Accuracy:", gradient_boosting_accuracy)


LSTM Model Accuracy: 0.9837092757225037
Random Forest Accuracy: 0.9110275689223057
SVM Accuracy: 0.7481203007518797
Naive Bayes Accuracy: 0.6265664160401002
Gradient Boosting Accuracy: 0.9010025062656641
