# 📰 Fake News Detector using Bi-LSTM
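This notebook demonstrates how to build a fake news detector using a bidirectional LSTM (Bi-LSTM) model. It walks through cleaning a small sample dataset, tokenizing and padding the text, training the model, evaluating it, and saving the trained model and tokenizer.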


In [1]:
import pandas as pd
import numpy as np
import re
import pickle
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, Dropout

ModuleNotFoundError: No module named 'tensorflow'

## Step 1: Load and Clean the Data

In [None]:
# Sample dataset (replace with real data for production use).
# Each row pairs a headline with its label: 1 = Real, 0 = Fake.
sample_rows = [
    ("The president announced a new policy today.", 1),
    ("Aliens have landed in New York City!", 0),
    ("The stock market saw a significant rise today.", 1),
    ("Scientists discovered a portal to another dimension.", 0),
    ("The local team won their championship game.", 1),
    ("Breaking: Government confirms contact with UFOs.", 0),
    ("Economy shows signs of strong recovery this quarter.", 1),
    ("Zombie outbreak reported in downtown area!", 0),
]

# Split the pairs back into the column-oriented dict the DataFrame is built from.
sample_texts, sample_labels = zip(*sample_rows)
data = {'text': list(sample_texts), 'label': list(sample_labels)}
df = pd.DataFrame(data)

def clean_text(text):
    """Normalize one piece of text for tokenization.

    The text is lower-cased and every character that is neither a word
    character (letter, digit, underscore) nor whitespace is removed.

    Args:
        text: The raw text. Missing values (``None`` or a float NaN, as
            pandas uses for empty cells) are treated as empty text; any
            other non-string value is converted with ``str()`` first.

    Returns:
        The cleaned string; ``''`` for missing input.
    """
    # `text != text` is only true for NaN, so this catches pandas' missing-value
    # marker without needing a pandas/numpy call inside the helper.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    text = str(text).lower()
    # Drop punctuation and symbols; whitespace is kept so word boundaries survive.
    return re.sub(r'[^\w\s]', '', text)

# Run the cleaner over every row of the text column, then preview the result.
df['text'] = df['text'].map(clean_text)
df.head()

## Step 2: Tokenization and Padding

In [None]:
# Hold out 25% of the rows for evaluation. Stratifying on the label keeps the
# real/fake ratio the same in both splits; with a dataset this small, an
# unstratified split can leave the test set with only one class.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['label'],
    test_size=0.25,
    random_state=42,
    stratify=df['label'],
)

# Fit the vocabulary on the training texts only, so nothing from the test set
# leaks into it. The OOV token gives words unseen during fitting a dedicated
# index instead of silently dropping them from the sequence.
tokenizer = Tokenizer(num_words=5000, oov_token='<OOV>')
tokenizer.fit_on_texts(X_train)
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

# Pad/truncate every sequence to max_len tokens so the model gets fixed-width input.
max_len = 50
X_train_pad = pad_sequences(X_train_seq, maxlen=max_len)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len)

## Step 3: Build and Train the Bi-LSTM Model

In [None]:
# Seed TensorFlow's global RNG so weight initialisation, dropout and batch
# shuffling are as repeatable as possible across "Restart & Run All".
tf.random.set_seed(42)

model = Sequential()
# input_dim must cover every index the tokenizer can emit (it is capped with
# num_words=5000 upstream); each token id becomes a 64-dimensional vector.
model.add(Embedding(input_dim=5000, output_dim=64, input_length=max_len))
# Read the sequence in both directions with a 64-unit LSTM each way.
model.add(Bidirectional(LSTM(64)))
model.add(Dropout(0.5))
# Single sigmoid unit: the output is the probability of label 1.
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# Keep the History object rather than letting its repr print as the cell output;
# it holds the per-epoch loss/accuracy for later inspection.
history = model.fit(
    X_train_pad,
    np.array(y_train),
    epochs=5,
    validation_split=0.1,
    batch_size=2,
)

## Step 4: Evaluate the Model

In [None]:
# The model ends in a single sigmoid unit, so predict() yields one probability
# per row in an (n, 1) array.
y_pred_prob = model.predict(X_test_pad)
# Threshold at 0.5 and flatten to 1-D so the predictions have the same shape as y_test.
y_pred = (y_pred_prob > 0.5).astype(int).ravel()
# Pin both labels and name them (0 = Fake, 1 = Real) so the report always has
# the same rows, even when the tiny test split is missing a class;
# zero_division=0 reports 0 instead of warning when a metric is undefined.
print(classification_report(
    y_test,
    y_pred,
    labels=[0, 1],
    target_names=['Fake', 'Real'],
    zero_division=0,
))

## Step 5: Save the Model and Tokenizer

In [None]:
# Persist the trained network (HDF5 file) ...
model.save("fake_news_bilstm_model.h5")

# ... and the fitted tokenizer alongside it: inference must reuse the exact
# word-to-index mapping the model was trained with.
# NOTE: only unpickle this file from a trusted source; pickle can execute code on load.
with open("fake_news_tokenizer.pkl", "wb") as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file)