# Bidirectional LSTM – Fake‑News Detection
Trains a bidirectional LSTM on the LIAR dataset (Hugging Face) and saves the model to `models/bi_lstm_fake_news.h5` and the fitted tokenizer to `models/tokenizer.pkl`.

In [None]:
import os, numpy as np, pandas as pd, tensorflow as tf
from datasets import load_dataset
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential

In [None]:
# Hyperparameters shared by the preprocessing, model and training cells.
MAX_VOCAB = 25_000   # tokenizer vocabulary cap; also the embedding input size
MAX_LEN = 120        # every statement is padded/truncated to this many tokens
EMB_DIM = 128        # width of each learned word embedding
LSTM_UNITS = 64      # units in the LSTM wrapped by the Bidirectional layer
DROPOUT = 0.3        # dropout rate applied before the output layer
BATCH = 128          # mini-batch size passed to model.fit
EPOCHS = 5           # number of training epochs

In [None]:
# Load the LIAR training split and keep only the statement text and its rating.
ds = load_dataset("liar", split="train")
df = ds.to_pandas()[["statement", "label"]]
df.columns = ["text", "target"]

# Collapse the six LIAR ratings into a binary target: 0 = fake-leaning, 1 = true-leaning.
# The Hugging Face label ids are not ordered by truthfulness (per the dataset card:
# false, half-true, mostly-true, true, barely-true, pants-fire), so a contiguous id
# range such as [0, 1, 2] mixes both ends of the scale. Map by class name instead.
fake_names = {"pants-fire", "false", "barely-true"}
label_names = ds.features["label"].names
missing_names = fake_names - set(label_names)
if missing_names:
    # Fail loudly rather than silently labelling every row as true-leaning.
    raise ValueError(f"Unexpected LIAR label names, missing: {sorted(missing_names)}")
fake_ids = [i for i, name in enumerate(label_names) if name in fake_names]
df["target"] = (~df["target"].isin(fake_ids)).astype("int64")
y = df.target.values

# Split row indices first so the tokenizer vocabulary can be learned from the
# training rows only; fitting on the full corpus would leak held-out text.
idx_tr, idx_te = train_test_split(
    np.arange(len(df)), test_size=0.2, stratify=y, random_state=42
)

tok = Tokenizer(num_words=MAX_VOCAB, oov_token="<UNK>")
tok.fit_on_texts(df.text.iloc[idx_tr])

# Encode every statement with the train-fitted tokenizer; words unseen during
# fitting become "<UNK>". Sequences are right-padded/right-truncated to MAX_LEN.
X = pad_sequences(tok.texts_to_sequences(df.text), maxlen=MAX_LEN, padding="post", truncating="post")

X_tr, X_te = X[idx_tr], X[idx_te]
y_tr, y_te = y[idx_tr], y[idx_te]
print('Train/test shapes:', X_tr.shape, X_te.shape)

In [None]:
# Binary classifier: embedding -> bidirectional LSTM -> dropout -> sigmoid output.
model = Sequential()
# mask_zero=True makes downstream layers skip index 0 (the value pad_sequences pads with).
model.add(Embedding(input_dim=MAX_VOCAB, output_dim=EMB_DIM, mask_zero=True))
model.add(Bidirectional(LSTM(units=LSTM_UNITS)))
model.add(Dropout(rate=DROPOUT))
# Single sigmoid unit: the output is the predicted probability of target == 1.
model.add(Dense(units=1, activation="sigmoid"))

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
model.summary()

In [None]:
# Train on the training arrays; Keras sets aside the trailing 20% of them
# (validation_split) to report validation metrics after each epoch.
fit_options = {
    "epochs": EPOCHS,
    "batch_size": BATCH,
    "validation_split": 0.2,
}
history = model.fit(X_tr, y_tr, **fit_options)

In [None]:
# Score the trained model on the held-out split. evaluate() returns the loss
# followed by the compiled metrics (here: accuracy).
eval_results = model.evaluate(X_te, y_te, verbose=0)
loss, acc = eval_results
print(f"Test accuracy: {acc:.3f}")

In [None]:
import pickle

# Persist both artefacts needed for inference: the trained network and the
# fitted tokenizer that maps raw text to the integer ids the network expects.
os.makedirs("models", exist_ok=True)
model.save("models/bi_lstm_fake_news.h5")

with open("models/tokenizer.pkl", "wb") as tok_file:
    pickle.dump(tok, tok_file)

print("✅  Model and tokenizer saved!")