In [5]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle
import re
import nltk
from nltk.corpus import stopwords

# Load the trained model
# The path is relative, so it resolves against the current working directory.
model_path = "ML/models/bidirectional_lstm_model.h5"
model = tf.keras.models.load_model(model_path)

# Load the tokenizer used for training
# SECURITY: pickle.load can execute arbitrary code while deserializing, so
# only point tokenizer_path at a file you produced yourself or otherwise trust.
tokenizer_path = "ML/models/tokenizer.pkl"
with open(tokenizer_path, "rb") as handle:
    tokenizer = pickle.load(handle)

# Load NLTK stopwords
# Read the locally installed corpus first and download only when it is
# missing, so re-running this code does not contact the NLTK server or print
# download logs every time.
try:
    stop_words = set(stopwords.words("english"))
except LookupError:
    nltk.download("stopwords")
    stop_words = set(stopwords.words("english"))

def clean_text(text):
    """Normalize raw text before it is handed to the tokenizer.

    Every non-word character is replaced by a space, the result is
    lowercased, and whitespace-separated tokens that appear in the
    module-level ``stop_words`` set are dropped. The remaining tokens are
    returned joined by single spaces.
    """
    # Substitute first, lowercase second: the order is kept exactly as-is.
    normalized = re.sub(r'\W', ' ', text).lower()
    kept_words = filter(lambda token: token not in stop_words, normalized.split())
    return ' '.join(kept_words)

def predict_news(news_text, *, threshold=0.45, max_length=500):
    """Predict whether the given news text is FAKE or REAL.

    Args:
        news_text: Raw news text as a string; it is passed through
            ``clean_text`` before tokenization.
        threshold: Decision cut-off applied to the model score. Scores
            strictly below it are labelled "FAKE", everything else "REAL".
            Defaults to 0.45, the value that used to be hard-coded here.
        max_length: Length the token sequence is padded to (``maxlen`` of
            ``pad_sequences``). Defaults to 500, the previously hard-coded
            value; per the original note it must be the same as during
            training.

    Returns:
        A display string of the form
        "📰 Prediction: <label> (Confidence: <score>)". ``<score>`` is the
        raw model output formatted to four decimals, not the confidence in
        the chosen label: a low score accompanies a "FAKE" verdict.
    """
    cleaned_text = clean_text(news_text)

    # texts_to_sequences works on a batch, so wrap the single document in a list.
    seq = tokenizer.texts_to_sequences([cleaned_text])

    # Pad at the end ('post') so the sequence has exactly max_length entries.
    padded_seq = pad_sequences(seq, maxlen=max_length, padding='post')

    # Batch of one: take the first value of the first row as the score.
    # NOTE(review): assumes the model emits a single score per sample — confirm.
    prediction = model.predict(padded_seq)[0][0]

    label = "FAKE" if prediction < threshold else "REAL"

    return f"📰 Prediction: {label} (Confidence: {prediction:.4f})"

# Demo: classify one headline and print the formatted verdict
sample_news = "How US election fraud claims changed as Trump won"
print(predict_news(news_text=sample_news))


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\bhati\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!






1/1 ━━━━━━━━━━━━━━━━━━━━ 1s 612ms/step
📰 Prediction: FAKE (Confidence: 0.0980)
