In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pickle
from tensorflow.keras.models import load_model

# TRAINING SCRIPT

In [35]:
class FakeNewsDetector:
    """Train a 1D-CNN binary text classifier that separates fake from true news.

    Labels follow the convention set in ``load_datasets``: 1 = fake, 0 = true.
    """

    def __init__(self, max_features=5000, max_length=500):
        """Initialize Fake News Detection model.

        Args:
            max_features: Passed to ``Tokenizer(num_words=...)`` and used as the
                embedding layer's input dimension.
            max_length: Length every token sequence is padded/truncated to.
        """
        self.max_features = max_features
        self.max_length = max_length
        self.tokenizer = Tokenizer(num_words=max_features)
        self.model = None  # set by train()

    def load_datasets(self, fake_csv, true_csv, text_column='text'):
        """Load the fake and true CSV files and build a labelled corpus.

        Args:
            fake_csv: Path to the CSV holding fake articles.
            true_csv: Path to the CSV holding true articles.
            text_column: Name of the column that contains the article text.

        Returns:
            ``(texts, labels)`` where ``texts`` is a list of strings (fake rows
            first, then true rows; missing values become ``''``) and ``labels``
            is a NumPy array with 1 for fake rows and 0 for true rows.

        Raises:
            ValueError: If ``text_column`` is missing from either file.
            Exception: Any error raised by ``pd.read_csv`` is logged and re-raised.
        """
        try:
            fake_df = pd.read_csv(fake_csv)
            true_df = pd.read_csv(true_csv)
        except Exception as e:
            print(f"Error loading CSV files: {e}")
            raise

        if text_column not in fake_df.columns or text_column not in true_df.columns:
            raise ValueError(f"Text column '{text_column}' not found in one or both datasets")

        fake_texts = fake_df[text_column].fillna('').astype(str).tolist()
        true_texts = true_df[text_column].fillna('').astype(str).tolist()

        texts = fake_texts + true_texts
        labels = [1] * len(fake_texts) + [0] * len(true_texts)

        return texts, np.array(labels)

    def preprocess_text(self, texts, fit=True):
        """Lower-case, tokenize and pad a list of texts.

        Args:
            texts: Iterable of strings.
            fit: When True (the default, matching the previous behaviour) the
                tokenizer vocabulary is fitted on ``texts`` before they are
                converted. Pass False for held-out data so its vocabulary does
                not leak into the tokenizer.

        Returns:
            Array of shape ``(len(texts), self.max_length)`` with post-padding
            and post-truncation applied.
        """
        texts = [text.lower() for text in texts]
        if fit:
            self.tokenizer.fit_on_texts(texts)
        sequences = self.tokenizer.texts_to_sequences(texts)
        return pad_sequences(sequences, maxlen=self.max_length, truncating='post', padding='post')

    def build_model(self, input_shape):
        """Build and compile the CNN used for text classification.

        Args:
            input_shape: Shape of the padded training matrix; only
                ``input_shape[1]`` (the sequence length) is used.

        Returns:
            A compiled Keras ``Sequential`` model with a single sigmoid output.
        """
        model = Sequential([
            Embedding(self.max_features, 128, input_length=input_shape[1]),
            Conv1D(128, 5, activation='relu'),
            GlobalMaxPooling1D(),
            Dense(64, activation='relu'),
            Dropout(0.5),
            Dense(32, activation='relu'),
            Dropout(0.3),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        return model

    def train(self, fake_csv, true_csv, text_column='text', test_size=0.2, epochs=10,
              batch_size=32, tokenizer_path='tokenizer.pkl'):
        """Train the fake news detection model from CSV files.

        The raw texts are split into train/test *before* the tokenizer is
        fitted, so the reported test metrics come from text the tokenizer has
        never seen.

        Args:
            fake_csv: Path to the CSV holding fake articles.
            true_csv: Path to the CSV holding true articles.
            text_column: Name of the text column in both files.
            test_size: Fraction of samples held out for the final evaluation.
            epochs: Number of training epochs.
            batch_size: Mini-batch size.
            tokenizer_path: Where the fitted tokenizer is pickled.

        Returns:
            The Keras ``History`` object returned by ``model.fit``.
        """
        texts, labels = self.load_datasets(fake_csv, true_csv, text_column)

        # Split first: fitting the tokenizer on the full corpus would let the
        # held-out test texts influence the vocabulary (data leakage).
        texts_train, texts_test, y_train, y_test = train_test_split(
            texts, labels, test_size=test_size, random_state=42)

        X_train = self.preprocess_text(texts_train, fit=True)
        X_test = self.preprocess_text(texts_test, fit=False)

        self.model = self.build_model(X_train.shape)
        history = self.model.fit(
            X_train, y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=0.2,
            verbose=1
        )

        loss, accuracy = self.model.evaluate(X_test, y_test)
        print(f"\nTest Loss: {loss:.4f}")
        print(f"Test Accuracy: {accuracy:.4f}")

        with open(tokenizer_path, 'wb') as f:
            pickle.dump(self.tokenizer, f)
        print("Tokenizer saved successfully!")

        return history

def main():
    """Train the detector on Fake.csv / True.csv and save the resulting model."""
    dataset_paths = ('Fake.csv', 'True.csv')

    detector = FakeNewsDetector(max_features=10000, max_length=500)
    detector.train(*dataset_paths)

    # Persist the trained network next to the notebook.
    detector.model.save('fake_news_cnn_model.h5')
    print("Model saved successfully!")


# Run training only when executed as a script / notebook cell.
if __name__ == "__main__":
    main()


Epoch 1/10




[1m898/898[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 42ms/step - accuracy: 0.9108 - loss: 0.1862 - val_accuracy: 0.9994 - val_loss: 0.0021
Epoch 2/10
[1m898/898[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 34ms/step - accuracy: 0.9984 - loss: 0.0052 - val_accuracy: 0.9992 - val_loss: 0.0022
Epoch 3/10
[1m898/898[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 37ms/step - accuracy: 0.9998 - loss: 6.9566e-04 - val_accuracy: 0.9989 - val_loss: 0.0049
Epoch 4/10
[1m898/898[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 35ms/step - accuracy: 0.9998 - loss: 7.8397e-04 - val_accuracy: 0.9971 - val_loss: 0.0158
Epoch 5/10
[1m898/898[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 34ms/step - accuracy: 0.9992 - loss: 0.0027 - val_accuracy: 0.9994 - val_loss: 0.0015
Epoch 6/10
[1m898/898[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 35ms/step - accuracy: 1.0000 - loss: 1.4836e-04 - val_accuracy: 0.9994 - val_loss: 0.0019
Epoch 7/10
[1m



Model saved successfully!


#PREDICTION SCRIPT

In [3]:
class FakeNewsPredictor:
    """Load a saved model and tokenizer and classify single news texts."""

    def __init__(self, model_path='fake_news_cnn_model.h5', tokenizer_path='tokenizer.pkl', max_length=500):
        """Initialize predictor with pre-trained model and tokenizer.

        Args:
            model_path: Path of the saved Keras model.
            tokenizer_path: Path of the pickled tokenizer.
            max_length: Length each token sequence is padded/truncated to.
        """
        self.max_length = max_length
        self.model = load_model(model_path)
        print("Model loaded successfully!")

        # SECURITY: pickle.load can execute arbitrary code. Only load tokenizer
        # files that were produced by a trusted training run.
        with open(tokenizer_path, 'rb') as f:
            self.tokenizer = pickle.load(f)
        print("Tokenizer loaded successfully!")

    def preprocess_text(self, text):
        """Lower-case, tokenize and pad a single text.

        Args:
            text: The news text to convert.

        Returns:
            Padded sequence array with one row of length ``self.max_length``.
        """
        text = text.lower()
        sequence = self.tokenizer.texts_to_sequences([text])
        padded_sequence = pad_sequences(sequence, maxlen=self.max_length, truncating='post', padding='post')
        return padded_sequence

    def predict_news(self, text):
        """Predict if news is fake or real.

        Args:
            text: The news text to classify.

        Returns:
            dict with:
              - ``is_fake``: plain ``bool``, True when the model output is > 0.5.
              - ``confidence``: probability of the predicted class, as ``float``.
              - ``fake_probability``: raw model output, as ``float``.
        """
        processed_text = self.preprocess_text(text)
        prediction = self.model.predict(processed_text)[0][0]
        # Cast to a built-in bool; the comparison on a NumPy scalar yields
        # numpy.bool_, which e.g. json.dumps cannot serialize.
        is_fake = bool(prediction > 0.5)
        confidence = prediction if is_fake else 1 - prediction
        return {
            'is_fake': is_fake,
            'confidence': float(confidence),
            'fake_probability': float(prediction)
        }

    def detailed_prediction(self, text):
        """Print the raw model output and the processed input shape for ``text``.

        Debugging helper. It uses this instance's tokenizer and model rather
        than creating a new predictor.

        Args:
            text: The news text to inspect.

        Returns:
            The raw model output as ``float``.
        """
        processed_text = self.preprocess_text(text)
        raw_prediction = self.model.predict(processed_text)[0][0]

        print("Raw Prediction Value:", raw_prediction)
        print("Processed Text Shape:", processed_text.shape)
        return float(raw_prediction)

# Interactive loop: read news text from stdin and print the model's verdict.
if __name__ == "__main__":
    predictor = FakeNewsPredictor(
        model_path='fake_news_cnn_model.h5',
        tokenizer_path='tokenizer.pkl'
    )

    while True:
        # Accept user input; treat Ctrl-D / Ctrl-C as a request to quit
        # instead of crashing with a traceback.
        try:
            text = input("\nEnter news text to check (or type 'exit' to quit): ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\nExiting...")
            break

        if text.lower() == 'exit':
            print("Exiting...")
            break

        # Blank input would be an all-padding sequence; the resulting
        # prediction is meaningless, so ask again.
        if not text:
            print("No text entered. Please type some news text.")
            continue

        # Predict and display results
        result = predictor.predict_news(text)

        print("\nPrediction Result:")
        print(f"Fake News: {'Yes' if result['is_fake'] else 'No'}")
        print(f"Confidence: {result['confidence']*100:.2f}%")

    


    # Function to check news
# def check_news(text):
#     result = predictor.predict_news(text)
#     print("\nPrediction Result:")
#     print(f"Fake News: {'Yes' if result['is_fake'] else 'No'}")
#     print(f"Confidence: {result['confidence']*100:.2f}%")



Model loaded successfully!
Tokenizer loaded successfully!
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 365ms/step

Prediction Result:
Fake News: Yes
Confidence: 100.00%
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step

Prediction Result:
Fake News: No
Confidence: 100.00%
Exiting...
