In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [2]:
def load_data(file_path):
    """Read a CSV file into a pandas DataFrame.

    Parameters
    ----------
    file_path : str, path-like or file-like
        Forwarded unchanged to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed contents, using pandas' default parsing options.
    """
    return pd.read_csv(file_path)

In [3]:
def prepare_data(data, text_col='text', label_col='label'):
    """Split a loaded dataset into feature text and target labels.

    Parameters
    ----------
    data : pandas.DataFrame (or any mapping indexable by column name)
        The loaded dataset.
    text_col : str, default 'text'
        Name of the column holding the document text.
    label_col : str, default 'label'
        Name of the column holding the class labels.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` is ``data[text_col]`` and ``y`` is
        ``data[label_col]``.

    Raises
    ------
    KeyError
        Propagated from the column lookup if either column is absent.
    """
    # Column names are parameters so the same helper works for datasets that
    # do not use the 'text' / 'label' naming; the defaults keep old calls valid.
    X = data[text_col]
    y = data[label_col]
    return X, y

In [4]:
def split_data(X, y, test_size=0.2, random_state=42):
    """Partition features and labels into train and test subsets.

    Parameters
    ----------
    X : array-like
        Feature values (here, the document texts).
    y : array-like
        Labels aligned with ``X``.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split``; the default holds out 20% of rows.
    random_state : int or None, default 42
        Forwarded to ``train_test_split`` so the shuffle is reproducible.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.
    """
    # The split ratio and seed used to be literals in the call; exposing them
    # with the same defaults lets callers vary them without editing this cell.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    return X_train, X_test, y_train, y_test

In [5]:
def create_pipeline():
    """Build the unfitted text vectorizer and classifier used for training.

    Despite the name, this does not return a scikit-learn ``Pipeline``: it
    returns the two components separately so the caller can fit and apply
    them step by step.

    Returns
    -------
    tuple
        ``(tfidf_vectorizer, classifier)`` where the vectorizer is a
        ``TfidfVectorizer`` configured with English stop words and
        ``max_df=0.7``, and the classifier is a default ``MultinomialNB``.
    """
    return TfidfVectorizer(stop_words='english', max_df=0.7), MultinomialNB()

In [6]:
def train_model(tfidf_vectorizer, classifier, X_train, y_train):
    """Fit the vectorizer and the classifier on the training data, in place.

    Parameters
    ----------
    tfidf_vectorizer : object with ``fit_transform``
        Fitted on ``X_train``; its output becomes the classifier's features.
    classifier : object with ``fit``
        Fitted on the vectorized training data and ``y_train``.
    X_train : iterable
        Training documents.
    y_train : iterable
        Training labels aligned with ``X_train``.

    Returns
    -------
    None
        Both estimators are mutated by fitting; nothing is returned.
    """
    # Vectorize first, then fit the classifier on the resulting features.
    classifier.fit(tfidf_vectorizer.fit_transform(X_train), y_train)

In [7]:
def predict(tfidf_vectorizer, classifier, X_test):
    """Predict labels for new documents with already-fitted components.

    Parameters
    ----------
    tfidf_vectorizer : object with ``transform``
        Used with ``transform`` only (not re-fitted), so it must have been
        fitted beforehand.
    classifier : object with ``predict``
        Receives the vectorized documents.
    X_test : iterable
        Documents to classify.

    Returns
    -------
    object
        Whatever ``classifier.predict`` returns for the transformed input.
    """
    return classifier.predict(tfidf_vectorizer.transform(X_test))

In [8]:
def evaluate_model(y_test, y_pred):
    """Compute evaluation metrics for a set of predictions.

    Parameters
    ----------
    y_test : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels aligned with ``y_test``.

    Returns
    -------
    tuple
        ``(accuracy, conf_matrix, classification_rep)`` — the results of
        ``accuracy_score``, ``confusion_matrix`` and
        ``classification_report`` respectively, each called with
        ``(y_test, y_pred)``.
    """
    return (
        accuracy_score(y_test, y_pred),
        confusion_matrix(y_test, y_pred),
        classification_report(y_test, y_pred),
    )

In [9]:
def main(file_path='fake_or_real_news.csv'):
    """Train the classifier on a hold-out split and print evaluation metrics.

    Steps: load the CSV, extract text/labels, split into train and test sets,
    build the vectorizer and classifier, fit them on the training portion,
    predict on the test portion, and print accuracy, confusion matrix and
    classification report.

    Parameters
    ----------
    file_path : str, default 'fake_or_real_news.csv'
        Location of the dataset CSV. Previously a literal inside the body;
        it is now a parameter so another dataset can be used without editing
        the function. Calling ``main()`` with no arguments behaves as before.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    data = load_data(file_path)
    X, y = prepare_data(data)
    X_train, X_test, y_train, y_test = split_data(X, y)

    # This unpacking expects create_pipeline() to hand back the vectorizer
    # and the classifier as two separate objects.
    tfidf_vectorizer, classifier = create_pipeline()
    train_model(tfidf_vectorizer, classifier, X_train, y_train)

    y_pred = predict(tfidf_vectorizer, classifier, X_test)
    accuracy, conf_matrix, classification_rep = evaluate_model(y_test, y_pred)

    print("Accuracy:", accuracy)
    print("Confusion Matrix:\n", conf_matrix)
    print("Classification Report:\n", classification_rep)

# Run the train/evaluate workflow only when this code executes as the main
# module (in a notebook kernel that is normally the case, so running this
# cell calls main() immediately).
if __name__ == "__main__":
    main()

Accuracy: 0.8453038674033149
Confusion Matrix:
 [[443 185]
 [ 11 628]]
Classification Report:
               precision    recall  f1-score   support

        FAKE       0.98      0.71      0.82       628
        REAL       0.77      0.98      0.87       639

    accuracy                           0.85      1267
   macro avg       0.87      0.84      0.84      1267
weighted avg       0.87      0.85      0.84      1267



# Fake News

In [11]:
# Import the necessary libraries
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

# Define a function to preprocess the input text
def preprocess_text(text):
    """Hook for cleaning input text before it is classified.

    Currently a pass-through: no preprocessing is applied and the argument
    is returned unchanged. Add normalization steps here if they are needed.

    Parameters
    ----------
    text : str
        Raw text to be classified.

    Returns
    -------
    str
        The same object that was passed in.
    """
    return text

# Create the NLP pipeline
def create_pipeline():
    """Build an unfitted scikit-learn ``Pipeline`` for text classification.

    Note: this shadows the ``create_pipeline`` defined in an earlier cell,
    which returned the vectorizer and classifier as a separate tuple; this
    version wraps both in a single ``Pipeline`` object instead.

    Returns
    -------
    Pipeline
        Two steps, in order: ``'tfidf'`` (a ``TfidfVectorizer`` with English
        stop words and ``max_df=0.7``) and ``'classifier'`` (a default
        ``MultinomialNB``).
    """
    steps = [
        ('tfidf', TfidfVectorizer(stop_words='english', max_df=0.7)),
        ('classifier', MultinomialNB()),
    ]
    return Pipeline(steps)

# Main function for real-time fake news analysis
def main(file_path='fake_or_real_news.csv'):
    """Fit the pipeline on the whole dataset and classify one user-entered text.

    The CSV is loaded, the pipeline from ``create_pipeline()`` is fitted on
    every row (no hold-out split), the user is prompted for a piece of text
    on standard input, and the predicted label is printed.

    Parameters
    ----------
    file_path : str, default 'fake_or_real_news.csv'
        Location of the dataset CSV; it must provide ``text`` and ``label``
        columns. Previously a literal inside the body.

    Returns
    -------
    None
        The outcome is printed, not returned.
    """
    data = pd.read_csv(file_path)
    X = data['text']
    y = data['label']

    # Fit on the entire dataset: this cell is for interactive prediction,
    # not evaluation, so nothing is held out.
    pipeline = create_pipeline()
    pipeline.fit(X, y)

    print("Enter the news article or text for analysis:")
    user_input = input()

    # An empty or whitespace-only entry has no tokens to vectorize, so any
    # label produced for it would be meaningless; report that and stop.
    if not user_input.strip():
        print("No text entered; nothing to classify.")
        return

    preprocessed_input = preprocess_text(user_input)

    # predict() expects a collection of documents, hence the one-element list.
    prediction = pipeline.predict([preprocessed_input])

    # Print the label the model actually returned rather than assuming that
    # anything other than 'FAKE' must be 'REAL'. For a FAKE/REAL dataset the
    # printed text is identical to the previous if/else.
    label = prediction[0]
    print(f"The news article is classified as {label}.")

# Start the interactive classifier only when this code executes as the main
# module (in a notebook kernel that is normally the case, so running this
# cell calls main() and waits for user input).
if __name__ == "__main__":
    main()


Enter the news article or text for analysis:
shocking news
The news article is classified as FAKE.
