In [None]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from newspaper import Article
import warnings
# Silence every warning for the rest of the session (applies process-wide).
warnings.filterwarnings('ignore')


# Fetch the NLTK resources used below: the tokenizer model for
# nltk.word_tokenize, WordNet for WordNetLemmatizer, and the stop-word lists.
# NOTE(review): newer NLTK releases may look for 'punkt_tab' instead of
# 'punkt' when tokenizing -- confirm against the installed version.
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')


# Absolute, machine-specific locations of the two input CSV files.
fake_path = 'C:/Users/HP/Downloads/archive/Fake.csv'
true_path = 'C:/Users/HP/Downloads/archive/True.csv'

fake_df = pd.read_csv(fake_path)
true_df = pd.read_csv(true_path)


# Label convention used throughout the notebook: 0 = fake, 1 = true
# (verify_news interprets a prediction of 0 as FAKE).
fake_df['label'] = 0  
true_df['label'] = 1   
# Stack both frames row-wise into a single dataset.
df = pd.concat([fake_df, true_df], axis=0)


# Shuffle all rows so the two classes are interleaved, then rebuild a clean
# 0..n-1 index. No random_state is given, so the row order differs per run.
df = df.sample(frac=1).reset_index(drop=True)


# Shared helpers read by clean_text(): built once here rather than per call.
lemmatizer = WordNetLemmatizer()
# A set gives O(1) membership tests when filtering tokens.
stop_words = set(stopwords.words('english'))

def clean_text(text):
    """Normalize raw article text into a space-joined string of lemmas.

    Steps: drop every character that is not an ASCII letter or whitespace,
    lowercase, tokenize with NLTK, remove English stop words, and lemmatize
    the remaining tokens.

    Args:
        text: The raw text. Non-string values (e.g. ``None`` or the NaN that
            pandas uses for missing cells) are treated as empty text.

    Returns:
        The cleaned tokens joined by single spaces; ``''`` when nothing
        usable remains.
    """
    # Missing cells arrive as None/NaN; treat them as empty instead of
    # letting re.sub raise TypeError halfway through a DataFrame.apply.
    if not isinstance(text, str):
        return ''

    # The flags must be passed by keyword: the 4th positional parameter of
    # re.sub is `count`, so passing them positionally silently limited the
    # substitution to the first (re.I | re.A) == 258 matches.
    text = re.sub(r'[^a-zA-Z\s]', '', text, flags=re.I | re.A)

    text = text.lower()

    tokens = nltk.word_tokenize(text)

    # Stop words are filtered before lemmatizing, on the lowercased token.
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
    return ' '.join(tokens)


# Pre-process the 'text' column of every row; the result is the model input.
df['clean_text'] = df['text'].apply(clean_text)


# Features are the cleaned text, targets are the 0/1 labels.
X = df['clean_text']
y = df['label']
# Hold out 20% of the rows for evaluation; the split itself is seeded.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# TF-IDF features with the vocabulary capped at 5000 terms.
tfidf_vectorizer = TfidfVectorizer(max_features=5000)
# Fit the vocabulary/IDF weights on the training split only, then reuse the
# fitted vectorizer for the test split so no test data leaks into fitting.
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)


# Linear online classifier, limited to 50 passes over the training data.
# NOTE(review): no random_state is set here, so results may vary between
# runs -- confirm whether reproducibility matters.
pac = PassiveAggressiveClassifier(max_iter=50)
pac.fit(X_train_tfidf, y_train)


# Evaluate on the held-out split and print the accuracy (two decimals)
# followed by the confusion matrix.
y_pred = pac.predict(X_test_tfidf)
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))


def verify_news(url):
    """Download the article at *url* and classify it as fake or true.

    The article body is fetched and parsed with ``newspaper.Article``,
    normalized with ``clean_text``, vectorized with the fitted module-level
    ``tfidf_vectorizer`` and classified with the trained module-level ``pac``
    model (label 0 = fake, anything else = true).

    Args:
        url: Address of the news article to check.

    Returns:
        A human-readable verdict string. This function never raises: a blank
        URL, a failed download/parse, or an article with no usable text all
        yield a message starting with "❌ Error processing URL: ".
    """
    # Reject a missing or blank URL up front so the caller gets a clear
    # message instead of an opaque library error.
    if not isinstance(url, str) or not url.strip():
        return "❌ Error processing URL: URL is empty"

    try:
        article = Article(url)
        article.download()
        article.parse()
        # article.nlp() is deliberately not called: it only computes
        # keywords and a summary, neither of which is used here.

        clean_article = clean_text(article.text)
        # With no tokens the TF-IDF vector is all zeros and the prediction
        # would carry no information about the article, so report the
        # failure rather than returning a verdict.
        if not clean_article:
            return "❌ Error processing URL: no article text could be extracted"

        article_vector = tfidf_vectorizer.transform([clean_article])
        prediction = pac.predict(article_vector)
    except Exception as e:
        # Boundary handler: network, parsing and model errors are all
        # reported as text so interactive callers can simply print the result.
        return f"❌ Error processing URL: {str(e)}"

    if prediction[0] == 0:
        return "⚠️ Warning: This news is likely FAKE"
    return "✅ Verified: This news appears to be TRUE"



[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [4]:
# Placeholder: set this to the URL of the article to check. verify_news
# returns an error message rather than raising when processing fails.
test_url = ""
print(verify_news(test_url))

