<a href="https://colab.research.google.com/github/thanigai1830/511323106052-fake-new-detection/blob/main/511323106052_fake_news_detections.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import nltk
import string

# Download the NLTK 'stopwords' corpus. This is a top-level statement, so it
# runs every time the file is executed or imported.
# NOTE(review): presumably needs network access on the first run - confirm.
nltk.download('stopwords')
from nltk.corpus import stopwords

# Load dataset
def load_data(path='fake_or_real_news.csv'):
    """Load the news dataset and keep only complete text/label rows.

    Args:
        path: Location of the CSV file (anything ``pandas.read_csv``
            accepts). Defaults to ``'fake_or_real_news.csv'`` in the
            current working directory.

    Returns:
        A DataFrame holding just the ``'text'`` and ``'label'`` columns, with
        every row that has a missing value in either column removed. The
        row index of the CSV is kept, so it may contain gaps.

    Raises:
        KeyError: If the file lacks a ``'text'`` or ``'label'`` column.
    """
    required = ['text', 'label']
    df = pd.read_csv(path)

    # Fail early with a message that names the file and the missing columns.
    missing = [column for column in required if column not in df.columns]
    if missing:
        raise KeyError(f"{path!r} is missing required column(s): {missing}")

    # Non-inplace dropna returns a new frame instead of mutating a selection.
    return df[required].dropna()

# Text preprocessing
# Translation table that deletes every character in string.punctuation.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

# English stop-word set; filled on first use by _english_stop_words().
_STOP_WORDS = None


def _english_stop_words():
    """Return the English stop-word set, building and caching it once."""
    global _STOP_WORDS
    if _STOP_WORDS is None:
        raw = stopwords.words('english')
        # clean_text strips punctuation *before* filtering, so a stop word
        # that contains punctuation (NLTK's English list includes
        # contractions such as "don't") would never match its stripped form
        # ("dont"). Keep the original entries and add punctuation-free ones.
        stripped = (word.translate(_PUNCTUATION_TABLE) for word in raw)
        _STOP_WORDS = frozenset(raw) | frozenset(stripped)
    return _STOP_WORDS


def clean_text(text):
    """Normalise one document for vectorisation.

    The text is lower-cased, every character in ``string.punctuation`` is
    removed, the result is split on whitespace, and English stop words are
    dropped.

    Args:
        text: The raw document as a ``str``.

    Returns:
        The remaining words joined by single spaces (``''`` when nothing
        is left).
    """
    normalised = text.lower().translate(_PUNCTUATION_TABLE)
    # Loaded once and reused: rebuilding the set for every document is
    # loop-invariant work when this runs via DataFrame.apply.
    stop_words = _english_stop_words()
    return ' '.join(
        word for word in normalised.split() if word not in stop_words
    )

# Main function
def fake_news_detection():
    """Train and evaluate a TF-IDF + logistic-regression news classifier.

    Loads the dataset with ``load_data``, cleans the ``'text'`` column with
    ``clean_text``, holds out 20% of the rows for testing (fixed
    ``random_state=42``), fits a 5000-feature TF-IDF vectoriser and a
    ``LogisticRegression`` model on the training split, and prints the
    accuracy and classification report for the held-out split.

    Returns:
        None. Results are written to standard output.
    """
    # Load the raw rows and normalise the article text in place.
    frame = load_data()
    frame['text'] = frame['text'].apply(clean_text)

    # Hold out a test split.
    documents = frame['text']
    labels = frame['label']
    train_docs, test_docs, train_labels, test_labels = train_test_split(
        documents, labels, test_size=0.2, random_state=42
    )

    # Fit the vocabulary on the training documents only, then reuse it
    # to transform the held-out documents.
    vectorizer = TfidfVectorizer(max_features=5000)
    train_matrix = vectorizer.fit_transform(train_docs)
    test_matrix = vectorizer.transform(test_docs)

    # Train the classifier and predict on the held-out split.
    classifier = LogisticRegression()
    classifier.fit(train_matrix, train_labels)
    predicted = classifier.predict(test_matrix)

    # Report the results.
    accuracy = accuracy_score(test_labels, predicted)
    print("Accuracy:", accuracy)
    report = classification_report(test_labels, predicted)
    print("\nClassification Report:\n", report)

# Run the full pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    fake_news_detection()
