In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report  # Modified: Combined import statements
import re
import string
from sklearn.feature_extraction.text import TfidfVectorizer  # Modified: Moved import statement here
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier  # Modified: Combined import statements


In [None]:
# Read both source datasets: Fake.csv is bound to `df`, True.csv to `dt`.
df, dt = pd.read_csv("Fake.csv"), pd.read_csv("True.csv")

In [None]:
# Combine the two sources into one labelled frame (class 0 = Fake.csv, 1 = True.csv).
# Rows with missing values are dropped from BOTH sources so the two classes are
# filtered the same way and no NaN text reaches the preprocessing step.
# NOTE: a later cell rebinds `dt` to a DecisionTreeClassifier, so the loading
# cell has to be re-run before this cell can be re-run.
df = df.dropna().assign(**{'class': 0})
dt = dt.dropna().assign(**{'class': 1})

# ignore_index gives the merged frame a unique 0..n-1 index instead of
# repeating each file's own row numbers.
datamerge = pd.concat([df, dt], axis=0, ignore_index=True)



# Preprocessing function
def clean_text(text):
    """Normalise a raw article string before TF-IDF vectorisation.

    Steps, in order: lowercase; drop square-bracketed spans; drop URLs and
    HTML-style tags; turn every remaining non-word character (including
    newlines) into a space; delete underscores; delete every token that
    contains a digit; trim leading/trailing whitespace.

    Args:
        text: The raw text. Anything that is not a ``str`` (for example the
            float NaN pandas uses for a missing cell) is treated as empty.

    Returns:
        The cleaned, lowercase string. Interior runs of spaces are kept.
    """
    if not isinstance(text, str):
        return ''

    text = text.lower()
    text = re.sub(r'\[.*?\]', '', text)

    # URL and tag removal must run BEFORE the \W substitution below: once
    # ':', '/', '.', '<' and '>' have been turned into spaces these patterns
    # can never match and the URL/tag fragments would survive as tokens.
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)

    # Every non-word character becomes a space; this also covers newlines.
    text = re.sub(r'\W', ' ', text)
    # '_' counts as a word character, so it is the only punctuation left here.
    text = re.sub(r'[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub(r'\w*\d\w*', '', text)
    return text.strip()


# Run every article body through clean_text, overwriting the raw column.
datamerge['text'] = datamerge['text'].apply(clean_text)

# Model input is the cleaned text column; the target is the 0/1 class column.
x, y = datamerge['text'], datamerge['class']


In [1]:

# One seed for the splits and for every stochastic estimator, so a fresh
# "Restart & Run All" reproduces the same reports.
RANDOM_STATE = 42

# Splitting data into train, validation, and test sets (60% / 20% / 20%).
# NOTE: x_val / y_val are created here but nothing in this cell uses them.
x_train, x_temp, y_train, y_temp = train_test_split(
    x, y, test_size=0.4, random_state=RANDOM_STATE
)
x_val, x_test, y_val, y_test = train_test_split(
    x_temp, y_temp, test_size=0.5, random_state=RANDOM_STATE
)

# Vectorize text data: the vectorizer is fitted on the training split only and
# then reused unchanged for the test split.
vectorizer = TfidfVectorizer()
xv_train = vectorizer.fit_transform(x_train)
xv_test = vectorizer.transform(x_test)

# Classifier initialization. The tree-based estimators are seeded because they
# are randomised internally.
# NOTE: `dt` is rebound here from the True.csv DataFrame to the decision tree.
lr = LogisticRegression()
dt = DecisionTreeClassifier(random_state=RANDOM_STATE)
gb = GradientBoostingClassifier(random_state=RANDOM_STATE)
rf = RandomForestClassifier(random_state=RANDOM_STATE)

# Training: every model sees the same TF-IDF training matrix.
for model in (lr, dt, gb, rf):
    model.fit(xv_train, y_train)

# Predictions on the held-out test split.
pred_lr = lr.predict(xv_test)
pred_dt = dt.predict(xv_test)
pred_gb = gb.predict(xv_test)
pred_rf = rf.predict(xv_test)

# Evaluation: one classification report per model, in a fixed order.
for title, predictions in (
    ("Logistic Regression:", pred_lr),
    ("Decision Tree:", pred_dt),
    ("Gradient Boosting:", pred_gb),
    ("Random Forest:", pred_rf),
):
    print(title)
    print(classification_report(y_test, predictions))

# Function for manual testing
def output_label(n):
    """Translate a numeric class prediction into a display string.

    Args:
        n: Predicted class value; the training labels are 0 and 1.

    Returns:
        "Not a Fake News" when ``n >= 0.5``, "Fake news" when ``n < 0.5``,
        and ``None`` when neither comparison holds (e.g. NaN).
    """
    if n >= 0.5:
        return "Not a Fake News"
    if n < 0.5:
        return "Fake news"
    return None



Logistic Regression:
              precision    recall  f1-score   support

           0       0.99      0.98      0.99      4743
           1       0.98      0.99      0.98      4237

    accuracy                           0.98      8980
   macro avg       0.98      0.98      0.98      8980
weighted avg       0.98      0.98      0.98      8980

Decision Tree:
              precision    recall  f1-score   support

           0       1.00      0.99      0.99      4743
           1       0.99      1.00      0.99      4237

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980

Gradient Boosting:
              precision    recall  f1-score   support

           0       1.00      0.99      1.00      4743
           1       0.99      1.00      1.00      4237

    accuracy                           1.00      8980
   macro avg       1.00      1.00      1.00      8980
weighted avg       

In [None]:
def manualtest(news):
    """Classify one piece of text with all four trained models and print the verdicts.

    The text is cleaned with clean_text, vectorised with the already-fitted
    `vectorizer`, and passed to `lr`, `dt`, `gb` and `rf`; each prediction is
    printed as a label produced by output_label.

    Args:
        news: Raw article text to classify.
    """
    features = vectorizer.transform([clean_text(news)])

    headings = (
        "\nLogistic Regression Prediction:",
        "Decision Tree Prediction:",
        "Gradient Boosting Prediction:",
        "Random Forest Prediction:",
    )
    # Run every model first, then print, so output only starts once all
    # predictions have succeeded.
    outcomes = [model.predict(features) for model in (lr, dt, gb, rf)]

    for heading, outcome in zip(headings, outcomes):
        print(heading, output_label(outcome[0]))

# Prompt for an article on stdin and classify it with manualtest.
# NOTE: input() blocks until text is entered, so this cell stalls an
# unattended "Run All"; the result depends on what the user types.
news = input("Enter news for manual testing: ")
manualtest(news)
