In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
import nltk
from nltk.corpus import stopwords
import string


In [2]:
# Load dataset
df_true = pd.read_csv('D:\Fake News Detection\True.csv')  # Assuming your real news dataset is True.csv
df_fake = pd.read_csv('D:\Fake News Detection\Fake.csv')  # Assuming your fake news dataset is Fake.csv

# Add labels
df_true['label'] = 1  # 1 means real
df_fake['label'] = 0  # 0 means fake

# Combine datasets
df = pd.concat([df_true, df_fake])

# Shuffle the data
df = df.sample(frac=1).reset_index(drop=True)

# Remove punctuation and stop words
nltk.download('stopwords')
stop_words = stopwords.words('english')

def clean_text(text):
    text = ''.join([char for char in text if char not in string.punctuation])
    text = ' '.join([word for word in text.split() if word.lower() not in stop_words])
    return text

df['text'] = df['text'].apply(clean_text)

# Split the data
X = df['text']
y = df['label']

# Vectorize the text
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(X)

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



  df_true = pd.read_csv('D:\Fake News Detection\True.csv')  # Assuming your real news dataset is True.csv
  df_fake = pd.read_csv('D:\Fake News Detection\Fake.csv')  # Assuming your fake news dataset is Fake.csv
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Hello\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [3]:
# Model initialization
model = LogisticRegression()

# Train the model
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")


Accuracy: 0.987750556792873


In [4]:
pip install flask


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [5]:
from flask import Flask, request, render_template
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer

app = Flask(__name__)

# Load model and vectorizer
model = pickle.load(open('fake_news_model.pkl', 'rb'))
vectorizer = pickle.load(open('tfidf_vectorizer.pkl', 'rb'))

@app.route('/')
def home():
    return render_template('index.html')

@app.route('/predict', methods=['POST'])
def predict():
    if request.method == 'POST':
        news = request.form['news']
        data = vectorizer.transform([news])
        prediction = model.predict(data)
        result = 'Real' if prediction == 1 else 'Fake'
        return render_template('result.html', prediction=result)

if __name__ == "__main__":
    app.run(debug=True)


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
 * Restarting with stat


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [13]:
import pickle

# Save the model
pickle.dump(model, open('fake_news_model.pkl', 'wb'))

# Save the vectorizer
pickle.dump(vectorizer, open('tfidf_vectorizer.pkl', 'wb'))
