In [1]:
# Step 1: Import libraries
import pandas as pd

# Step 2: Read the two CSV files (relative paths, resolved against the
# current working directory) into separate DataFrames
fake_news = pd.read_csv('Fake.csv')
true_news = pd.read_csv('True.csv')

# Step 3: Preview the first rows of each frame; the heading and the preview
# are emitted by a single print call, separated by a newline
print("Fake News Example:", fake_news.head(), sep="\n")
print("\nTrue News Example:", true_news.head(), sep="\n")

Fake News Example:
                                               title  \
0   Donald Trump Sends Out Embarrassing New Year’...   
1   Drunk Bragging Trump Staffer Started Russian ...   
2   Sheriff David Clarke Becomes An Internet Joke...   
3   Trump Is So Obsessed He Even Has Obama’s Name...   
4   Pope Francis Just Called Out Donald Trump Dur...   

                                                text subject  \
0  Donald Trump just couldn t wish all Americans ...    News   
1  House Intelligence Committee Chairman Devin Nu...    News   
2  On Friday, it was revealed that former Milwauk...    News   
3  On Christmas day, Donald Trump announced that ...    News   
4  Pope Francis used his annual Christmas Day mes...    News   

                date  
0  December 31, 2017  
1  December 31, 2017  
2  December 30, 2017  
3  December 29, 2017  
4  December 25, 2017  

True News Example:
                                               title  \
0  As U.S. budget fight looms, Republicans fl

In [2]:
# Step 1: Add a new column "label" to each dataframe
fake_news['label'] = 0   # Fake news = 0
true_news['label'] = 1   # True news = 1

# Step 2: Combine both datasets into one (rows stacked, fake first)
data = pd.concat([fake_news, true_news], axis=0)

# Step 3: Shuffle the dataset (so that fake and true are mixed).
# A fixed random_state makes the row order identical on every run; without
# it the shuffled order, and everything computed from it, differs each time
# the cell is executed.
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

# Step 4: Show the combined dataset
print(data.head())

                                               title  \
0  BOOM! CLINTON RAP: ‘Pay My Foundation for the ...   
1   There’s A Bill In The House That Would Comple...   
2   Sarah Palin Calls For Executing Democratic ‘C...   
3  McMaster says 'not concerned' after Kushner ba...   
4   Congressional Black Caucus Cries Foul Over Su...   

                                                text       subject  \
0                                                         politics   
1  For Republicans, the Robert Mueller investigat...          News   
2  Sarah Palin agrees with Donald Trump that Demo...          News   
3  TAORMINA, Italy (Reuters) - Asked about report...  politicsNews   
4  Earlier this week, following Hillary Clinton s...          News   

                date  label  
0        Nov 3, 2016      0  
1    August 28, 2017      0  
2   October 20, 2016      0  
3      May 27, 2017       1  
4  February 11, 2016      0  


In [3]:
from sklearn.model_selection import train_test_split

# Input is the article body ("text" column); target is the 0/1 "label" column
X, y = data['text'], data['label']

# Hold out 20% of the rows for testing; the remaining 80% are used for training
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print("Training data size:", X_train.shape)
print("Testing data size:", X_test.shape)

Training data size: (35918,)
Testing data size: (8980,)


In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Build the TF-IDF vectorizer: English stop words are dropped and
# max_df=0.7 is applied as the document-frequency cut-off
vectorizer = TfidfVectorizer(
    max_df=0.7,
    stop_words='english',
)

# The vectorizer is fitted on the training split only ...
X_train_tfidf = vectorizer.fit_transform(X_train)

# ... and the test split is transformed with that already-fitted vectorizer
X_test_tfidf = vectorizer.transform(X_test)

print("TF-IDF transformation complete!")
print("Shape of X_train_tfidf:", X_train_tfidf.shape)
print("Shape of X_test_tfidf:", X_test_tfidf.shape)

TF-IDF transformation complete!
Shape of X_train_tfidf: (35918, 111467)
Shape of X_test_tfidf: (8980, 111467)


In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Steps 1-2: Create a logistic regression with default settings and train it
# on the TF-IDF training matrix (fit returns the fitted estimator itself)
model = LogisticRegression().fit(X_train_tfidf, y_train)

# Step 3: Predict labels for the held-out test matrix
y_pred = model.predict(X_test_tfidf)

# Step 4: Report accuracy, confusion matrix and per-class metrics
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_true=y_test, y_pred=y_pred))
print("\nClassification Report:\n", classification_report(y_true=y_test, y_pred=y_pred))

Accuracy: 0.9838530066815144

Confusion Matrix:
 [[4555   92]
 [  53 4280]]

Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.98      0.98      4647
           1       0.98      0.99      0.98      4333

    accuracy                           0.98      8980
   macro avg       0.98      0.98      0.98      8980
weighted avg       0.98      0.98      0.98      8980



In [8]:
def predict_news(news_text):
    """Classify one piece of news text and print the verdict.

    Uses the module-level ``vectorizer`` and ``model`` objects, which must
    already be fitted/trained before this function is called.

    Parameters
    ----------
    news_text : str
        The article text (or headline) to classify.

    Returns
    -------
    None
        The verdict is printed, not returned. Label 0 is reported as FAKE,
        any other label as REAL.
    """
    # Guard: blank or non-string input carries no usable text, so any
    # verdict printed for it would be meaningless. Report that instead.
    if not isinstance(news_text, str) or not news_text.strip():
        print("⚠️ No news text provided; nothing to classify.")
        return

    # Transform the news text to TF-IDF vector (a batch containing one document)
    news_tfidf = vectorizer.transform([news_text])
    # Predict using the trained model
    prediction = model.predict(news_tfidf)

    # Output
    if prediction[0] == 0:
        print("🔴 This news is likely FAKE.")
    else:
        print("🟢 This news is likely REAL.")

# Example: classify a single hand-written headline
sample_news = (
    "Donald Trump claims massive voter fraud during election."
)
predict_news(news_text=sample_news)

🔴 This news is likely FAKE.


In [10]:
import pickle

# Persist the trained model first, then the TF-IDF vectorizer, each to its
# own pickle file in the working directory.
# NOTE: pickle files can execute arbitrary code when loaded; only load these
# artifacts from a trusted location.
for artifact_path, artifact in (
    ('fake_news_model.pkl', model),
    ('tfidf_vectorizer.pkl', vectorizer),
):
    with open(artifact_path, 'wb') as artifact_file:
        pickle.dump(artifact, artifact_file)

print("✅ Model and vectorizer saved successfully!")

✅ Model and vectorizer saved successfully!
