In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import joblib

# Step 1: Load the datasets
fake_df = pd.read_csv('Fake.csv')
true_df = pd.read_csv('True.csv')

# Step 2: Add a label column to each dataset
fake_df['label'] = 'fake'
true_df['label'] = 'true'

# Combine the datasets
df = pd.concat([fake_df, true_df])

# Shuffle the dataset
df = df.sample(frac=1).reset_index(drop=True)

# Display the first few rows of the combined dataset
print(df.head())

# Step 3: Split the dataset into training and testing sets
X = df['text']  # Features (the news articles)
y = df['label']  # Labels (real or fake)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Feature Extraction
# Initialize the TF-IDF vectorizer
tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)

# Fit and transform the training data, and transform the testing data
tfidf_train = tfidf_vectorizer.fit_transform(X_train)
tfidf_test = tfidf_vectorizer.transform(X_test)

# Step 5: Build the Model
# Initialize the PassiveAggressiveClassifier
pac = PassiveAggressiveClassifier(max_iter=50)
pac.fit(tfidf_train, y_train)

# Make predictions
y_pred = pac.predict(tfidf_test)

# Calculate the accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Display the confusion matrix
confusion = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:')
print(confusion)

# Step 6: Save the Model
# Save the model and vectorizer
joblib.dump(pac, 'fake_news_detector.pkl')
joblib.dump(tfidf_vectorizer, 'tfidf_vectorizer.pkl')

# Step 7: Load the Model for Future Predictions
# Load the model and vectorizer
pac = joblib.load('fake_news_detector.pkl')
tfidf_vectorizer = joblib.load('tfidf_vectorizer.pkl')

# Step 8: Interactive Prediction
# Ask the user to enter news text
user_input = input("Enter the news article text: ")

# Transform the user's input text
user_tfidf = tfidf_vectorizer.transform([user_input])

# Predict whether the news is fake or true
prediction = pac.predict(user_tfidf)

# Output the result
print(f"The news article is classified as: {prediction[0]}")


                                               title  \
0  New Zealand's populist Peters garners attentio...   
1   Trump Spokeswoman Awkwardly Admits She Has No...   
2   A POEM: ‘Twas The Night Before CNN’s Christmas…’   
3  Senator Grassley expresses reservations on two...   
4  Russia's Putin, Egypt's Sisi to sign nuclear p...   

                                                text       subject  \
0  WELLINGTON (Reuters) - Neither major New Zeala...     worldnews   
1  Donald Trump s surrogates are desperately tryi...          News   
2  ACR s BOILER ROOM presents a Christmas poem Tw...       US_News   
3  WASHINGTON (Reuters) - The Republican chairman...  politicsNews   
4  CAIRO (Reuters) - Russian President Vladimir P...     worldnews   

                 date label  
0  September 4, 2017   true  
1       March 6, 2017  fake  
2   December 25, 2017  fake  
3  December 12, 2017   true  
4  December 11, 2017   true  
Accuracy: 99.48%
Confusion Matrix:
[[4688   28]
 [  19 4245]]
