<a href="https://colab.research.google.com/github/rohan00112233/Django_Chatbot_project/blob/master/Fake_News_Detection1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ===================================================================
# Step 1: Import Libraries and Load Data
# ===================================================================
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Load the datasets from your uploaded files.
# If either CSV is missing, print a hint and re-raise: every later step needs
# both dataframes, so carrying on would only fail with a misleading NameError.
try:
    df_true = pd.read_csv('True.csv')
    df_fake = pd.read_csv('Fake.csv')
except FileNotFoundError:
    print("Error: Make sure you have uploaded 'True.csv' and 'Fake.csv' using the file browser on the left.")
    raise
else:
    # Only reached when both files were read without error.
    print("Files 'True.csv' and 'Fake.csv' loaded successfully!")
    print("-" * 50)

# ===================================================================
# Step 2: Preprocessing and Merging
# ===================================================================
# Add a 'label' column: 1 for real, 0 for fake
df_true['label'] = 1
df_fake['label'] = 0

# Combine the two dataframes into one, with a fresh 0..n-1 index
df = pd.concat([df_true, df_fake], ignore_index=True)

# Shuffle the dataset (fixed seed so the order is reproducible)
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Combine title and text into a single 'content' feature.
# Missing values are replaced with '' first: a NaN title or text would
# otherwise turn the whole concatenation into NaN, which the TF-IDF
# vectorizer cannot process as a document.
df['content'] = df['title'].fillna('') + ' ' + df['text'].fillna('')
print("Dataframes combined and preprocessed.")
print("-" * 50)


# ===================================================================
# Step 3: Define Features and Split Data
# ===================================================================
# Model input is the merged text column; the target is the 0/1 label.
X, y = df['content'], df['label']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
print(f"Training data size: {len(X_train)} samples")
print(f"Testing data size: {len(X_test)} samples")
print("-" * 50)

# ===================================================================
# Step 4: Vectorize Text with TF-IDF
# ===================================================================
# Build the vectorizer: English stop words are removed, and max_df=0.7
# caps how common a term may be across documents before it is ignored.
tfidf_vectorizer = TfidfVectorizer(
    stop_words='english',
    max_df=0.7,
)

# Fit on the training split only, then apply the already-fitted
# vectorizer to the test split.
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)
print("Text data vectorized using TF-IDF.")
print("-" * 50)

# ===================================================================
# Step 5: Train the Logistic Regression Model
# ===================================================================
print("Training the Logistic Regression model...")

# Classifier with the solver's iteration cap set to 1000, fitted on the
# TF-IDF features of the training split.
model = LogisticRegression(max_iter=1000)
model.fit(X_train_tfidf, y_train)

print("Model training complete!")
print("-" * 50)

# ===================================================================
# Step 6: Evaluate the Model
# ===================================================================
# Predict labels for the held-out test features.
y_pred = model.predict(X_test_tfidf)

# Overall accuracy, reported as a percentage with two decimals.
accuracy = accuracy_score(y_test, y_pred)
print(f"✅ Model Accuracy: {accuracy * 100:.2f}%")

# Per-class precision/recall/F1; names are listed in label order (0, then 1).
print("\nClassification Report:")
print(
    classification_report(
        y_test,
        y_pred,
        target_names=['Fake News (0)', 'Real News (1)'],
    )
)

Files 'True.csv' and 'Fake.csv' loaded successfully!
--------------------------------------------------
Dataframes combined and preprocessed.
--------------------------------------------------
Training data size: 35918 samples
Testing data size: 8980 samples
--------------------------------------------------
Text data vectorized using TF-IDF.
--------------------------------------------------
Training the Logistic Regression model...
Model training complete!
--------------------------------------------------
✅ Model Accuracy: 98.62%

Classification Report:
               precision    recall  f1-score   support

Fake News (0)       0.99      0.98      0.99      4669
Real News (1)       0.98      0.99      0.99      4311

     accuracy                           0.99      8980
    macro avg       0.99      0.99      0.99      8980
 weighted avg       0.99      0.99      0.99      8980

