In [4]:
# ✅ Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# ✅ Load uploaded CSV files
fake_df = pd.read_csv("Fake.csv")
real_df = pd.read_csv("True.csv")

# ✅ Add labels: 0 = Fake, 1 = Real
fake_df["label"] = 0
real_df["label"] = 1

# ✅ Use only titles and labels
fake_df = fake_df[["title", "label"]]
real_df = real_df[["title", "label"]]

# ✅ Combine and shuffle
data = pd.concat([fake_df, real_df])
data = data.dropna().sample(frac=1).reset_index(drop=True)

# ✅ Features and labels
X = data["title"]
y = data["label"]

# ✅ Vectorize text using TF-IDF
vectorizer = TfidfVectorizer(stop_words="english", max_df=0.7)
X_vectorized = vectorizer.fit_transform(X)

# ✅ Split data
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y, test_size=0.2, random_state=42)

# ✅ Train model
model = LogisticRegression()
model.fit(X_train, y_train)

# ✅ Predict and accuracy
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print("Accuracy:", round(acc * 100, 2), "%")

# ✅ Custom prediction
def predict_fake_news(text):
    vec = vectorizer.transform([text])
    pred = model.predict(vec)
    return "REAL NEWS ✅" if pred[0] == 1 else "FAKE NEWS ⚠️"

# ✅ User input (works fine in Colab)
user_input = input("Enter a news headline to check: ")
print("Prediction:", predict_fake_news(user_input))


Accuracy: 94.57 %
Enter a news headline to check: Government announces free education for all
Prediction: FAKE NEWS ⚠️
