In [None]:
import joblib
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

In [None]:
# Read the two source files: `df` comes from True.csv, `_df` from Fake.csv.
df = pd.read_csv('True.csv')
_df = pd.read_csv('Fake.csv')

# Only the last expression of a cell is rendered, so a bare `df` in the middle
# of the cell shows nothing. Display each frame explicitly, limited to its
# first rows so the notebook output stays small.
# `display` is the built-in provided by the IPython/Jupyter kernel.
display(df.head())
display(_df.head())

In [None]:
# Quick sanity check of both raw frames: dimensions first, then column names.
for caption, frame in (("True news shape:", df), ("Fake news shape:", _df)):
    print(caption, frame.shape)

for caption, frame in (("True news columns:", df), ("Fake news columns:", _df)):
    print(caption, frame.columns.tolist())

In [None]:
# Tag every row with the class of the file it came from.
df['label'] = 'true'
_df['label'] = 'fake'

# Stack both frames into one and drop exact duplicate rows. Chaining
# drop_duplicates() instead of using inplace=True keeps this a single
# expression that produces a_df.
a_df = pd.concat([df, _df], ignore_index=True).drop_duplicates()

# Report on the combined frame (a_df). The previous version printed `df`,
# i.e. only the true-news rows, under the "Combined dataset" caption.
print("\nCombined dataset shape:", a_df.shape)
print(a_df['label'].value_counts())

In [None]:
# Preview the combined, de-duplicated frame built in the previous cell
# (the original previewed `df`, which only holds the true-news rows).
a_df.head()

In [None]:
# Bar chart of how many rows each class has in the combined frame.
# The counts must come from a_df: `df` only contains the 'true' label, so
# plotting df['label'] would always produce a single bar.
fig, ax = plt.subplots()
a_df['label'].value_counts().plot(kind='bar', ax=ax)
ax.set_title('Class Distribution')
ax.set_xlabel('Label')
ax.set_ylabel('Count')
plt.show()

In [None]:
# Coerce the article text to str and write it back to the frame, then pick
# the model input (text) and the target (label).
text_as_str = a_df['text'].astype(str)
a_df['text'] = text_as_str

X, y = a_df['text'], a_df['label']

In [None]:
# Hold out 20% of the rows for testing. The split is seeded for
# reproducibility and stratified on the label so both partitions keep the
# same class proportions.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [None]:
# Baseline model: raw token counts fed into logistic regression.
bow_steps = [
    ('cv', CountVectorizer()),
    ('clf', LogisticRegression(max_iter=1000)),
]
bow_pipeline = Pipeline(steps=bow_steps)

# Pipeline.fit returns the fitted pipeline, so fit and predict can be chained.
y_pred = bow_pipeline.fit(X_train, y_train).predict(X_test)

# Evaluation
bow_accuracy = accuracy_score(y_test, y_pred)
bow_report = classification_report(y_test, y_pred)
bow_confusion = confusion_matrix(y_test, y_pred)

print("\nAccuracy:", bow_accuracy)
print("\nClassification Report:\n", bow_report)
print("Confusion Matrix:\n", bow_confusion)


In [None]:
# Second model: TF-IDF features (English stop words removed) fed into
# logistic regression. `pipeline` is the object persisted later on.
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_classifier = LogisticRegression(max_iter=1000)
pipeline = Pipeline([('tfidf', tfidf_vectorizer), ('clf', tfidf_classifier)])

pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

# Evaluation: print each metric under its heading, in a fixed order.
for heading, metric in (
    ("\nAccuracy:", accuracy_score),
    ("\nClassification Report:\n", classification_report),
    ("Confusion Matrix:\n", confusion_matrix),
):
    print(heading, metric(y_test, y_pred))

In [None]:
# Persist the fitted TF-IDF + logistic-regression pipeline to disk.
# `joblib` is imported in the notebook's top import cell so that all
# dependencies are declared in one place.
# NOTE: the file is a pickle; only load it back from a trusted location.
joblib.dump(pipeline, 'Fake_news_detection.pkl')