In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 

In [None]:
# Load both halves of the dataset and tag every row with its class:
# 0 = fake article, 1 = real article.
fake = pd.read_csv("data/Fake.csv.zip").assign(label=0)
true = pd.read_csv("data/True.csv.zip").assign(label=1)

# Stack the two frames, shuffle all rows with a fixed seed so the order is
# reproducible, and discard the pre-shuffle index.
df = (
    pd.concat([fake, true], axis=0)
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

df.head()

In [None]:
# (rows, columns) of the combined, shuffled frame.
df.shape

In [None]:
# Column dtypes and non-null counts for the combined frame.
df.info()

BASIC DATA CLEANING

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Rows per class (0 = fake, 1 = real) to check how balanced the data is.
df['label'].value_counts()

In [None]:
# Visual check of the class balance (label 0 = fake, 1 = real).
ax = sns.countplot(x='label', data=df)
ax.set_title("Fake vs Real Distribution")
plt.show()

In [None]:
# Merge headline and body into one text field for the model.
# Missing titles/bodies are treated as empty strings: `str + NaN` would turn
# the whole `content` value into NaN, and `clean_text` (which calls `.lower()`)
# cannot process a NaN.
df['content'] = df['title'].fillna('') + " " + df['text'].fillna('')

# Keep only the model input and the target. `.copy()` makes `df` an independent
# frame, so the later in-place update of `df['content']` does not raise
# pandas' SettingWithCopyWarning.
df = df[['content', 'label']].copy()
df.head()

TEXT PREPROCESSING

In [None]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

nltk.download('stopwords')
nltk.download('wordnet')

In [None]:
# Shared resources used by `clean_text`; built once here instead of per call.
lemmatizer = WordNetLemmatizer()

# Stored as a set so the per-token stopword membership test is a hash lookup.
stop_word = set(stopwords.words("english"))

def clean_text(text):
    """Normalize raw article text into a space-separated string of lemmas.

    Steps: lowercase, replace every non-letter character with a space, split
    on whitespace, drop English stopwords (``stop_word``) and lemmatize each
    remaining token with ``lemmatizer``.

    Parameters
    ----------
    text : str
        Raw text. Any non-string value (e.g. ``None`` or the ``NaN`` of a
        missing DataFrame cell) is treated as empty text instead of raising
        ``AttributeError`` on ``.lower()``.

    Returns
    -------
    str
        The cleaned tokens joined by single spaces; ``""`` when no token
        survives the filtering.
    """
    if not isinstance(text, str):
        return ""

    # Digits, punctuation and non-ASCII characters all become separators.
    letters_only = re.sub(r'[^a-zA-Z]', ' ', text.lower())

    return " ".join(
        lemmatizer.lemmatize(token)
        for token in letters_only.split()
        if token not in stop_word
    )

In [None]:
# Run the cleaning pipeline over every article; the raw text is overwritten.
df['content'] = df['content'].map(clean_text)

Train-Test Split

In [None]:
from sklearn.model_selection import train_test_split

# Model input is the cleaned article text; the target is the 0/1 label.
X, y = df['content'], df['label']

# Hold out 20% of the rows for evaluation; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
print(X_train.shape, X_test.shape, sep="\n")

Convert Text to Numbers (TF-IDF)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Cap the vocabulary at 5000 terms to keep the feature matrix small.
vectorizer = TfidfVectorizer(max_features=5000)

# Fit the vocabulary and IDF weights on the training split only...
X_train_tfidf = vectorizer.fit_transform(X_train)
# ...and reuse them unchanged on the test split, so nothing is learned from it.
X_test_tfidf = vectorizer.transform(X_test)


Training Model

In [None]:
from sklearn.linear_model import LogisticRegression

# Baseline classifier: logistic regression with default hyper-parameters,
# trained on the TF-IDF features of the training split.
model = LogisticRegression()
model.fit(X=X_train_tfidf, y=y_train)

Evaluate Model

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Score the baseline model on the held-out split.
y_pred = model.predict(X_test_tfidf)

baseline_accuracy = accuracy_score(y_test, y_pred)
baseline_report = classification_report(y_test, y_pred)

print("Accuracy:", baseline_accuracy)
print("\nClassification Report:\n", baseline_report)


In [None]:
from sklearn.metrics import confusion_matrix
# Confusion matrix of the baseline predictions: rows are the true class,
# columns the predicted class. `labels` pins the order to [0, 1] so the tick
# names below always line up with the matrix cells.
class_names = ["Fake", "Real"]  # label 0 = fake, label 1 = real
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

fig, ax = plt.subplots(figsize=(5, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Confusion Matrix")
plt.show()

Improve Model (SVM)

In [None]:
from sklearn.svm import LinearSVC
# Second model: a linear SVM trained on the same TF-IDF features.
# `random_state` is fixed because LinearSVC's solver can shuffle the data
# with a pseudo-random generator; seeding it keeps re-runs reproducible.
svm_model = LinearSVC(random_state=42)
svm_model.fit(X_train_tfidf, y_train)

# Evaluate on the same held-out split used for the baseline.
y_pred_svm = svm_model.predict(X_test_tfidf)
print("Accuracy:", accuracy_score(y_test, y_pred_svm))
print("\nClassification Report:\n", classification_report(y_test, y_pred_svm))

Comparing Models

In [None]:
from sklearn.metrics import precision_score,recall_score,f1_score
# Side-by-side test-set metrics for both classifiers.
# Precision, recall and F1 are computed with scikit-learn's default positive
# class, i.e. label 1 (real news).
predictions = {
    "LogisticRegression": y_pred,
    "SVM": y_pred_svm,
}
metrics = {
    "Accuracy": accuracy_score,
    "Precision": precision_score,
    "Recall": recall_score,
    "F1-Score": f1_score,
}

# One column per metric, one row per model (same row order as `predictions`).
result = {"Model": list(predictions)}
for metric_name, metric_fn in metrics.items():
    result[metric_name] = [
        metric_fn(y_test, model_pred) for model_pred in predictions.values()
    ]

comparison_df = pd.DataFrame(result).set_index("Model")
comparison_df


In [None]:
def predict_news(news_text):
    """Classify a single piece of news text as fake or real.

    The text is passed through `clean_text` (the same preprocessing applied to
    the training data), turned into TF-IDF features with the fitted
    `vectorizer`, and scored by `svm_model`.

    Returns "Fake News" when the predicted label is 0, otherwise "Real News".
    """
    # The vectorizer expects an iterable of documents, hence the one-item list.
    features = vectorizer.transform([clean_text(news_text)])
    predicted_label = svm_model.predict(features)[0]
    return "Fake News" if predicted_label == 0 else "Real News"


In [None]:
# Try the classifier on a short, single-sentence example.
sample_news ="The government announced a new economic reform plan to boost employment across the country."
print(predict_news(sample_news))


In [None]:
# Try the classifier on a longer, multi-sentence example (reuses `sample_news`).
sample_news = """
President Donald Trump was sworn in as the 45th President of the United States on January 20, 2017, following his victory in the 2016 presidential election. The inauguration ceremony took place at the U.S. Capitol in Washington, D.C., where Trump delivered his inaugural address outlining his administration’s priorities, including economic reform, immigration policy, and national security.
"""


print(predict_news(sample_news))