In [3]:
import pandas as pd

In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import PassiveAggressiveClassifier

In [5]:
df_fake = pd.read_csv("Fake.csv")
df_real = pd.read_csv("True.csv")

In [10]:
# Add labels
df_fake["label"] = "FAKE ❌"
df_real["label"] = "REAL ✅"

In [11]:
# Combine datasets
df = pd.concat([df_fake, df_real], ignore_index=True)

# Shuffle the data
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

df_fake.head()

Unnamed: 0,title,text,subject,date,label
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",FAKE ❌
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",FAKE ❌
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",FAKE ❌
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",FAKE ❌
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",FAKE ❌


In [12]:
df_real.head()

Unnamed: 0,title,text,subject,date,label
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",REAL ✅
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",REAL ✅
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017",REAL ✅
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017",REAL ✅
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017",REAL ✅


In [13]:
df_fake.describe()

Unnamed: 0,title,text,subject,date,label
count,23481,23481.0,23481,23481,23481
unique,17903,17455.0,6,1681,1
top,MEDIA IGNORES Time That Bill Clinton FIRED His...,,News,"May 10, 2017",FAKE ❌
freq,6,626.0,9050,46,23481


In [14]:
df_real.describe()

Unnamed: 0,title,text,subject,date,label
count,21417,21417,21417,21417,21417
unique,20826,21192,2,716,1
top,Factbox: Trump fills top jobs for his administ...,(Reuters) - Highlights for U.S. President Dona...,politicsNews,"December 20, 2017",REAL ✅
freq,14,8,11272,182,21417


In [15]:
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
X = vectorizer.fit_transform(df['text'])  # Or df['title'] if requried 
y = df['label']

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [18]:
model = PassiveAggressiveClassifier(max_iter=50) #using PassiveaggressiveClassifer insted of Logestic classifer
model.fit(X_train, y_train)

In [19]:
y_pr=model.predict(X_test)

In [20]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

print("Accuracy:\n", accuracy_score(y_test, y_pr))
print("\n")
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pr))


Accuracy:
 0.9929844097995546


Confusion Matrix:
 [[4681   29]
 [  34 4236]]


In [21]:
#function for checking news is real or fake
def predict_news(title, text, vectorizer, model):
    # Combine title and text 
    combined_input = [title + " " + text]
    
    # transformer is used because the vectorizer is already trained
    vectorized_input = vectorizer.transform(combined_input)
    
    # Predict 
    prediction = model.predict(vectorized_input)
    
    return prediction[0]  #predicts wether news is real for fake


In [22]:
#Example
title = "Let it be known:New government reforms introduced"

text = "Reform is in the air. The government has introduced an economic programme..."

combined_input = [title + " " + text]
combined_vector = vectorizer.transform(combined_input)
prediction = model.predict(combined_vector)
print("Prediction:", prediction[0])


Prediction: REAL ✅
