In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
        
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
import re
from sklearn.linear_model import PassiveAggressiveClassifier
import string
import urllib
from newspaper import Article
from wordcloud import WordCloud
from sklearn.metrics import confusion_matrix, accuracy_score


In [2]:
data=pd.read_csv(r'E:\Downloads\Kaggle Datasets\fake-news\train.csv')
data=data.dropna()
#since null value rows are dropped indexes needs to be reset
df=data.copy()
df.reset_index(inplace=True)


In [3]:
conversion_dict = {0: 'Real', 1: 'Fake'}
df['label'] = df['label'].replace(conversion_dict)

In [4]:
df

Unnamed: 0,index,id,title,author,text,label
0,0,0,House Dem Aide: We Didn’t Even See Comey’s Let...,Darrell Lucus,House Dem Aide: We Didn’t Even See Comey’s Let...,Fake
1,1,1,"FLYNN: Hillary Clinton, Big Woman on Campus - ...",Daniel J. Flynn,Ever get the feeling your life circles the rou...,Real
2,2,2,Why the Truth Might Get You Fired,Consortiumnews.com,"Why the Truth Might Get You Fired October 29, ...",Fake
3,3,3,15 Civilians Killed In Single US Airstrike Hav...,Jessica Purkiss,Videos 15 Civilians Killed In Single US Airstr...,Fake
4,4,4,Iranian woman jailed for fictional unpublished...,Howard Portnoy,Print \nAn Iranian woman has been sentenced to...,Fake
...,...,...,...,...,...,...
18280,20795,20795,Rapper T.I.: Trump a ’Poster Child For White S...,Jerome Hudson,Rapper T. I. unloaded on black celebrities who...,Real
18281,20796,20796,"N.F.L. Playoffs: Schedule, Matchups and Odds -...",Benjamin Hoffman,When the Green Bay Packers lost to the Washing...,Real
18282,20797,20797,Macy’s Is Said to Receive Takeover Approach by...,Michael J. de la Merced and Rachel Abrams,The Macy’s of today grew from the union of sev...,Real
18283,20798,20798,"NATO, Russia To Hold Parallel Exercises In Bal...",Alex Ansary,"NATO, Russia To Hold Parallel Exercises In Bal...",Fake


In [5]:
ps = PorterStemmer()

def stemming(corpus):
    # Pick all alphabet characters - lowercase and uppercase...all others such as numbers and punctuations will be removed. Numbers or punctuations will be replaced by a whitespace
    stemmed_corpus = re.sub('[^a-zA-Z]',' ',corpus)
    
    # Converting all letters to lowercase 
    stemmed_corpus = stemmed_corpus.lower()
    
    # Converting all to a splitted case or a list
    stemmed_corpus = stemmed_corpus.split()
    
    # Applying stemming, so we get the root words wherever possible + remove stopwords as well
    stemmed_corpus = [ps.stem(word) for word in stemmed_corpus] #if not word in stopwords.words('english')]
    
    # Join all the words in final content
    stemmed_corpus = ' '.join(stemmed_corpus)
    return stemmed_corpus


In [44]:
df['text'] = df['text'].apply(stemming)

In [6]:
df['text']

0        House Dem Aide: We Didn’t Even See Comey’s Let...
1        Ever get the feeling your life circles the rou...
2        Why the Truth Might Get You Fired October 29, ...
3        Videos 15 Civilians Killed In Single US Airstr...
4        Print \nAn Iranian woman has been sentenced to...
                               ...                        
18280    Rapper T. I. unloaded on black celebrities who...
18281    When the Green Bay Packers lost to the Washing...
18282    The Macy’s of today grew from the union of sev...
18283    NATO, Russia To Hold Parallel Exercises In Bal...
18284      David Swanson is an author, activist, journa...
Name: text, Length: 18285, dtype: object

In [7]:
x_train,x_test,y_train,y_test=train_test_split(df['text'], df['label'], test_size=0.20, random_state=7, shuffle=True)
tfidf_vectorizer=TfidfVectorizer(stop_words='english', max_df=0.75)

In [8]:
vec_train=tfidf_vectorizer.fit_transform(x_train.values.astype('U')) 
vec_test=tfidf_vectorizer.transform(x_test.values.astype('U'))

In [9]:
pac=PassiveAggressiveClassifier(max_iter=50)
pac.fit(vec_train,y_train)

PassiveAggressiveClassifier(max_iter=50)

In [12]:
y_predpac=pac.predict(vec_test)
scorepac=accuracy_score(y_test,y_predpac)
print(f'Passive Aggressive Accuracy: {round(scorepac*100,2)}%')

Passive Aggressive Accuracy: 96.5%


In [13]:
def findlabelpac(newtext):
    vec_newtestpac=tfidf_vectorizer.transform([newtext])
    y_pred1pac=pac.predict(vec_newtestpac)
    return y_pred1pac[0]

In [22]:

url = "https://www.nytimes.com/2023/02/19/us/chris-hinds-wheelchair-denver-city-council.html"
url = urllib.parse.unquote(url)
article = Article(str(url))
article.download()
article.parse()
article.nlp()
title = article.title
news = article.text
keywords = article.keywords


In [23]:
news

'A debate for Denver’s District 10 City Council seat was scheduled to soon begin, but as the incumbent, Chris Hinds, arrived, organizers appeared puzzled when they saw him in a wheelchair, which he has used since 2008.\n\nThe debate stage at Cleo Parker Robinson Dance, which was sponsoring the event, did not have an accessible option for wheelchairs. Realizing it would be difficult to lift the 400-pound wheelchair and the 200-pound councilman onto the stage, organizers offered a suggestion, Mr. Hinds said: Could he get himself onto the stage and then they could lift the wheelchair?\n\nWith his three opponents already there, and roughly 100 attendees in their seats, Mr. Hinds — who is paralyzed from the chest down from an accident while cycling — began to lift himself with his hands onto the stage.\n\n“I’m thinking, ‘I look like a circus monkey. I am a circus monkey,’” Mr. Hinds said on Saturday. “And all these folks who are here to determine who their next City Council member is — they

In [24]:
news = stemming(news)
news

'a debat for denver s district citi council seat wa schedul to soon begin but as the incumb chri hind arriv organ appear puzzl when they saw him in a wheelchair which he ha use sinc the debat stage at cleo parker robinson danc which wa sponsor the event did not have an access option for wheelchair realiz it would be difficult to lift the pound wheelchair and the pound councilman onto the stage organ offer a suggest mr hind said could he get himself onto the stage and then they could lift the wheelchair with hi three oppon alreadi there and roughli attende in their seat mr hind who is paralyz from the chest down from an accid while cycl began to lift himself with hi hand onto the stage i m think i look like a circu monkey i am a circu monkey mr hind said on saturday and all these folk who are here to determin who their next citi council member is they see me just flounder'

In [25]:
labeltestpac = findlabelpac(news)
print("Passive Aggressive Prediction:",labeltestpac)

Passive Aggressive Prediction: Real
