In [None]:
import numpy as np
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer  # Fixed typo: "IfidfVectorizer" → "TfidfVectorizer"
from sklearn.model_selection import train_test_split         # Fixed typo: "sk.learn" → "sklearn"
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score


In [None]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
print(stopwords.words('english'))

['a', 'about', 'above', 'after', 'again', 'against', 'ain', 'all', 'am', 'an', 'and', 'any', 'are', 'aren', "aren't", 'as', 'at', 'be', 'because', 'been', 'before', 'being', 'below', 'between', 'both', 'but', 'by', 'can', 'couldn', "couldn't", 'd', 'did', 'didn', "didn't", 'do', 'does', 'doesn', "doesn't", 'doing', 'don', "don't", 'down', 'during', 'each', 'few', 'for', 'from', 'further', 'had', 'hadn', "hadn't", 'has', 'hasn', "hasn't", 'have', 'haven', "haven't", 'having', 'he', "he'd", "he'll", 'her', 'here', 'hers', 'herself', "he's", 'him', 'himself', 'his', 'how', 'i', "i'd", 'if', "i'll", "i'm", 'in', 'into', 'is', 'isn', "isn't", 'it', "it'd", "it'll", "it's", 'its', 'itself', "i've", 'just', 'll', 'm', 'ma', 'me', 'mightn', "mightn't", 'more', 'most', 'mustn', "mustn't", 'my', 'myself', 'needn', "needn't", 'no', 'nor', 'not', 'now', 'o', 'of', 'off', 'on', 'once', 'only', 'or', 'other', 'our', 'ours', 'ourselves', 'out', 'over', 'own', 're', 's', 'same', 'shan', "shan't", 'she

### Data pre-processing


In [None]:
## loading datasets
data=pd.read_csv('/content/fake_news_dataset_10000.csv')
data.head()

Unnamed: 0,id,title,author,text,label
0,1,Ok member want building realize Mr.,Bobby Harris,Well home health less art until style people. ...,1
1,2,Information seven floor class early west.,Kimberly White,Message break dinner follow. Whatever listen c...,0
2,3,Participant increase spring.,Christina Rodriguez,Nature list item offer. Bring mother growth fu...,0
3,4,Site father end similar husband.,Stephen Sanchez,Chance police station short. Hot month wait li...,0
4,5,Challenge religious place above forward month.,Robert Woods,Among seven cup operation offer up case. Write...,0


In [None]:
data.shape

(10000, 5)

In [None]:
##missing values in datasets
data.isnull().sum()

Unnamed: 0,0
id,0
title,0
author,0
text,0
label,0


In [None]:
#merging the author name and news title
data['content']=data['author']+data['title']
data['content']

Unnamed: 0,content
0,Bobby HarrisOk member want building realize Mr.
1,Kimberly WhiteInformation seven floor class ea...
2,Christina RodriguezParticipant increase spring.
3,Stephen SanchezSite father end similar husband.
4,Robert WoodsChallenge religious place above fo...
...,...
9995,Cynthia RichardsonPhone marriage few forget st...
9996,Marc ByrdLeave enter forward on ago.
9997,Mike CarterBreak decide professor budget resul...
9998,Shannon CookI list may opportunity rule.


In [None]:
#seperating the data and label
x=data.drop(columns='label',axis=1)
y=data['label']

In [None]:
x

Unnamed: 0,id,title,author,text,content
0,1,Ok member want building realize Mr.,Bobby Harris,Well home health less art until style people. ...,Bobby HarrisOk member want building realize Mr.
1,2,Information seven floor class early west.,Kimberly White,Message break dinner follow. Whatever listen c...,Kimberly WhiteInformation seven floor class ea...
2,3,Participant increase spring.,Christina Rodriguez,Nature list item offer. Bring mother growth fu...,Christina RodriguezParticipant increase spring.
3,4,Site father end similar husband.,Stephen Sanchez,Chance police station short. Hot month wait li...,Stephen SanchezSite father end similar husband.
4,5,Challenge religious place above forward month.,Robert Woods,Among seven cup operation offer up case. Write...,Robert WoodsChallenge religious place above fo...
...,...,...,...,...,...
9995,9996,Phone marriage few forget student young special.,Cynthia Richardson,Bring old idea according whatever. Ball kid fa...,Cynthia RichardsonPhone marriage few forget st...
9996,9997,Leave enter forward on ago.,Marc Byrd,Respond pretty age performance drop. Book stil...,Marc ByrdLeave enter forward on ago.
9997,9998,Break decide professor budget result walk.,Mike Carter,Red campaign various daughter. Culture box for...,Mike CarterBreak decide professor budget resul...
9998,9999,I list may opportunity rule.,Shannon Cook,Establish hour improve in. Trial everyone spee...,Shannon CookI list may opportunity rule.


In [None]:
y

Unnamed: 0,label
0,1
1,0
2,0
3,0
4,0
...,...
9995,1
9996,1
9997,1
9998,0


In [None]:
## stemming procedure
stem=PorterStemmer()

In [None]:
def stemming(content):
   stemmed_content = re.sub('[^a-zA-Z]',' ',content)
   stemmed_content= stemmed_content.lower()
   stemmed_content=stemmed_content.split()
   stemmed_contenr=[stem.stem(word) for word in stemmed_content if not word in stopwords.words('english')]
   stemmed_content=' '.join(stemmed_contenr)
   return stemmed_content

In [None]:

data['content'] = data['content'].apply(stemming)

In [None]:
print(data['content'])

0              bobbi harrisok member want build realiz mr
1       kimberli whiteinform seven floor class earli west
2              christina rodriguezparticip increas spring
3           stephen sanchezsit father end similar husband
4         robert woodschalleng religi place forward month
                              ...                        
9995    cynthia richardsonphon marriag forget student ...
9996                      marc byrdleav enter forward ago
9997    mike carterbreak decid professor budget result...
9998                 shannon cooki list may opportun rule
9999                 christian daniel jr nation bodi week
Name: content, Length: 10000, dtype: object


In [None]:
##separating the data and label
x=data['content'].values
y=data['label'].values

In [None]:
x

array(['bobbi harrisok member want build realiz mr',
       'kimberli whiteinform seven floor class earli west',
       'christina rodriguezparticip increas spring', ...,
       'mike carterbreak decid professor budget result walk',
       'shannon cooki list may opportun rule',
       'christian daniel jr nation bodi week'], dtype=object)

In [None]:
y

array([1, 0, 0, ..., 1, 0, 0])

# New Section

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer()  # Fix the typo: IfidfVectorizer ➜ TfidfVectorizer
vectorizer.fit(x)
x = vectorizer.transform(x)


In [None]:
x

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 60701 stored elements and shape (10000, 11371)>

In [99]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=.2,stratify=y,random_state=2)

In [100]:
model=LogisticRegression()

In [101]:
model.fit(x_train,y_train)

In [102]:
#accuracy score
x_train_prediction=model.predict(x_train)

In [111]:
accuracy_train=accuracy_score(x_train_prediction,y_train)

In [112]:
accuracy_train

0.80475

In [105]:
x_test_prediction=model.predict(x_test)

In [106]:
accuracy_test=accuracy_score(x_test_prediction,y_test)

In [107]:
accuracy_test

0.4805

In [108]:
## making and predictive system
x_new=x_test[0]
prediction=model.predict(x_new)
prediction
if prediction[0]==0:
  print('The news is real ')
else:
    print('The news is fake')

The news is real 


In [109]:
y_test[5]

np.int64(1)