In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import itertools
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

In [None]:
# Load the fake-news dataset from its CSV file into a DataFrame.
# Later cells reference the columns 'URLs', 'Headline', 'Body' and 'Label'.
data = pd.read_csv("../input/fake-news-detection/data.csv")

In [None]:
# A cell only displays its last expression, so the bare `data.shape` was
# silently discarded; print it explicitly and keep the preview as the cell output.
print(data.shape)
data.head(2)

In [None]:
# Plot how many missing values each column has.
# countplot is a frequency plot of its input's values, so it did not show the
# per-column missing counts; a bar plot of the counts themselves does.
null_counts = data.isna().sum()
sns.barplot(x=null_counts.index, y=null_counts.values)

In [None]:
# Drop every row that contains at least one missing value (dropna's default how='any').
data = data.dropna()

In [None]:
# Re-plot the per-column missing-value counts to check the result of dropna().
# countplot is a frequency plot of its input's values, so it did not show the
# per-column missing counts; a bar plot of the counts themselves does.
remaining_nulls = data.isna().sum()
sns.barplot(x=remaining_nulls.index, y=remaining_nulls.values)

We will shuffle the data to prevent bias:

In [None]:
from sklearn.utils import shuffle
# Shuffle the rows with a fixed seed so that a Restart & Run All reproduces the
# same row order (and therefore the same train/test split and scores), then
# renumber the index 0..n-1 and discard the old index.
data = shuffle(data, random_state=7).reset_index(drop=True)

Removing the URLs column (the model will only use the body text):

In [None]:
# Discard the 'URLs' column; the result is bound back to `data`.
data = data.drop(columns=["URLs"])

In [None]:
# Lower-case the headlines with the vectorized string accessor instead of a per-row lambda.
# NOTE(review): only data['Body'] is passed to train_test_split in this notebook,
# so this column does not reach the model in the code shown — confirm it is still needed.
data['Headline'] = data['Headline'].str.lower()

In [None]:
# Target values: the 'Label' column, later passed to train_test_split as y.
result = data['Label']

In [None]:
# Per-column count of missing values remaining in the frame.
data.isna().sum()


Split the dataset into training and testing sets.

In [None]:
# Hold out 20% of the rows for evaluation; features are the 'Body' text,
# targets are `result`. random_state=7 fixes the split for a given row order.
x_train, x_test, y_train, y_test = train_test_split(
    data['Body'],
    result,
    test_size=0.2,
    random_state=7,
)

In [None]:
# Show the first training document by position.
# x_train keeps the index labels of the rows sampled from `data`, so the label 0
# exists only if that row landed in the training split; `x_train[0]` is a label
# lookup and raises KeyError otherwise. `.iloc[0]` always returns the first row.
x_train.iloc[0]

Initialize a TfidfVectorizer, fit it on the training text, and transform both the training and test sets:

In [None]:
# DataFlair - TF-IDF features: drop English stop words and ignore terms that
# appear in more than 70% of the documents (max_df=0.7).
tfidf_vectorizer = TfidfVectorizer(max_df=0.7, stop_words='english')

# DataFlair - Learn the vocabulary and IDF weights from the training text only,
# then reuse that fitted vocabulary to encode the test text.
tfidf_train = tfidf_vectorizer.fit_transform(x_train)
tfidf_test = tfidf_vectorizer.transform(x_test)

In [None]:
# Initialize a PassiveAggressiveClassifier and fit it on the TF-IDF training matrix.
# The classifier shuffles the training data between epochs by default, so a fixed
# random_state is needed for the fitted model (and the reported accuracy) to be
# repeatable across runs; 7 matches the seed used for train_test_split.
pac = PassiveAggressiveClassifier(max_iter=50, random_state=7)
pac.fit(tfidf_train, y_train)

In [None]:
# Predict labels for the held-out documents and report the fraction predicted
# correctly, formatted as a percentage with two decimals.
y_pred = pac.predict(tfidf_test)
score = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {round(score*100,2)}%')

In [None]:
# DataFlair - Confusion matrix for the test set: rows are the true labels and
# columns the predicted labels, both ordered as labels=[0, 1].
confusion_matrix(y_true=y_test, y_pred=y_pred, labels=[0, 1])

In [None]:
def fake_news_det(news):
    """Classify a single news text with the in-memory model and print the result.

    Parameters
    ----------
    news : str
        Text to classify.

    The text is wrapped in a one-element list, encoded with the notebook-level
    ``tfidf_vectorizer`` and passed to the notebook-level ``pac``; whatever
    ``pac.predict`` returns is printed. The function itself returns ``None``.
    """
    features = tfidf_vectorizer.transform([news])
    print(pac.predict(features))

In [None]:
# Classify a sample snippet with the in-memory model; the prediction is printed.
# NOTE(review): the text contains 'â€™', which looks like a mis-decoded apostrophe; it is passed to the model as-is.
fake_news_det('U.S. Secretary of State John F. Kerry said Monday that he will stop in Paris later this week, amid criticism that no top American officials attended Sundayâ€™s unity march against terrorism.')

In [None]:
# Classify a second sample snippet with the in-memory model; the prediction is printed.
# NOTE(review): the text contains 'â€™', which looks like a mis-decoded apostrophe; it is passed to the model as-is.
fake_news_det("""Go to Article 
President Barack Obama has been campaigning hard for the woman who is supposedly going to extend his legacy four more years. The only problem with stumping for Hillary Clinton, however, is sheâ€™s not exactly a candidate easy to get too enthused about.  """)

In [None]:
import pickle
# Persist the trained classifier to disk. The `with` block closes the file
# handle (the original left it open, so the write was not guaranteed to be flushed).
# NOTE: the file is a pickle despite the '.h5' extension, and only `pac` is
# saved — the fitted tfidf_vectorizer is not written by this cell.
with open('model.h5', 'wb') as model_file:
    pickle.dump(pac, model_file)

In [None]:
# Load the model from disk; the `with` block closes the file handle afterwards
# (the original left it open).
# NOTE: pickle.load can execute arbitrary code — only load files you created yourself.
with open('./model.h5', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [None]:
def fake_news_det1(news):
    """Classify a single news text with the model reloaded from disk and print the result.

    Parameters
    ----------
    news : str
        Text to classify.

    The text is encoded with the notebook-level ``tfidf_vectorizer`` and passed
    to the notebook-level ``loaded_model``; whatever ``loaded_model.predict``
    returns is printed. The function itself returns ``None``.
    """
    # transform() is given a one-element list, i.e. a batch containing one document.
    input_data = [news]
    vectorized_input_data = tfidf_vectorizer.transform(input_data)
    prediction = loaded_model.predict(vectorized_input_data)
    # A dangling `if prediction` (no colon, no body) stood here and made the whole
    # cell a SyntaxError; it is removed so the function can be defined and called.
    print(prediction)

In [None]:
# Classify a sample snippet through fake_news_det1, which predicts with `loaded_model`.
# NOTE(review): the text contains 'â€™', which looks like a mis-decoded apostrophe; it is passed to the model as-is.
fake_news_det1("""Go to Article 
President Barack Obama has been campaigning hard for the woman who is supposedly going to extend his legacy four more years. The only problem with stumping for Hillary Clinton, however, is sheâ€™s not exactly a candidate easy to get too enthused about.  """)

In [None]:
# Classify a second sample snippet through fake_news_det1, which predicts with `loaded_model`.
# NOTE(review): the text contains 'â€™', which looks like a mis-decoded apostrophe; it is passed to the model as-is.
fake_news_det1("""U.S. Secretary of State John F. Kerry said Monday that he will stop in Paris later this week, amid criticism that no top American officials attended Sundayâ€™s unity march against terrorism.""")

In [None]:
# Classify the Kerry snippet with fake_news_det, which predicts with the in-memory `pac`.
# NOTE(review): the text contains 'â€™', which looks like a mis-decoded apostrophe; it is passed to the model as-is.
fake_news_det('''U.S. Secretary of State John F. Kerry said Monday that he will stop in Paris later this week, amid criticism that no top American officials attended Sundayâ€™s unity march against terrorism.''')

In [None]:
# Classify a snippet about India's second Covid-19 wave with the in-memory model; the prediction is printed.
fake_news_det("""The second Covid-19 wave in India is now on the "downswing," the Centre said on Thursday, highlighting that the current number of active cases is still "very high" and advised states and Union territories (UTs) to not let down their guards.""")

In [None]:
# Classify a very short, non-news sentence with the in-memory model; the prediction is printed.
fake_news_det("potatoes are good for digestion")