In [None]:
# Import the required libraries
import os

import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import confusion_matrix, f1_score, classification_report, accuracy_score
from sklearn.model_selection import train_test_split

In [None]:
# List every file available under the Kaggle input directory.
# (`os` is imported with the other libraries in the first cell.)
for dirname, dirnames, filenames in os.walk('/kaggle/input'):
    # Sorting in place makes os.walk descend into sub-directories in a
    # stable order, so the listing is identical on every run.
    dirnames.sort()
    for filename in sorted(filenames):
        print(os.path.join(dirname, filename))

In [None]:
# Load the articles stored in True.csv into a DataFrame and preview it
real_news_data = pd.read_csv(
    filepath_or_buffer="/kaggle/input/fake-and-real-news-dataset/True.csv"
)
real_news_data

In [None]:
# Load the articles stored in Fake.csv into a DataFrame and preview it
fake_news_data = pd.read_csv(
    filepath_or_buffer="/kaggle/input/fake-and-real-news-dataset/Fake.csv"
)
fake_news_data

In [None]:
# Add a class column to each source frame: 0 = real news, 1 = fake news
real_news_data['label'], fake_news_data['label'] = 0, 1

# Stack the two frames row-wise into one dataset with a fresh running index
df = pd.concat(objs=(real_news_data, fake_news_data), axis=0, ignore_index=True)
df

In [None]:
# Count the missing entries in each column of the combined dataset
df.isna().sum()

In [None]:
# Target vector: the 0/1 class column of the combined dataset
labels = df["label"]
labels.head(n=5)

In [None]:
# Split the article text and labels into train (80%) and test (20%) sets.
# random_state pins the shuffle so a fresh Restart & Run All reproduces the
# same split (and therefore the same scores); stratify keeps the real/fake
# ratio the same in both partitions.
x_train, x_test, y_train, y_test = train_test_split(
    df["text"],
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

In [None]:
# Preview the first few training documents
x_train.head(n=5)

In [None]:
# TF-IDF text vectorizer: drops English stop words and ignores terms that
# occur in more than 70% of the documents
vector = TfidfVectorizer(
    max_df=0.7,
    stop_words="english",
)

In [None]:
# Learn the vocabulary and IDF weights from the training text only ...
tf_train = vector.fit_transform(raw_documents=x_train)
# ... then encode the test text with that already-fitted vectorizer
tf_test = vector.transform(raw_documents=x_test)

In [None]:
# Train a Passive Aggressive Classifier on the TF-IDF training features.
# max_iter caps the number of passes over the training data; random_state
# pins the per-epoch shuffling so the fitted model is reproducible on re-run.
pac = PassiveAggressiveClassifier(max_iter=50, random_state=42)
pac.fit(tf_train, y_train)

In [None]:
# Predict a class (0 = real, 1 = fake) for every test document
y_pred = pac.predict(X=tf_test)
y_pred

In [None]:
# Scores
# The F1 result is stored under its own name: assigning it to `f1_score`
# would replace the imported sklearn function with a float, so re-running
# this cell (or calling f1_score anywhere later) would raise a TypeError.
weighted_f1 = f1_score(y_test, y_pred, average='weighted')
accuracy = accuracy_score(y_test, y_pred)

print("Train data Score: ", pac.score(tf_train, y_train))
print("Test data Score:", pac.score(tf_test, y_test))
print("F1 Score:", weighted_f1)
print("Accuracy Score:", accuracy)