# In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.linear_model import PassiveAggressiveClassifier
import re
import string
import nltk
# Fetch only the NLTK resources this script uses. A bare nltk.download()
# opens the interactive downloader, which blocks unattended runs.
# 'stopwords' backs stopwords.words('english'); 'punkt' / 'punkt_tab' are the
# tokenizer models behind nltk.word_tokenize (which of the two is loaded
# depends on the installed NLTK release, so both are requested).
for _resource in ('stopwords', 'punkt', 'punkt_tab'):
    nltk.download(_resource, quiet=True)
from nltk.corpus import stopwords

# Read the news dataset from disk into a DataFrame
dataset_path = 'fake_news_dataset.csv'
df = pd.read_csv(dataset_path)

# Preprocessing the data
# Lazily-populated cache so the stop-word corpus is read only once per run
_ENGLISH_STOP_WORDS = None


def _english_stop_words():
    """Return NLTK's English stop words as a frozenset, loading them on first use."""
    global _ENGLISH_STOP_WORDS
    if _ENGLISH_STOP_WORDS is None:
        _ENGLISH_STOP_WORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOP_WORDS


def preprocess_text(text):
    """Clean a single document before vectorisation.

    Steps, in order: drop URLs (anything starting with ``http`` up to the
    next whitespace), strip ASCII punctuation, lowercase, tokenize with
    ``nltk.word_tokenize``, remove English stop words, and re-join the
    remaining tokens with single spaces.

    Args:
        text: The raw document. Anything that is not a ``str`` (for example
            ``None`` or a float NaN from a missing cell) is treated as an
            empty document.

    Returns:
        The cleaned text as one space-separated string; ``''`` for
        non-string input.
    """
    # Guard first: re.sub and str.translate below would raise on non-strings.
    if not isinstance(text, str):
        return ''

    # Removing URLs
    text = re.sub(r'http\S+', '', text)
    # Removing punctuation marks
    text = text.translate(str.maketrans('', '', string.punctuation))
    # Converting text to lowercase
    text = text.lower()
    # Tokenizing text
    words = nltk.word_tokenize(text)
    # Removing stop words; the set is built once instead of re-reading the
    # stop-word list for every token.
    # NOTE(review): punctuation is stripped before this filter, so any
    # stop-word entries containing apostrophes presumably never match - confirm
    # whether that is intended.
    stop_words = _english_stop_words()
    words = [word for word in words if word not in stop_words]
    # Joining words back to form text
    return ' '.join(words)

# Clean every article. preprocess_text works on one document at a time, so
# running it before the split does not share information between folds.
df['text'] = df['text'].apply(preprocess_text)

# Target vector
y = df['label']

# Splitting the cleaned documents into training and testing sets BEFORE
# vectorising, so that the TF-IDF vocabulary and IDF weights are learned
# from the training fold only (fitting on the full corpus leaks test data).
text_train, text_test, y_train, y_test = train_test_split(
    df['text'], y, test_size=0.2, random_state=42
)

# Creating the feature matrices: fit on train, reuse the fitted vocabulary
# and weights to transform test.
tfidf_vectorizer = TfidfVectorizer()
X_train = tfidf_vectorizer.fit_transform(text_train)
X_test = tfidf_vectorizer.transform(text_test)

# Training the model; random_state is fixed (matching the split's seed) so
# repeated runs give the same result.
pac = PassiveAggressiveClassifier(random_state=42)
pac.fit(X_train, y_train)

# Predicting the labels of the test set
y_pred = pac.predict(X_test)

# Evaluating the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
