In [1]:
# import necessary libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score

In [2]:
# Read the news dataset from CSV into a DataFrame.
# NOTE(review): the absolute path looks like a Colab Google Drive mount —
# confirm the drive is mounted before running this cell.
news_csv_path = '/content/drive/MyDrive/news.csv'
df = pd.read_csv(news_csv_path)

In [3]:
# Pull out the model input (the 'text' column) and the target (the 'label' column).
X, y = df['text'], df['label']

In [4]:
# Configure the TF-IDF vectorizer; all text clean-up happens inside it
# when it is fitted/applied, nothing is transformed in this cell yet.
tfidf = TfidfVectorizer(
    stop_words='english',                # drop common English stop words
    lowercase=True,                      # case-fold before tokenizing
    strip_accents='ascii',               # fold accented characters to ASCII where a mapping exists
    token_pattern=r'\b[a-zA-Z]{3,}\b',   # keep only all-letter tokens of 3+ characters (no digits/punctuation)
    max_df=0.7,                          # ignore terms present in more than 70% of documents
)

In [5]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Learn the vocabulary and IDF weights from the training text only, then
# apply that fitted vectorizer to the test text.
# NOTE: X_train / X_test are rebound from raw text to TF-IDF matrices here,
# so re-running this cell without first re-running the split cell would feed
# the matrices back into the vectorizer.
X_train, X_test = tfidf.fit_transform(X_train), tfidf.transform(X_test)

In [7]:
# Train the PassiveAggressiveClassifier on the TF-IDF training matrix.
# The estimator shuffles the training data between epochs by default, so
# without a fixed random_state the fitted model (and the accuracy computed
# from it) changes on every run. Seed it with the same value used for the
# train/test split so a fresh-kernel re-run is reproducible.
pac = PassiveAggressiveClassifier(max_iter=50, random_state=42)
pac.fit(X_train, y_train)

In [8]:
# Predict labels for the held-out documents and report the fraction that
# match the true labels, shown as a percentage rounded to two decimals.
y_pred = pac.predict(X_test)
score = accuracy_score(y_test, y_pred)
accuracy_pct = round(score * 100, 2)
print(f'Accuracy: {accuracy_pct}%')

Accuracy: 93.69%
