## Step 2: Apply classical ML models

In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.naive_bayes import MultinomialNB


In [2]:
news = pd.read_csv("data/news.csv")
news.dropna(subset=['text'], inplace=True)

vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(news['text'])


y = news['label']


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [3]:
lr = LogisticRegression()

lr.fit(X_train, y_train)

y_pred = lr.predict(X_test)

print(classification_report(y_test, y_pred, digits=5))


              precision    recall  f1-score   support

           0    0.99008   0.98812   0.98910      4546
           1    0.98749   0.98955   0.98852      4308

    accuracy                        0.98882      8854
   macro avg    0.98879   0.98884   0.98881      8854
weighted avg    0.98882   0.98882   0.98882      8854



In [4]:
nb = MultinomialNB()
nb.fit(X_train, y_train)

y_pred = nb.predict(X_test)

print(classification_report(y_test, y_pred, digits=5))


              precision    recall  f1-score   support

           0    0.95791   0.93621   0.94694      4546
           1    0.93426   0.95659   0.94529      4308

    accuracy                        0.94613      8854
   macro avg    0.94608   0.94640   0.94611      8854
weighted avg    0.94640   0.94613   0.94614      8854



In [5]:
rf = RandomForestClassifier()

rf.fit(X_train, y_train)

y_pred = rf.predict(X_test)

print(classification_report(y_test, y_pred, digits=5))


              precision    recall  f1-score   support

           0    0.99317   0.99186   0.99252      4546
           1    0.99142   0.99280   0.99211      4308

    accuracy                        0.99232      8854
   macro avg    0.99230   0.99233   0.99231      8854
weighted avg    0.99232   0.99232   0.99232      8854



In [6]:
svm = SVC(kernel='linear')

svm.fit(X_train, y_train)

y_pred = svm.predict(X_test)

print(classification_report(y_test, y_pred, digits=5))


              precision    recall  f1-score   support

           0    0.99669   0.99450   0.99560      4546
           1    0.99421   0.99652   0.99536      4308

    accuracy                        0.99548      8854
   macro avg    0.99545   0.99551   0.99548      8854
weighted avg    0.99549   0.99548   0.99548      8854

