In [1]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

categories = ['alt.atheism', 'rec.motorcycles', 'comp.graphics', 'sci.med']
newsgroups = fetch_20newsgroups(subset='all', categories=categories, remove=('headers', 'footers', 'quotes'))

X = TfidfVectorizer(stop_words='english').fit_transform(newsgroups.data)
y = newsgroups.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

for name, model in [("Naïve Bayes", MultinomialNB()), ("SVM", SVC(kernel='linear'))]:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"\n=== {name} Classifier Performance ===")
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print("Classification Report:\n", classification_report(y_test, y_pred, target_names=categories))


=== Naïve Bayes Classifier Performance ===
Accuracy: 0.9069
Classification Report:
                  precision    recall  f1-score   support

    alt.atheism       0.97      0.84      0.90       174
rec.motorcycles       0.94      0.93      0.93       192
  comp.graphics       0.84      0.96      0.90       186
        sci.med       0.90      0.90      0.90       200

       accuracy                           0.91       752
      macro avg       0.91      0.91      0.91       752
   weighted avg       0.91      0.91      0.91       752


=== SVM Classifier Performance ===
Accuracy: 0.9029
Classification Report:
                  precision    recall  f1-score   support

    alt.atheism       0.95      0.84      0.89       174
rec.motorcycles       0.99      0.90      0.94       192
  comp.graphics       0.81      0.95      0.87       186
        sci.med       0.89      0.92      0.90       200

       accuracy                           0.90       752
      macro avg       0.91      0.9