In [1]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report

# Define the categories
categories = ['alt.atheism', 'soc.religion.christian', 'comp.graphics', 'sci.med']

# Load the dataset
data = fetch_20newsgroups(subset='all', categories=categories, remove=('headers', 'footers', 'quotes'))

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)

# Vectorize the text data
vectorizer = TfidfVectorizer(stop_words='english')
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Train the SVM classifier
svm = SVC(kernel='linear')
svm.fit(X_train_vec, y_train)

# Predict on test set
y_pred = svm.predict(X_test_vec)

# Show classification report
print("Classification Report:\n")
print(classification_report(y_test, y_pred, target_names=categories))

Classification Report:

                        precision    recall  f1-score   support

           alt.atheism       0.86      0.80      0.83       175
soc.religion.christian       0.93      0.94      0.94       200
         comp.graphics       0.86      0.93      0.89       200
               sci.med       0.86      0.83      0.85       177

              accuracy                           0.88       752
             macro avg       0.88      0.88      0.88       752
          weighted avg       0.88      0.88      0.88       752

