# In [None]:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Two-class text classification on 20 Newsgroups: fetch the documents, hold
# out a test split, tune a TF-IDF + SVC pipeline with a grid search, and
# print a per-class report on the held-out documents.

# Only these two newsgroups are requested, from the "train" subset.
categories = ["comp.graphics", "sci.space"]
dataset = fetch_20newsgroups(subset="train", categories=categories)

# Keep 20% of the fetched documents aside for the final evaluation; the fixed
# seed makes the split repeatable.
X, y = dataset.data, dataset.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# The step names ("tfidf", "clf") are the prefixes used by the grid keys below.
pipeline = Pipeline(
    steps=[
        ("tfidf", TfidfVectorizer()),
        ("clf", SVC()),
    ]
)

# Search space: 2 n-gram ranges x 2 min_df values x 2 kernels x 2 C values,
# i.e. 16 candidate settings, addressed as "<step name>__<parameter>".
parameters = {
    "tfidf__ngram_range": [(1, 1), (1, 2)],
    "tfidf__min_df": [2, 3],
    "clf__kernel": ["linear", "rbf"],
    "clf__C": [1, 10],
}

# Fit every candidate on the training split only, with cv=5.
grid_search = GridSearchCV(estimator=pipeline, param_grid=parameters, cv=5)
grid_search.fit(X_train, y_train)

# Score the fitted search object on the documents it never saw during tuning.
predictions = grid_search.predict(X_test)
print(classification_report(y_true=y_test, y_pred=predictions))