### Import Necessary Libraries
Let's start by importing the necessary libraries for data processing and modeling.

In [3]:
import nltk
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

### Load and Preprocess Data
We'll load the 20 Newsgroups dataset and preprocess it for classification. Then converts text documents into a TF-IDF (Term Frequency-Inverse Document Frequency) matrix representation, where each row corresponds to a document and each column to a word feature.

In [4]:
# Load the 20 Newsgroups dataset
data = fetch_20newsgroups(subset="all", categories=None, shuffle=True, random_state=42)

# Preprocess the data
vectorizer = TfidfVectorizer(max_features=10000, stop_words="english")
X = vectorizer.fit_transform(data.data)
y = data.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Train the Classifier
Next, we'll train a Logistic Regression classifier on the preprocessed data.

In [5]:
# Train a Logistic Regression classifier
classifier = LogisticRegression(max_iter=1000)
classifier.fit(X_train, y_train)

### Evaluate the Model
We'll evaluate the trained classifier on the test set.

In [6]:
# Evaluate the classifier
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Display classification report
print(classification_report(y_test, y_pred, target_names=data.target_names))

Accuracy: 0.89
                          precision    recall  f1-score   support

             alt.atheism       0.87      0.91      0.89       151
           comp.graphics       0.76      0.83      0.80       202
 comp.os.ms-windows.misc       0.82      0.82      0.82       195
comp.sys.ibm.pc.hardware       0.68      0.73      0.71       183
   comp.sys.mac.hardware       0.87      0.84      0.85       205
          comp.windows.x       0.88      0.82      0.85       215
            misc.forsale       0.83      0.79      0.81       193
               rec.autos       0.91      0.92      0.92       196
         rec.motorcycles       0.96      0.95      0.95       168
      rec.sport.baseball       0.94      0.96      0.95       211
        rec.sport.hockey       0.95      0.97      0.96       198
               sci.crypt       0.98      0.94      0.96       201
         sci.electronics       0.84      0.86      0.85       202
                 sci.med       0.95      0.94      0.94     

### Make Predictions
We'll define a function to predict the category of new text inputs using the trained model.

In [7]:
def predict_topic(text):
    text_vectorized = vectorizer.transform([text])
    predicted_category = data.target_names[classifier.predict(text_vectorized)[0]]
    return predicted_category

Finally, let's test our prediction function with sample text.

In [8]:
sample_text = "The latest technology trends in machine learning and artificial intelligence"
predicted_topic = predict_topic(sample_text)
print(f"Predicted Topic: {predicted_topic}")

Predicted Topic: sci.med
