In [4]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

# Fetch the complete 20 newsgroups corpus, asking the loader to remove
# headers, footers and quoted replies from each post
newsgroups = fetch_20newsgroups(
    subset='all',
    remove=('headers', 'footers', 'quotes'),
)

# Hold out 20% of the documents for evaluation (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    newsgroups.data,
    newsgroups.target,
    test_size=0.2,
    random_state=42,
)

# Bag-of-words counts: the vocabulary is fitted on the training split only
# and then reused to encode the test split
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Fit a multinomial Naive Bayes model, then label the held-out documents
nb_classifier = MultinomialNB().fit(X_train_vec, y_train)
y_pred = nb_classifier.predict(X_test_vec)

# Headline scores; precision and recall share the same weighted-average
# settings, so those keyword arguments are defined once
accuracy = metrics.accuracy_score(y_test, y_pred)
weighted_avg = {'average': 'weighted', 'zero_division': 0}
precision = metrics.precision_score(y_test, y_pred, **weighted_avg)
recall = metrics.recall_score(y_test, y_pred, **weighted_avg)

# Report the headline scores to four decimal places
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")

# Confusion matrix of true labels against predicted labels
conf_matrix = metrics.confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Per-class breakdown, labelled with the newsgroup names
class_report = metrics.classification_report(
    y_test,
    y_pred,
    target_names=newsgroups.target_names,
)
print("Classification Report:")
print(class_report)


Accuracy: 0.6175
Precision: 0.6960
Recall: 0.6175
Confusion Matrix:
[[ 38   1   0   0   0   0   0   1   5   0   0   1   0   1   3  75   2  13
    8   3]
 [  1 151   0   9   1   6   1   1   6   0   1   8   0   2   5   6   1   2
    1   0]
 [  1  50   8  48   5  51   0   0   7   0   0  13   2   0   1   5   1   0
    3   0]
 [  0  16   0 134   6  10   2   0   1   0   0   5   2   1   0   4   0   1
    1   0]
 [  2  15   1  16 119   2   2   1  12   0   0  13   3   5   2   7   0   2
    3   0]
 [  0  29   0   5   0 171   0   0   2   0   0   0   0   1   1   4   0   1
    1   0]
 [  0  12   0  26   5   3 103   4   2   0   1  11   6   3   4   5   1   3
    4   0]
 [  1   3   0   0   0   1   2 124  10   0   1   8   3   0   4   8   1  12
   18   0]
 [  0   2   0   0   0   1   4   8  98   1   1   5   0   1   3  11   3  17
   13   0]
 [  0   2   0   0   0   0   0   0  10 142   6   2   0   1   0  23   0  15
   10   0]
 [  1   1   0   0   0   0   0   0   5   0 159   2   0   2   0  13   0   5
   10   