In [34]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report
# Toy labelled corpus: each entry pairs a short text with its topic label.
documents = [
    ("The team won the championship. What a victory!", "sports"),
    ("The election results are in. The new mayor has been elected.", "politics"),
    ("New technology breakthrough announced. Exciting developments ahead.", "technology"),
    ("The match ended in a draw. Both teams played exceptionally well.", "sports"),
    ("The government announces new policies. Changes ahead for the country.", "politics"),
    ("The latest smartphone was released today. Amazing features!", "technology"),
]
# Labels (second element of every pair), kept as a tuple.
y = tuple(label for _, label in documents)

# Learn the vocabulary from the texts (first element of every pair) and
# turn them into a document-term count matrix.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform([text for text, _ in documents])
# Fit the Multinomial Naive Bayes classifier on the counts; fit() returns
# the estimator itself, so construction and training can be chained.
clf = MultinomialNB().fit(X, y)
# Unseen texts to classify with the trained model.
new_documents = [
    "The team won cricket.",
    "New Mayor in Houston.",
    "iphone smartphone just released.",
]

# Encode with the already-fitted vectorizer (transform, not fit_transform)
# so the columns line up with the features the classifier was trained on.
X_new = vectorizer.transform(new_documents)

# Predict one category per new document and show the raw label array.
predicted_categories = clf.predict(X_new)
print(predicted_categories)

# Report each document next to its predicted category.
for position, doc in enumerate(new_documents):
    category = predicted_categories[position]
    print(f"Document: {doc} | Predicted Category: {category}")

# Hand-assigned reference labels, in the same order as new_documents.
true_labels = ["sports", "politics", "technology"]

# Per-class precision / recall / F1 of the predictions against the references.
print("\nClassification Report:")
report = classification_report(true_labels, predicted_categories)
print(report)

# Probability estimates for every new document: one row per document, one
# column per class, with columns ordered like clf.classes_.
predicted_probs = clf.predict_proba(X_new)
# Keep the class labels under their own name so that predicted_categories
# still holds the per-document predictions made earlier instead of being
# overwritten with the list of all classes.
class_labels = clf.classes_

# Print the probability of each category for each document.
for doc, doc_probs in zip(new_documents, predicted_probs):
    print(f"\nDocument: {doc}")
    for category, probability in zip(class_labels, doc_probs):
        print(f"Probability of '{category}': {probability}")


['sports' 'politics' 'technology']
Document: The team won cricket. | Predicted Category: sports
Document: New Mayor in Houston. | Predicted Category: politics
Document: iphone smartphone just released. | Predicted Category: technology

Classification Report:
              precision    recall  f1-score   support

    politics       1.00      1.00      1.00         1
      sports       1.00      1.00      1.00         1
  technology       1.00      1.00      1.00         1

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3


Document: The team won cricket.
Probability of 'politics': 0.18395714795456897
Probability of 'sports': 0.7166915094469604
Probability of 'technology': 0.09935134259846995

Document: New Mayor in Houston.
Probability of 'politics': 0.7003056614102969
Probability of 'sports': 0.1421025922146052
Probability of 'technology': 0.1575917463750974

Document: iphon