In [19]:
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

# Toy corpus for the spam demo: every entry pairs an email text with its
# label (1 = spam, 0 = not spam), so text and label cannot drift apart.
labelled_emails = [
    ("urgent money making offer", 1),
    ("limited time emergency sale", 1),
    ("normal email from friend", 0),
    ("hello, let's catch up", 0),
    ("special offer just for you", 1),
    ("buy now or miss out", 1),
    ("meeting agenda", 0),
    ("weekly newsletter", 0),
    ("financial report", 0),
    ("update on project", 0),
]

# Split the pairs into the two parallel lists that the later cells reuse.
documents = [text for text, _flag in labelled_emails]
labels = [flag for _text, flag in labelled_emails]

# Bag-of-words counts feeding a multinomial Naive Bayes classifier.
# fit() returns the fitted pipeline, so building and training are chained.
model = make_pipeline(CountVectorizer(), MultinomialNB()).fit(documents, labels)

# Unseen message to score; wrapped in a list because the pipeline takes a
# collection of documents, not a bare string.
new_email = ["emergency money"]

# Class probabilities and the hard prediction for the new message.
probabilities = model.predict_proba(new_email)
prediction = model.predict(new_email)

# Report the result. `probabilities` is printed as the full 2-D array
# (one row per input document), columns ordered as (class 0, class 1).
print("Prediction (1 for spam, 0 for not spam):", prediction[0])
print("Probabilities (not spam, spam):", probabilities)


Prediction (1 for spam, 0 for not spam): 1
Probabilities (not spam, spam): [[0.28049793 0.71950207]]


In [22]:
# BernoulliNB is not imported by any other cell shown in this notebook, so a
# fresh kernel (Restart & Run All) would fail here with a NameError.
# Import it in this cell, mirroring how the ComplementNB cell imports its class.
from sklearn.naive_bayes import BernoulliNB

# Bernoulli Naive Bayes works on binary (present / absent) word features.
# This cell reuses `documents`, `labels` and `new_email` from the first cell.
BernoulliNB_model = make_pipeline(
    CountVectorizer(binary=True),  # binary=True: every non-zero count becomes 1
    BernoulliNB()
)

# Fit the model with the example data
BernoulliNB_model.fit(documents, labels)

# Predict the class of the new email
BernoulliNB_prediction = BernoulliNB_model.predict(new_email)

# Calculate the probabilities (one row per input document)
BernoulliNB_probabilities = BernoulliNB_model.predict_proba(new_email)

# Print the prediction and the probability row for the single input email.
# NOTE(review): unlike the multinomial model, BernoulliNB also penalises
# vocabulary words that are absent from the email, which is presumably why
# the recorded prediction differs from the first cell - confirm if it matters.
print("Prediction (1 for spam, 0 for not spam):", BernoulliNB_prediction[0])
print("Probabilities (not spam, spam):", BernoulliNB_probabilities[0])

Prediction (1 for spam, 0 for not spam): 0
Probabilities (not spam, spam): [0.73484902 0.26515098]


In [24]:
from sklearn.naive_bayes import ComplementNB

# Complement Naive Bayes on presence/absence features (binary=True caps every
# word count at 1). The pipeline is built and trained in one expression,
# reusing `documents` and `labels` from the first cell.
ComplementNB_model = make_pipeline(CountVectorizer(binary=True), ComplementNB()).fit(
    documents, labels
)

# Score the same unseen email as the earlier cells: probabilities first,
# then the hard class decision.
ComplementNB_probabilities = ComplementNB_model.predict_proba(new_email)
ComplementNB_prediction = ComplementNB_model.predict(new_email)

# Show the predicted label and the probability row for the single input email.
print("Prediction (1 for spam, 0 for not spam):", ComplementNB_prediction[0])
print("Probabilities (not spam, spam):", ComplementNB_probabilities[0])

Prediction (1 for spam, 0 for not spam): 1
Probabilities (not spam, spam): [0.20628624 0.79371376]
