In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

# Toy sentiment-classification demo: fit a bag-of-words Naive Bayes model on a
# handful of labelled reviews, then classify one unseen review.

# Labelled corpus: each entry pairs a review text with its sentiment tag
# ("pos" or "neg").
data = [
    ("This is a great movie", "pos"),
    ("I hated this movie", "neg"),
    ("This was a great experience", "pos"),
    ("I did not like the movie", "neg"),
    ("This is a horrible movie", "neg"),
]

# Pull the pairs apart into two parallel tuples: the review texts and the
# label that belongs to each text (same order as `data`).
train_texts = tuple(text for text, _ in data)
train_labels = tuple(label for _, label in data)

# Assemble and train the classifier in one expression.
#
# The pipeline chains two steps and behaves like a single estimator:
#   * CountVectorizer turns each raw text into a vector of token counts
#     (held as a sparse matrix);
#   * MultinomialNB is the Naive Bayes variant for multinomially distributed
#     features such as those counts.
#
# Calling fit() on the pipeline vectorizes the training texts and then fits
# the Naive Bayes step on the resulting counts; fit() hands back the fitted
# pipeline itself, which is what gets bound to `model`.
model = make_pipeline(CountVectorizer(), MultinomialNB()).fit(
    train_texts, train_labels
)

# Classify a review the model has not seen. predict() pushes the raw text
# through the same vectorizer before the classifier scores it, so plain
# strings are passed in directly.
test_texts = ["This movie was good"]
predictions = model.predict(test_texts)

# Report the label chosen for the first (and only) test review.
print(f"The review '{test_texts[0]}' is predicted to be {predictions[0]}")
