In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

data = {'text': ['I love this product', 'This is the worst experience I have ever had', 'Amazing, I am so happy!',
                 'I am really disappointed with this purchase'],
        'sentiment': [1, 0, 1, 0]}

df = pd.DataFrame(data)
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['sentiment'], test_size=0.5, random_state=42)

# Convert text data to numerical features using TF-IDF
vectorizer = TfidfVectorizer(stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Create and train a Naive Bayes classifier
model = MultinomialNB().fit(X_train_tfidf, y_train)

# Predict the sentiment on the test data
y_pred = model.predict(X_test_tfidf)

# Print results
print(f"Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")
print("Classification Report:\n", classification_report(y_test, y_pred, zero_division=1))

# Example of predicting new text
new_text = ["I am so excited about this product!", "I hate it, totally worthless."]
predictions = model.predict(vectorizer.transform(new_text))

for text, sentiment in zip(new_text, predictions):
    print(f"Text: '{text}' -> Sentiment: {'Positive' if sentiment == 1 else 'Negative'}")


Accuracy: 0.00%
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.00      0.00       2.0
           1       0.00      1.00      0.00       0.0

    accuracy                           0.00       2.0
   macro avg       0.50      0.50      0.00       2.0
weighted avg       1.00      0.00      0.00       2.0

Text: 'I am so excited about this product!' -> Sentiment: Positive
Text: 'I hate it, totally worthless.' -> Sentiment: Positive
