<a href="https://colab.research.google.com/github/peaceemenike/Artificial-Intelligent/blob/main/Naive%20Bayes%20Sentiment%20Classifier/Naive_Bayes_Sentiment_Classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# PROJECT 3: Naive Bayes Sentiment Classifier
import math
import string
from collections import Counter, defaultdict

# Sample Dataset
# A list of (sentence, sentiment) pairs: the five positive sentences come
# first, followed by the five negative ones.
dataset = [
    *(
        (sentence, "positive")
        for sentence in (
            "I love this product",
            "It is amazing and works well",
            "My son enjoyed it so much",
            "Absolutely fantastic experience",
            "Best thing I bought this year",
        )
    ),
    *(
        (sentence, "negative")
        for sentence in (
            "I hate this item",
            "Terrible and disappointing",
            "It broke after one day",
            "Worst purchase ever",
            "Not worth the money",
        )
    ),
]


# Preprocessing: Tokenizer


def tokenize(text):
    """
    Split text into lowercase word tokens.

    The text is lowercased and split on whitespace, then ASCII punctuation
    is stripped from both ends of every token so that "love!" and "love"
    map to the same word. Punctuation inside a token (e.g. the apostrophe
    in "don't") is kept. Tokens that consist only of punctuation are
    dropped.

    Args:
        text: the string to tokenize.

    Returns:
        A list of tokens in their original order; empty if the text holds
        no word characters.
    """
    stripped = (raw.strip(string.punctuation) for raw in text.lower().split())
    # Stripping can leave an empty string (e.g. for "..."); skip those.
    return [token for token in stripped if token]


# Train Naive Bayes Classifier
class NaiveBayesClassifier:
    """
    Naive Bayes text classifier over a bag-of-words representation.

    All attributes start empty and are filled in by train():
      class_priors      - label -> fraction of training documents with
                          that label.
      word_likelihoods  - label -> {word: P(word | label)}, estimated with
                          add-one (Laplace) smoothing over the vocabulary.
      class_word_counts - label -> Counter of token occurrences under
                          that label.
      vocab             - set of every token seen during training.
    """

    def __init__(self):
        self.class_priors = {}
        self.word_likelihoods = {}
        self.class_word_counts = {}
        self.vocab = set()

    def train(self, data):
        """
        Train Naive Bayes model using bag-of-words.

        Each call rebuilds the model from scratch, so nothing from a
        previous call (including the vocabulary) carries over.

        Args:
            data: iterable of (text, label) pairs.
        """
        label_counts = Counter()
        word_counts = defaultdict(Counter)
        # Built locally and assigned at the end so a retrain does not keep
        # words from an earlier dataset, which would inflate the smoothing
        # denominator.
        vocab = set()

        # Count labels and words
        for text, label in data:
            label_counts[label] += 1
            words = tokenize(text)
            word_counts[label].update(words)
            vocab.update(words)

        # Compute priors
        total_docs = sum(label_counts.values())
        self.class_priors = {
            label: count / total_docs
            for label, count in label_counts.items()
        }

        # Save counts and compute likelihoods
        self.vocab = vocab
        self.class_word_counts = word_counts
        self.word_likelihoods = {}
        vocab_size = len(vocab)

        # Iterate over every label that has a prior (not only labels that
        # produced tokens), so predict() always finds a likelihood table
        # for each class it scores.
        for label in label_counts:
            counts = word_counts[label]
            denominator = sum(counts.values()) + vocab_size

            self.word_likelihoods[label] = {
                word: (counts[word] + 1) / denominator     # Laplace smoothing
                for word in vocab
            }

    # Predict using Bayes Rule
    def predict(self, text):
        """
        Returns the most probable class for the given text.

        For each class the score is the log prior plus the sum of the log
        likelihoods of the text's words. Words that were not seen during
        training are ignored.

        Args:
            text: the string to classify.

        Returns:
            The label with the highest score.

        Raises:
            ValueError: if the classifier has no trained classes.
        """
        if not self.class_priors:
            raise ValueError(
                "Classifier has not been trained; call train() with at "
                "least one labelled example before predict()."
            )

        # Out-of-vocabulary words contribute nothing to any class score.
        known_words = [word for word in tokenize(text) if word in self.vocab]
        scores = {}

        for label, prior in self.class_priors.items():
            likelihoods = self.word_likelihoods[label]

            # Work in log space so products of small probabilities do not
            # underflow.
            score = math.log(prior)
            for word in known_words:
                score += math.log(likelihoods[word])

            scores[label] = score

        # Return label with highest probability
        return max(scores, key=scores.get)



# Train & Test the Model
# Fit a fresh classifier on the sample dataset.
nb = NaiveBayesClassifier()
nb.train(dataset)

# Sentences used to spot-check the trained model.
test_sentences = [
    "I really love this",
    "This was the worst ever",
    "Amazing quality",
    "Not good at all",
    "Pretty fun and enjoyable"
]

# map() is lazy, so each sentence is classified right before it is printed.
for text, prediction in zip(test_sentences, map(nb.predict, test_sentences)):
    print(f"Text: {text}\nPrediction: {prediction}\n")


# Interactive Mode
# Read one sentence from standard input and print its predicted label.
print("Try your own message:")
user_text = input("Enter a sentence: ")
print("Predicted sentiment:", nb.predict(user_text))


Text: I really love this
Prediction: positive

Text: This was the worst ever
Prediction: negative

Text: Amazing quality
Prediction: positive

Text: Not good at all
Prediction: negative

Text: Pretty fun and enjoyable
Prediction: negative

Try your own message:
Enter a sentence: i love you
Predicted sentiment: positive
