In [14]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer

class NaiveBayesClassifier:
    """Multinomial Naive Bayes classifier over word-count feature vectors.

    Both the class priors and the per-class word probabilities are estimated
    with additive smoothing controlled by ``alpha``.

    Attributes:
        alpha: Additive smoothing constant.
        class_probabilities: Maps each class label seen in ``fit`` to its
            smoothed prior probability.
        word_probabilities: Maps each class label to an array holding the
            smoothed probability of every feature (word) within that class.
    """

    def __init__(self, alpha=1.0):
        """Create an unfitted classifier.

        Args:
            alpha: Additive smoothing constant applied to the class priors
                and to the per-class word probabilities.
        """
        self.alpha = alpha
        self.class_probabilities = {}
        self.word_probabilities = {}

    @staticmethod
    def _as_dense(X):
        """Return ``X`` as an ndarray, densifying objects exposing ``toarray()``."""
        if hasattr(X, "toarray"):
            X = X.toarray()
        return np.asarray(X)

    def fit(self, X, y):
        """Estimate class priors and per-class word probabilities.

        Any state left over from a previous call is discarded, so the model
        only ever knows the classes present in the latest ``y``.

        Args:
            X: Count matrix of shape (n_samples, n_features); array-like or
                an object with a ``toarray()`` method.
            y: Sequence of n_samples class labels (list, ndarray, Series...).

        Raises:
            ValueError: If ``X`` is not 2-D or its row count differs from
                the number of labels.
        """
        X = self._as_dense(X)
        # A plain list compared with `==` yields a single bool rather than an
        # element-wise mask, so labels are always coerced to an array first.
        labels = np.asarray(y)
        if X.ndim != 2:
            raise ValueError(
                "X must be a 2-D array of shape (n_samples, n_features)"
            )
        if X.shape[0] != len(labels):
            raise ValueError("X and y must contain the same number of samples")

        unique_classes, class_counts = np.unique(labels, return_counts=True)
        n_samples = len(labels)
        n_classes = len(unique_classes)
        n_features = X.shape[1]

        # Build into fresh dicts and publish them together at the end, so a
        # refit never mixes classes from different training sets.
        class_probabilities = {}
        word_probabilities = {}
        for class_name, class_count in zip(unique_classes, class_counts):
            class_probabilities[class_name] = (class_count + self.alpha) / (
                n_samples + n_classes * self.alpha
            )
            word_counts = X[labels == class_name].sum(axis=0)
            word_probabilities[class_name] = (word_counts + self.alpha) / (
                word_counts.sum() + n_features * self.alpha
            )

        self.class_probabilities = class_probabilities
        self.word_probabilities = word_probabilities

    def predict(self, X):
        """Return the most probable class label for every row of ``X``.

        Each class is scored by its log prior plus the count-weighted log
        probabilities of the words whose count is positive. Ties go to the
        class that comes first in ``class_probabilities``.

        Args:
            X: Count matrix of shape (n_samples, n_features); array-like or
                an object with a ``toarray()`` method.

        Returns:
            A list with one predicted label per sample. An entry is ``None``
            when no class obtains a score greater than negative infinity,
            which includes calling this method before ``fit``.

        Raises:
            ValueError: If ``X`` is not 2-D (and not empty) or its feature
                count differs from the one seen in ``fit``.
        """
        X = self._as_dense(X)
        classes = list(self.class_probabilities)
        if not classes:
            # Nothing learned yet: no class can win for any sample.
            return [None] * len(X)
        if X.ndim != 2:
            if X.size == 0:
                return []
            raise ValueError(
                "X must be a 2-D array of shape (n_samples, n_features)"
            )

        # The logarithms do not depend on the sample, so take them once here
        # instead of once per word occurrence.
        log_priors = np.log(
            np.array([self.class_probabilities[c] for c in classes], dtype=float)
        )
        log_likelihoods = np.vstack(
            [
                np.log(np.asarray(self.word_probabilities[c], dtype=float)).ravel()
                for c in classes
            ]
        )
        if X.shape[1] != log_likelihoods.shape[1]:
            raise ValueError(
                f"X has {X.shape[1]} features, but the classifier was fitted "
                f"with {log_likelihoods.shape[1]}"
            )

        predictions = []
        for sample in X:
            # Restrict to words that actually occur so a zero count never
            # meets log(0) = -inf (which would produce NaN).
            present = sample > 0
            scores = log_priors + log_likelihoods[:, present] @ sample[present]

            best_class = None
            best_score = float("-inf")
            for class_name, score in zip(classes, scores):
                # Strict comparison: first class wins ties, NaN never wins.
                if score > best_score:
                    best_score = score
                    best_class = class_name
            predictions.append(best_class)
        return predictions

if __name__ == "__main__":
    # Training set: the 'text' column holds the documents and the 'label'
    # column holds the class of each document.
    data = pd.read_csv("data.csv")
    y = data['label']

    # Learn the vocabulary from the training texts and encode them as a
    # document-term count matrix.
    vectorizer = CountVectorizer()
    X = vectorizer.fit_transform(data['text'])

    # The count matrix is converted to a dense array before fitting.
    classifier = NaiveBayesClassifier()
    classifier.fit(X.toarray(), y)

    # Unseen texts are encoded with the vocabulary learned above, so their
    # columns line up with the training features.
    new_data = pd.read_csv("new_data.csv")
    new_input_texts = new_data['text']
    new_input_vector = vectorizer.transform(new_input_texts).toarray()

    predictions = classifier.predict(new_input_vector)

    # Report every input text next to the label predicted for it.
    for text, prediction in zip(new_input_texts, predictions):
        print(f"Input: '{text}' => Predicted label: {prediction}")


Input: 'Good Product' => Predicted label: P
Input: 'Terrible' => Predicted label: N
Input: 'Amazing' => Predicted label: P
