In [2]:
import numpy as np
import re
from collections import defaultdict

In [15]:
import numpy as np
import re
from collections import defaultdict

class NaiveBayesText:
    def __init__(self):
        self.classes = None
        self.prior_probs = {}
        self.word_probs = defaultdict(lambda: defaultdict(lambda: 0))

    def preprocess(self, text):
        # Remove caracteres especiais e converte para minúsculas
        text = re.sub(r'\W', ' ', text)
        text = text.lower()
        text = text.split()
        return text

    def fit(self, X, y):
        n_samples = len(X)
        self.classes = np.unique(y)
        word_counts = defaultdict(lambda: defaultdict(int))
        class_counts = defaultdict(int)
        total_words = defaultdict(int)
        
        for text, label in zip(X, y):
            words = self.preprocess(text)
            class_counts[label] += 1
            for word in words:
                word_counts[label][word] += 1
                total_words[label] += 1

        for label in self.classes:
            self.prior_probs[label] = class_counts[label] / float(n_samples)
            for word in word_counts[label]:
                self.word_probs[label][word] = (word_counts[label][word] + 1) / (total_words[label] + len(word_counts[label]))

    def _calc_likelihood(self, text, cls):
        likelihood = np.log(self.prior_probs[cls])
        words = self.preprocess(text)
        for word in words:
            if word in self.word_probs[cls]:
                likelihood += np.log(self.word_probs[cls][word])
            else:
                likelihood += np.log(1 / (sum(self.word_probs[cls].values()) + len(self.word_probs[cls])))
        return likelihood

    def predict(self, X):
        y_pred = []
        for text in X:
            posteriors = [self._calc_likelihood(text, cls) for cls in self.classes]
            y_pred.append(self.classes[np.argmax(posteriors)])
        return np.array(y_pred)

In [16]:
# Exemplo de uso
if __name__ == "__main__":
    # Exemplo de dados
    X_train = [
        "Este é um exemplo de texto positivo",
        "Eu amo aprender novas coisas",
        "Este texto é negativo e triste",
        "Eu odeio quando isso acontece"
    ]
    y_train = np.array([1, 1, 0, 0])  # 1 para positivo, 0 para negativo

    X_test = [
        "Eu amo este exemplo",
        "Odeio coisas tristes"
    ]

    nb_text = NaiveBayesText()
    nb_text.fit(X_train, y_train)  # Corrigido aqui
    y_pred = nb_text.predict(X_test)

    print(f"Previsões: {y_pred}")


Previsões: [0 0]
