In [44]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import pickle

### Definição do dataframe

In [45]:
# Load the labelled feedback dataset from a CSV in the working directory.
# Later cells read its "mensagem" and "sentimento" columns.
FEEDBACKS_CSV = "feedbacks.csv"
df = pd.read_csv(FEEDBACKS_CSV)

### Pré-processamento de dados

In [46]:
# Keep only rows that have both a message and a label: CountVectorizer raises
# on NaN documents, and rows without a label cannot be used for supervised
# training. A new frame is created so `df` itself is left untouched and this
# cell stays idempotent on re-run.
df_labeled = df.dropna(subset=["mensagem", "sentimento"])

# Features are the raw feedback messages; the target is the sentiment label.
X = df_labeled["mensagem"]
y = df_labeled["sentimento"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Bag-of-words vectorization: the vocabulary is learned on the training split
# only and then reused to encode the test split.
vectorizer = CountVectorizer()
X_train_vect = vectorizer.fit_transform(X_train)
X_test_vect = vectorizer.transform(X_test)


### Treinando diferentes modelos e selecionando o de melhor acurácia

In [47]:
# Candidate classifiers, all trained on the same bag-of-words features.
models = {
    'Naive Bayes': MultinomialNB(),
    'Logistic Regression': LogisticRegression(random_state=42, solver='liblinear'),
    'SVM': SVC(random_state=42, kernel='linear')
}

# Initialise every "best_*" name up front so that re-running this cell never
# picks up values left over from a previous execution, and start the score
# below any achievable accuracy so the first trained model is always selected
# (even if it scores 0.0).
best_model = None
best_model_name = None
best_accuracy = float("-inf")
best_report = None

# NOTE(review): the winner is chosen with the same test split whose metrics
# are reported afterwards, so the reported accuracy is an optimistic estimate.
# Consider a separate validation split or cross-validation for the selection.
for name, model in models.items():
    print(f"Treinando modelo: {name}")
    model.fit(X_train_vect, y_train)
    y_pred = model.predict(X_test_vect)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    print(f"Acurácia ({name}):", accuracy)
    print(f"Relatório de Classificação ({name}):\n", report)
    print("-" * 30)

    # Strict ">" keeps the earliest candidate when two models tie.
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = model
        best_report = report
        best_model_name = name

Treinando modelo: Naive Bayes
Acurácia (Naive Bayes): 0.9601328903654485
Relatório de Classificação (Naive Bayes):
               precision    recall  f1-score   support

    Negativo       0.97      0.95      0.96       150
    Positivo       0.95      0.97      0.96       151

    accuracy                           0.96       301
   macro avg       0.96      0.96      0.96       301
weighted avg       0.96      0.96      0.96       301

------------------------------
Treinando modelo: Logistic Regression
Acurácia (Logistic Regression): 0.9568106312292359
Relatório de Classificação (Logistic Regression):
               precision    recall  f1-score   support

    Negativo       0.95      0.96      0.96       150
    Positivo       0.96      0.95      0.96       151

    accuracy                           0.96       301
   macro avg       0.96      0.96      0.96       301
weighted avg       0.96      0.96      0.96       301

------------------------------
Treinando modelo: SVM
Acurác

### Acurácia e relatório de classificação do melhor modelo

In [48]:
# Report the selected model: its name, its test accuracy and its full
# classification report.
summary_lines = (
    f"\nMelhor modelo encontrado: {best_model_name}",
    f"Acurácia do melhor modelo: {best_accuracy}\n\n",
)
print(*summary_lines, sep="\n")
print("Relatório de Classificação do melhor modelo:\n", best_report)


Melhor modelo encontrado: Naive Bayes
Acurácia do melhor modelo: 0.9601328903654485


Relatório de Classificação do melhor modelo:
               precision    recall  f1-score   support

    Negativo       0.97      0.95      0.96       150
    Positivo       0.95      0.97      0.96       151

    accuracy                           0.96       301
   macro avg       0.96      0.96      0.96       301
weighted avg       0.96      0.96      0.96       301



### Salvando o modelo

In [49]:
# Persist the selected classifier together with the fitted vectorizer in a
# single pickle, so the same vocabulary is available wherever the model is
# loaded for inference.
artifacts = {
    "model": best_model,
    "vectorizer": vectorizer,
}
with open("model_clf.pkl", "wb") as model_file:
    pickle.dump(artifacts, model_file)