In [6]:
# Tratamiento de datos
# ==============================================================================
import pandas as pd
import numpy as np
import re, spacy
from tqdm import tqdm
nlp = spacy.load('es_core_news_sm')
import emoji

# Gráficos
# ==============================================================================
import matplotlib.pyplot as plt
#import seaborn as sns
import itertools

# Preprocesado y modelado
# ==============================================================================
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

# Benchmark
# ==============================================================================
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import classification_report, confusion_matrix

# Configuración warnings
# ==============================================================================
import warnings
warnings.filterwarnings('ignore')

# Exportación e importación 
# ==============================================================================
import pickle

### Importamos los modelos

In [7]:
#Importamos los modelos para la clasificación

filename = './models/model_1_tweets.pickle'
classifier_model_1 = pickle.load(open(filename, 'rb'))

filename = './models/model_2_tweets.pickle'
classifier_model_2 = pickle.load(open(filename, 'rb'))

filename = './models/model_3_tweets.pickle'
classifier_model_3 = pickle.load(open(filename, 'rb'))

filename = './models/model_4_tweets.pickle'
classifier_model_4 = pickle.load(open(filename, 'rb'))

#Importamos el modelo para crear la Bolsa de Palabras

filename = './models/vectorizer_bow_tweets.pickle'
classifier_model = pickle.load(open(filename, 'rb'))

### Creamos la Bolsa de Palabras

In [8]:
filename = './models/normalized_test_tweets_string.pickle'
X_norm_test = pickle.load(open(filename, 'rb'))

filename = './models/gold_test_string.pickle'
y_test = pickle.load(open(filename, 'rb'))

X_test = [doc[0] for doc in X_norm_test]
vectorizer = TfidfVectorizer(max_features=1500)
vectorizer.fit(X_test)
X_test = vectorizer.transform(X_test) 

### Predicción

In [9]:
predictions_1 = classifier_model_1.predict(X_test)
predictions_2 = classifier_model_2.predict(X_test)
predictions_3 = classifier_model_3.predict(X_test)
predictions_4 = classifier_model_4.predict(X_test)

labels = ['anger', 'disgust', 'fear', 'joy', 'others', 'sadness', 'surprise']

In [10]:
#Obtenemos las métricas de rendimiento de los modelos con los datos de prueba

print('Classification report model 1')
print(classification_report(y_true=y_test, y_pred=predictions_1, labels=labels))
print('Classification report model 2')
print(classification_report(y_true=y_test, y_pred=predictions_2, labels=labels))
print('Classification report model 3')
print(classification_report(y_true=y_test, y_pred=predictions_3, labels=labels))
print('Classification report model 4')
print(classification_report(y_true=y_test, y_pred=predictions_4, labels=labels))

Classification report model 1
              precision    recall  f1-score   support

       anger       0.12      0.06      0.08       168
     disgust       0.00      0.00      0.00        33
        fear       0.00      0.00      0.00        21
         joy       0.23      0.08      0.12       354
      others       0.49      0.85      0.62       814
     sadness       0.16      0.03      0.05       199
    surprise       0.00      0.00      0.00        67

    accuracy                           0.45      1656
   macro avg       0.14      0.15      0.13      1656
weighted avg       0.32      0.45      0.35      1656

Classification report model 2
              precision    recall  f1-score   support

       anger       0.10      0.05      0.07       168
     disgust       0.00      0.00      0.00        33
        fear       0.00      0.00      0.00        21
         joy       0.23      0.09      0.13       354
      others       0.49      0.83      0.62       814
     sadness      