<a href="https://colab.research.google.com/github/mxag11z/EMO/blob/main/ModeloRegresion.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Regresión para 4 emociones: joy, sadness, anger, fear

Estrategia 1: utilizar traducción automática para traducir al inglés el conjunto de datos de evaluación en español, y evaluar sobre él un modelo entrenado con el conjunto de entrenamiento en inglés.


In [1]:
# Mount Google Drive at /content/drive (Colab only); the data files opened
# later in this notebook are read from paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import nltk
import numpy as np
from sklearn import linear_model
import sklearn.metrics
from sklearn.feature_extraction.text import CountVectorizer

In [7]:
def read_data(emotion, data_dir="/content/drive/MyDrive/PLN project/data"):
    """
    Load the training and test splits for one emotion.

    Training texts and labels come from ``<data_dir>/en/train``. Test texts
    come from ``<data_dir>/es_test_translatedEn`` and test labels from
    ``<data_dir>/es/test``.

    Args:
        emotion: Emotion name used to build the file names (e.g. ``"joy"``).
        data_dir: Root directory of the dataset. The default reproduces the
            Google Drive location used by this notebook.

    Returns:
        ``((train_X, train_y), (test_X, test_y))`` where each ``*_X`` is a list
        of raw text lines (line terminators kept) and each ``*_y`` is a list
        of floats.

    Raises:
        ValueError: If a label cannot be parsed as a float, or if a split has
            a different number of texts and labels.
        OSError: If any of the four files cannot be opened.
    """
    def _read_texts(path):
        # One document per line; terminators are kept, exactly as readlines() does.
        with open(path, 'r', encoding='utf-8') as f:
            return f.readlines()

    def _read_labels(path):
        # One float per line; blank lines (e.g. a trailing empty line) are
        # skipped instead of crashing on float('').
        with open(path, 'r', encoding='utf-8') as f:
            return [float(line) for line in f if line.strip()]

    # Training data
    train_X = _read_texts(f"{data_dir}/en/train/{emotion}.txt")
    train_y = _read_labels(f"{data_dir}/en/train/{emotion}_labels.txt")

    # Test data: machine-translated texts paired with the labels of the es test set
    test_X = _read_texts(f"{data_dir}/es_test_translatedEn/translates_testToEn_{emotion}.txt")
    test_y = _read_labels(f"{data_dir}/es/test/{emotion}_labels.txt")

    # Fail fast on misaligned files rather than deep inside model fitting/scoring.
    for split, texts, labels in (("train", train_X, train_y), ("test", test_X, test_y)):
        if len(texts) != len(labels):
            raise ValueError(
                f"{split} split for '{emotion}' has {len(texts)} texts "
                f"but {len(labels)} labels"
            )

    return (train_X, train_y), (test_X, test_y)

Palabras que contribuyen a predecir la aparición de la emoción (positive features) o su ausencia (negative features).

In [8]:
def analyze_weights(model, vectorizer, emotion, num_features=5):
    """
    Print the vocabulary terms with the largest and smallest coefficients.

    Args:
        model: Fitted linear model exposing ``coef_``; assumed to be a 1-D
            array with one weight per vocabulary column (single-target fit).
        vectorizer: Fitted vectorizer exposing ``vocabulary_``
            (term -> column index).
        emotion: Name used only in the printed headers.
        num_features: Number of terms printed per direction. ``0`` prints the
            headers only.

    Raises:
        ValueError: If ``num_features`` is negative.
    """
    if num_features < 0:
        raise ValueError(f"num_features must be non-negative, got {num_features}")

    # Invert term -> column into column -> term so each weight can be named.
    index_to_term = {idx: term for term, idx in vectorizer.vocabulary_.items()}
    ascending = np.argsort(model.coef_)

    # Reverse first, then take the head: a tail slice ``[-n:]`` would return
    # the whole array when n == 0.
    print(f"\nTop positive features for {emotion}:")
    for k in ascending[::-1][:num_features]:
        print(f"{model.coef_[k]:.5f}\t{index_to_term[k]}")

    print(f"\nTop negative features for {emotion}:")
    for k in ascending[:num_features]:
        print(f"{model.coef_[k]:.5f}\t{index_to_term[k]}")

# Función para entrenar y evaluar el modelo para una emoción

In [10]:
def train_emotion_regressor(emotion,alpha=100):
    """
    Fit and score a Ridge regressor for a single emotion.

    Texts are turned into binary unigram/bigram presence features with a
    vocabulary capped at 10000 entries. The vectorizer is fitted on the
    training texts only and then applied to the test texts.

    Args:
        emotion: Emotion name forwarded to ``read_data`` and ``analyze_weights``.
        alpha: Regularization strength passed to ``Ridge``.

    Returns:
        ``(model, vectorizer, mae)``: the fitted Ridge model, the fitted
        CountVectorizer, and the mean absolute error on the test split.
    """
    # Load both splits up front.
    train_split, test_split = read_data(emotion)
    train_texts, train_targets = train_split
    test_texts, test_targets = test_split

    # Bag-of-n-grams featurizer.
    bow = CountVectorizer(
        ngram_range=(1,2),
        max_features=10000,
        binary=True,
        lowercase=True,
        strip_accents=None
    )
    train_matrix = bow.fit_transform(train_texts)
    test_matrix = bow.transform(test_texts)

    # Fit the regularized linear model.
    regressor = linear_model.Ridge(alpha=alpha, fit_intercept=True)
    regressor.fit(train_matrix, train_targets)

    # Score on the held-out split.
    mae = sklearn.metrics.mean_absolute_error(
        test_targets, regressor.predict(test_matrix)
    )

    # Report the most influential terms.
    analyze_weights(regressor, bow, emotion)

    return regressor, bow, mae

# Función para entrenar y evaluar todas las emociones

In [12]:
def train_all_emotions(data_path):
    """
    Train and evaluate one regressor per emotion.

    Runs ``train_emotion_regressor`` for joy, anger, sadness and fear (in that
    order), printing a progress line and the resulting MAE for each.

    Args:
        data_path: Accepted but never read by this function.

    Returns:
        Dict mapping each emotion name to a dict with the keys ``'model'``,
        ``'vectorizer'`` and ``'mae'``.
    """
    results = {}

    for emotion in ('joy', 'anger', 'sadness', 'fear'):
        print(f"\n-----Processing {emotion}")
        fitted_model, fitted_vectorizer, error = train_emotion_regressor(emotion)
        print(f"MAE{emotion}: {error:.4f}")

        results[emotion] = dict(
            model=fitted_model,
            vectorizer=fitted_vectorizer,
            mae=error,
        )

    return results