### Modelo de Lenguaje LLM

In [1]:
import pandas as pd
import re
import joblib
from transformers import pipeline

Cargar modelo LLM y definir las funciones de limpieza y clasificación

In [2]:
# Build the sentiment-analysis pipeline once at module level; clasificar_llm
# calls this object for every text it classifies.
llm_classifier = pipeline(
    task="sentiment-analysis",
    model="pysentimiento/robertuito-sentiment-analysis",
)

def limpieza(texto):
    """Remove URLs and @mentions from a piece of text.

    Args:
        texto: The raw text. ``None`` and float NaN (the marker pandas uses
            for missing cells) are treated as empty text; any other
            non-string value is converted with ``str()`` first.

    Returns:
        str: The text without URLs or @mentions, with leading and trailing
        whitespace stripped. Whitespace inside the text is left untouched,
        so a removed token in the middle can leave a double space behind.
    """
    # Missing values would make re.sub raise TypeError; map them to "".
    if texto is None or (isinstance(texto, float) and texto != texto):
        return ""
    texto = str(texto)
    # `http\S+` already matches https URLs, so no separate https branch is
    # needed; there are no ^/$ anchors, so re.MULTILINE would be a no-op.
    texto = re.sub(r'http\S+|www\S+', '', texto)
    texto = re.sub(r'@\w+', '', texto)  # drop @user mentions
    return texto.strip()  # trim leading/trailing whitespace only

def clasificar_llm(texto):
    """Classify the sentiment of `texto` using the module-level `llm_classifier`.

    Returns:
        str: "POSITIVO", "NEGATIVO" or "NEUTRO". Any label other than
        POS/NEG/NEU coming back from the classifier falls back to "NEUTRO".
    """
    etiquetas = {"POS": "POSITIVO", "NEG": "NEGATIVO", "NEU": "NEUTRO"}
    # The classifier is called with truncation enabled and returns a
    # sequence; only its first entry is used.
    prediccion = llm_classifier(texto, truncation=True)[0]
    return etiquetas.get(prediccion['label'], "NEUTRO")

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


Cargar modelo ML

In [3]:
# Load the persisted classifier and its companion vectorizer from disk.
# NOTE(review): joblib files are pickle-based, so loading them can execute
# arbitrary code; only load files from a trusted source.
svm_loaded = joblib.load(filename="svm.pkl")
vectorizer_loaded = joblib.load(filename="tfidf_vectorizer.pkl")

Cargar dataset

In [4]:
# Read the full dataset, then draw a fixed-seed sample of 1000 rows so the
# same rows are selected on every run.
df = pd.read_csv("sentiment_analysis_dataset.csv")
df_muestra = df.sample(1000, random_state=1234)
df_muestra.shape  # last expression, shown as the cell output

(1000, 5)

Aplicar LLM

In [6]:
# Clean every text first, then classify the cleaned text with the LLM,
# one element at a time.
df_muestra["text_clean"] = df_muestra["text"].map(limpieza)
df_muestra["LLM"] = df_muestra["text_clean"].map(clasificar_llm)

Aplicar ML

In [7]:
# Vectorize the raw 'text' column with the loaded vectorizer and predict
# with the loaded classifier.
# NOTE(review): the LLM is fed 'text_clean' while this path uses raw 'text';
# confirm this matches the preprocessing the vectorizer was fitted with.
X_tfidf = vectorizer_loaded.transform(df_muestra.loc[:, "text"])
df_muestra["ML"] = svm_loaded.predict(X_tfidf)

In [8]:
# Cross-tabulate ML predictions (rows) against LLM predictions (columns),
# counting non-null 'text' values per pair; pairs with no rows show 0.
df_muestra.pivot_table(
    index="ML",
    columns="LLM",
    values="text",
    aggfunc="count",
    fill_value=0,
)

LLM,NEGATIVO,NEUTRO,POSITIVO
ML,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
NEGATIVO,324,96,52
NEUTRO,94,47,92
POSITIVO,130,70,95


In [9]:
# Keep only the original columns plus both predictions (this drops
# 'text_clean'), then write the result to CSV. The 'utf-8-sig' encoding
# prepends a byte-order mark to the file.
df_muestra = df_muestra.loc[
    :, ["user", "text", "date", "emotion", "sentiment", "LLM", "ML"]
]
df_muestra.to_csv("results.csv", encoding="utf-8-sig", index=False)