# In [2]:
import streamlit as st
import pandas as pd
import logging
import time
import numpy as np
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.special import softmax
from matplotlib.patches import FancyBboxPatch

# Logging configuration: INFO level and above, each record rendered as
# "<timestamp> - <level name> - <message>"
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Load the model and tokenizer locally
def load_local_model():
    """Load the sentiment classifier and its tokenizer.

    The model is moved to the GPU when CUDA is available, otherwise it stays
    on the CPU.

    Returns:
        A ``(model, tokenizer)`` tuple.

    Only ``ImportError`` is handled here: it is reported through ``st.error``
    and followed by ``st.stop()``. Any other exception propagates to the
    caller.
    """
    checkpoint = "cardiffnlp/twitter-roberta-base-sentiment"
    try:
        tok = AutoTokenizer.from_pretrained(checkpoint)

        # Prefer the GPU when CUDA is available, otherwise fall back to the CPU
        if torch.cuda.is_available():
            target = torch.device("cuda")
        else:
            target = torch.device("cpu")

        # Load the weights, then move them to the selected device
        classifier = AutoModelForSequenceClassification.from_pretrained(checkpoint)
        classifier = classifier.to(target)
    except ImportError as e:
        st.error(f"Error importing required backend: {e}")
        st.stop()
    else:
        return classifier, tok

# Load the local model and tokenizer (module-level statement)
model, tokenizer = load_local_model()

# Label names; the list position matches the index of the corresponding score
# (it is indexed with the argmax of the score vector)
label_mapping = ['Negative', 'Neutral', 'Positive']

# Preprocess text
def preprocess(text):
    """Mask user mentions and links in ``text``.

    The text is split on single spaces. Every token that starts with ``@`` and
    is longer than one character becomes ``@user``; every token that starts
    with ``http`` becomes ``http``. Tokens are re-joined with single spaces, so
    the original spacing is preserved.
    """
    def _mask(token):
        # Same order as the per-token checks: mention first, then link
        if token.startswith('@') and len(token) > 1:
            token = '@user'
        if token.startswith('http'):
            token = 'http'
        return token

    return " ".join(_mask(token) for token in text.split(" "))

# Compute the score of each label (Negative, Neutral, Positive) for a text
def get_sentiment_scores(text):
    """Return the softmax probabilities of the sentiment labels for ``text``.

    The text is passed through ``preprocess``, tokenized, run through the
    module-level ``model`` and the logits of the single input are converted to
    probabilities.

    Args:
        text: The raw text to classify.

    Returns:
        A NumPy array of probabilities, one entry per model output label.
    """
    text = preprocess(text)
    # Truncate explicitly: RoBERTa-base checkpoints accept at most 512 tokens,
    # and longer inputs would otherwise fail inside the model.
    encoded_input = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
    # The model may live on the GPU; its inputs must be on the same device.
    encoded_input = encoded_input.to(model.device)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        output = model(**encoded_input)
    # Bring the logits back to the CPU before converting to NumPy.
    scores = output[0][0].detach().cpu().numpy()
    return softmax(scores)  # Apply softmax to obtain probabilities

# Analyze the sentiment of every row of a DataFrame while updating a progress bar
def analyze_sentiments_csv(df):
    """Score the ``text`` column of ``df`` and attach the results as columns.

    For each row the text is passed to ``get_sentiment_scores``. A row whose
    analysis raises is reported with ``st.error`` and recorded with the
    sentiment ``"error"`` and zero scores, so one bad row does not abort the
    whole run.

    Args:
        df: DataFrame with a ``text`` column. It is modified in place: the
            columns ``sentiment``, ``negative_score``, ``neutral_score`` and
            ``positive_score`` are added.

    Returns:
        The same DataFrame, with the new columns.

    Side effects:
        Renders a progress bar, a status text, a success message and a CSV
        download button through Streamlit.
    """
    total_chunks = len(df)
    progress_bar = st.progress(0)
    progress_text = st.empty()

    sentiments = []
    negative_scores = []
    neutral_scores = []
    positive_scores = []

    # Track progress by row position: the index labels yielded by iterrows()
    # are not guaranteed to be 0..n-1 (filtered, string or offset indexes).
    for position, (_, row) in enumerate(df.iterrows(), start=1):
        text = row['text']
        # Resolve all four values before appending anything, so a failure
        # halfway through can never leave the result lists with different lengths.
        try:
            scores = get_sentiment_scores(text)
            label = label_mapping[np.argmax(scores)]  # Highest-scoring sentiment
            negative, neutral, positive = scores[0], scores[1], scores[2]
        except Exception as e:
            st.error(f"Error during sentiment analysis: {e}")
            label, negative, neutral, positive = "error", 0, 0, 0

        sentiments.append(label)
        negative_scores.append(negative)
        neutral_scores.append(neutral)
        positive_scores.append(positive)

        # Update the progress bar
        progress_bar.progress(position / total_chunks)
        progress_text.text(f"Processing {position} of {total_chunks}")

    df['sentiment'] = sentiments
    df['negative_score'] = negative_scores
    df['neutral_score'] = neutral_scores
    df['positive_score'] = positive_scores

    # Fill the progress bar completely
    progress_bar.progress(1.0)
    st.success("Sentiment analysis complete!")

    # Serialize the DataFrame as CSV and offer it for download
    csv = df.to_csv(index=False).encode('utf-8')
    st.download_button(
        label=":arrow_down: Download results as CSV",
        data=csv,
        file_name='sentiment_analysis_results.csv',
        mime='text/csv',
    )
    return df


# Compute the percentage of each sentiment
def calculate_sentiment_percentages(df):
    """Return the share of each sentiment label as a percentage.

    The percentages are taken over all values of the ``sentiment`` column
    (any other label present, such as ``"error"``, counts towards the total).

    Returns:
        A list ``[negative, neutral, positive]``; a label that does not occur
        contributes ``0``.
    """
    shares = df['sentiment'].value_counts(normalize=True) * 100
    ordered_labels = ('Negative', 'Neutral', 'Positive')
    return [shares.get(label, 0) for label in ordered_labels]

