In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd
import matplotlib.pyplot as plt


def classify_sentiment_vader(comment, analyzer):
    """Label a comment as 'Positive', 'Negative' or 'Neutral' from its VADER compound score.

    Parameters
    ----------
    comment : object
        Text to score. It is converted with ``str()`` before scoring, so
        non-string values (for example a NaN coming from an empty CSV cell)
        are scored as their string representation.
    analyzer : object
        Any object exposing ``polarity_scores(text)`` that returns a mapping
        containing a ``'compound'`` entry.

    Returns
    -------
    str
        'Positive' when compound >= 0.05, 'Negative' when compound <= -0.05,
        otherwise 'Neutral'.
    """
    # Get the compound sentiment score for the comment.
    compound = analyzer.polarity_scores(str(comment))['compound']

    # The cut-offs are inclusive, following the thresholds recommended in the
    # VADER documentation; a score of exactly +/-0.05 is not neutral.
    if compound >= 0.05:
        return 'Positive'
    if compound <= -0.05:
        return 'Negative'
    return 'Neutral'

def classify_comments_vader(csv_path, output_path='../data/comments_with_sentiment.csv'):
    """Classify every comment of a CSV file with VADER and save the result.

    Parameters
    ----------
    csv_path : str or path-like
        CSV file to read. It must contain a ``cleaned_comment`` column.
    output_path : str or path-like, optional
        Where the frame with the added ``sentiment_Vader`` column is written
        (without the index). Defaults to the path this notebook has always
        used, so existing calls behave as before.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with an extra ``sentiment_Vader`` column.

    Raises
    ------
    KeyError
        If the CSV has no ``cleaned_comment`` column.
    """
    df = pd.read_csv(csv_path)

    # Access the column up front: a missing column fails immediately with
    # KeyError instead of after the analyzer has been built.
    comments = df['cleaned_comment']

    analyzer = SentimentIntensityAnalyzer()
    sentiments = []

    # Iterate over the column values directly; iterrows() would build a full
    # Series for every row only to read a single field from it.
    for i, comment in enumerate(comments):
        sentiments.append(classify_sentiment_vader(comment, analyzer))

        # Progress report every 10,000 comments (skipping the very first row).
        if i % 10000 == 0 and i > 0:
            print(f"Clasificados {i} comentarios...")

    df['sentiment_Vader'] = sentiments
    df.to_csv(output_path, index=False)
    print("Los comentarios con clasificación de sentimiento VADER han sido guardados.")

    return df

# Run the VADER classification over the cleaned comments file.
classified_comments_vader = classify_comments_vader(
    csv_path='../data/cleaned_comments.csv',
)


In [None]:
# The labels are categorical, so plot their counts as a bar chart: a numeric
# histogram would spread 10 bins over the 3 categories and misalign the bars
# with the category ticks. A fixed label order keeps the charts comparable.
ax = (
    classified_comments_vader['sentiment_Vader']
    .value_counts()
    .reindex(['Negative', 'Neutral', 'Positive'], fill_value=0)
    .plot.bar(rot=0)
)
ax.set(
    title='Distribución de sentimiento (VADER)',
    xlabel='Sentimiento',
    ylabel='Número de comentarios',
);

In [None]:
from transformers import pipeline 


def classify_sentiment_bert(comment, analyzer):
    """Map the label predicted by ``analyzer`` to 'Positive', 'Negative' or 'Neutral'.

    Parameters
    ----------
    comment : object
        Text to classify; it is converted with ``str()`` before being passed on.
    analyzer : callable
        Called with the comment text. It must return a sequence whose first
        element is a mapping with a ``"label"`` entry.

    Returns
    -------
    str
        'Positive' for the label "POS", 'Negative' for "NEG", and 'Neutral'
        for any other label.
    """
    # Only the label of the first prediction is used.
    predicted_label = analyzer(str(comment))[0]["label"]

    if predicted_label == "POS":
        return 'Positive'
    return 'Negative' if predicted_label == "NEG" else 'Neutral'

def classify_comments_bert(csv_path, output_path='../data/comments_with_sentiment.csv'):
    """Classify every comment of a CSV file with a transformers pipeline and save the result.

    Parameters
    ----------
    csv_path : str or path-like
        CSV file to read. It must contain a ``cleaned_comment`` column.
    output_path : str or path-like, optional
        Where the frame with the added ``sentiment_Bert`` column is written
        (without the index). Defaults to the path this notebook has always
        used, so existing calls behave as before.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with an extra ``sentiment_Bert`` column.

    Raises
    ------
    KeyError
        If the CSV has no ``cleaned_comment`` column.
    """
    df = pd.read_csv(csv_path)

    # Access the column before loading the model: a missing column fails
    # immediately with KeyError instead of after an expensive model load.
    comments = df['cleaned_comment']

    # Truncation is enabled because the model only accepts a limited number of
    # tokens (128 according to the original author's note).
    analyzer = pipeline(
        "sentiment-analysis",
        model="pysentimiento/robertuito-sentiment-analysis",
        truncation=True,
    )

    sentiments = []

    # Iterate over the column values directly; iterrows() would build a full
    # Series for every row only to read a single field from it.
    for i, comment in enumerate(comments):
        sentiments.append(classify_sentiment_bert(comment, analyzer))

        # Progress report every 1,000 comments (skipping the very first row).
        if i % 1000 == 0 and i > 0:
            print(f"Clasificados {i} comentarios...")

    df['sentiment_Bert'] = sentiments
    df.to_csv(output_path, index=False)
    print("Los comentarios con clasificación de sentimiento Bert han sido guardados.")

    return df

# Run the BERT-based classification over the file that already holds the comments.
classified_comments_bert = classify_comments_bert(
    csv_path='../data/comments_with_sentiment.csv',
)

In [None]:
# The labels are categorical, so plot their counts as a bar chart: a numeric
# histogram would spread 10 bins over the 3 categories and misalign the bars
# with the category ticks. A fixed label order keeps the charts comparable.
ax = (
    classified_comments_bert['sentiment_Bert']
    .value_counts()
    .reindex(['Negative', 'Neutral', 'Positive'], fill_value=0)
    .plot.bar(rot=0)
)
ax.set(
    title='Distribución de sentimiento (Bert)',
    xlabel='Sentimiento',
    ylabel='Número de comentarios',
);