In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd
import matplotlib.pyplot as plt


def classify_sentiment_vader(comment, analyzer):
    """Label a comment as 'Positive', 'Negative' or 'Neutral' from its VADER compound score.

    Args:
        comment: Value to score. It is converted with ``str()`` first, so
            non-string values (e.g. NaN) are scored as their string form.
        analyzer: Object exposing ``polarity_scores(text)`` that returns a
            mapping containing a ``'compound'`` entry.

    Returns:
        'Positive' when compound >= 0.05, 'Negative' when compound <= -0.05,
        otherwise 'Neutral'.
    """
    compound = analyzer.polarity_scores(str(comment))['compound']

    # Inclusive cut-offs, as recommended by the VADER README: a score of
    # exactly +/-0.05 already counts as polar, not neutral.
    if compound >= 0.05:
        return 'Positive'
    if compound <= -0.05:
        return 'Negative'
    return 'Neutral'

def classify_comments_vader(csv_path, output_path='../data/comments_with_sentiment.csv'):
    """Classify every comment in a CSV with VADER and save the labelled table.

    Args:
        csv_path: Path of the input CSV; it must contain a
            ``cleaned_comment`` column.
        output_path: Where the labelled CSV is written. Defaults to the
            location the notebook has always used.

    Returns:
        The DataFrame read from ``csv_path`` with an added
        ``sentiment_Vader`` column.
    """
    df = pd.read_csv(csv_path)
    analyzer = SentimentIntensityAnalyzer()

    sentiments = []
    # Iterate over the column itself rather than df.iterrows(): iterrows
    # coerces each row to a single dtype and yields index labels, whereas
    # enumerate gives a true positional counter for the progress message.
    for position, comment in enumerate(df['cleaned_comment']):
        sentiments.append(classify_sentiment_vader(comment, analyzer))

        if position % 10000 == 0 and position > 0:
            print(f"Clasificados {position} comentarios...")

    df['sentiment_Vader'] = sentiments
    df.to_csv(output_path, index=False)
    print("Los comentarios con clasificación de sentimiento VADER han sido guardados.")

    return df

# Classify the comments with VADER
classified_comments_vader = classify_comments_vader('../data/cleaned_comments.csv')


In [None]:
# Draw a histogram of the `sentiment_Vader` column.
classified_comments_vader['sentiment_Vader'].hist()

In [None]:
from transformers import pipeline 


def classify_sentiment_bert(comment, analyzer):
    """Translate the analyzer's top label for a comment into a readable class.

    Args:
        comment: Value to classify; passed to the analyzer as ``str(comment)``.
        analyzer: Callable that takes a string and returns a sequence whose
            first element is a mapping with a ``"label"`` entry.

    Returns:
        'Positive' for the label "POS", 'Negative' for "NEG", and 'Neutral'
        for any other label.
    """
    predictions = analyzer(str(comment))
    top_label = predictions[0]["label"]

    if top_label == "POS":
        return 'Positive'
    return 'Negative' if top_label == "NEG" else 'Neutral'

def classify_comments_bert(csv_path, output_path='../data/comments_with_sentiment.csv'):
    """Classify every comment in a CSV with the RoBERTuito sentiment model and save the result.

    Args:
        csv_path: Path of the input CSV; it must contain a
            ``cleaned_comment`` column.
        output_path: Where the labelled CSV is written. Defaults to the
            location the notebook has always used.

    Returns:
        The DataFrame read from ``csv_path`` with an added
        ``sentiment_Bert`` column.
    """
    df = pd.read_csv(csv_path)

    # Original author's note: the model only processes up to 128 tokens,
    # so truncation is enabled.
    analyzer = pipeline("sentiment-analysis", model="pysentimiento/robertuito-sentiment-analysis", truncation = True)

    sentiments = []
    # Iterate over the column itself rather than df.iterrows(): iterrows
    # coerces each row to a single dtype and yields index labels, whereas
    # enumerate gives a true positional counter for the progress message.
    for position, comment in enumerate(df['cleaned_comment']):
        sentiments.append(classify_sentiment_bert(comment, analyzer))

        if position % 1000 == 0 and position > 0:
            print(f"Clasificados {position} comentarios...")

    df['sentiment_Bert'] = sentiments
    df.to_csv(output_path, index=False)
    print("Los comentarios con clasificación de sentimiento Bert han sido guardados.")

    return df

# Classify the comments with Bert
classified_comments_bert = classify_comments_bert('../data/comments_with_sentiment.csv')

In [None]:
# Draw a histogram of the `sentiment_Bert` column.
classified_comments_bert['sentiment_Bert'].hist()

In [None]:
# DataFrame Preparation for GPT-Based Stance Classification
#
# This DataFrame is part of a pipeline to classify the **Stance** of user comments
# on YouTube videos, based on the video's summarized content and sentiment.
#
# Previous Processing Steps:
# 1. Captions (CC) from each video were extracted.
# 2. We summarized the captions using the Sumy library.
#    ➤ These summaries were stored in the column `"summary_Sumy"`.
# 3. We used GPT to classify each video’s overall tone and position (Stance) regarding
#    a specific topic of interest.
#    ➤ Two columns were generated:
#       - `"video_Sentiment"`: General sentiment of the video summary ("POS", "NEG", "NEU").
#       - `"video_Stance"`: A stance classification on a 5-point **Likert scale**:
#         `0 = Totally disagree`, `1 = Disagree`, `2 = Neutral`, `3 = Agree`, `4 = Totally agree`.
#
# Objective:
# To classify the **stance of user comments** based on:
# - The video summary (`summary_Sumy`)
# - The video sentiment (`video_Sentiment`)
# - The video stance (`video_Stance`)

# Video Comment-Based Stance Classification
#
# This allows us to build a context-aware stance classifier, where GPT can be prompted to answer:
#
# Prompt Example (Reply to a video):
# "This video is about: {summary_Sumy}. It has a general sentiment of {video_Sentiment}, and a stance of {video_Stance} on the topic.
# Given this context, classify the user's {comment} in a 5-point Likert scale (0–4) according to their stance on the topic discussed, and also include the comment's sentiment."
#
#
#
# Reply-Based Stance Classification
#
# In cases where a user comment is a **reply** to another user's comment,
# we provide GPT with additional context by including the original comment in the prompt.
#
# Prompt Example (Reply Case):
# "This video is about: {summary_Sumy}. It has a general sentiment of {video_Sentiment}, 
# and a stance of {video_Stance} on the topic.
#
# The user is replying to the comment: '{replied_comment}'
# The reply is: '{comment}'
#
# Given this context, classify the reply's stance in a 5-point Likert scale (0–4) 
# according to its alignment with the topic discussed in the video."

In [None]:
import numpy as np

def add_replied_comment_column(csv_path):
    """Add a ``replied_comment`` column holding the text each reply responds to.

    The CSV at ``csv_path`` is read, the new column is computed, and the file
    is overwritten in place with the result.

    Args:
        csv_path: Path of a CSV with ``comment_id`` and ``cleaned_comment``
            columns; ``is_reply`` and ``reply_to_comment_id`` are read per row.

    Returns:
        The DataFrame with the added ``replied_comment`` column. Rows that are
        not replies, and replies whose parent comment is not in the file, get
        ``np.nan``.
    """
    # Load the comments file
    df = pd.read_csv(csv_path)

    # Lookup table: comment_id -> cleaned comment text
    comment_lookup = df.set_index('comment_id')['cleaned_comment'].to_dict()

    def get_replied_text(row):
        """Return the parent comment's text for a reply row, else NaN."""
        if row.get('is_reply') == True or row.get('is_reply') == 'True':
            # Fall back to NaN (not None) so the column uses a single
            # missing-value marker throughout.
            return comment_lookup.get(row.get('reply_to_comment_id'), np.nan)

        return np.nan

    # Build the new column row by row
    df['replied_comment'] = df.apply(get_replied_text, axis=1)

    # Persist the change to the same file
    df.to_csv(csv_path, index=False)
    print(f"Columna 'replied_comment' añadida correctamente a: {csv_path}")

    return df

# Run the function
updated_df = add_replied_comment_column('../data/comments_with_sentiment.csv')

In [None]:
def classify_sentiment_gpt(comment, analyzer):
    """Placeholder for GPT-based classification.

    Both arguments are currently ignored; the function always returns the
    literal string 'None'.
    """
    placeholder_label = 'None'
    return placeholder_label

def classify_comments_gpt(csv_path, output_path='../data/comments_with_sentiment.csv'):
    """Add empty GPT result columns to the comments table and save it.

    No classification is performed here: the ``sentiment_GPT`` and
    ``likert_GPT`` columns are created filled with empty strings.

    Args:
        csv_path: Path of the input CSV.
        output_path: Where the resulting CSV is written. Defaults to the
            location the notebook has always used.

    Returns:
        The DataFrame read from ``csv_path`` with the two added columns.
    """
    df = pd.read_csv(csv_path)

    # Columns are created empty; this body never fills them in.
    df['sentiment_GPT'] = ""
    df['likert_GPT'] = ""

    df.to_csv(output_path, index=False)
    print("Los comentarios con clasificación de sentimiento GPT han sido guardados.")
    print("Los comentarios con clasificación de posición GPT han sido guardados.")

    return df

# Classify the comments with GPT
classified_comments_gpt = classify_comments_gpt('../data/comments_with_sentiment.csv')