In [None]:
import pandas as pd
import networkx as nx


def remap_weight(original_weight):
    """Translate a signed sentiment score into a positive edge weight.

    Mapping: 1 -> 2, 0 -> 1.5, -1 -> 1. Any other value falls back to 1.
    """
    # Checked in order with ``==`` so numerically equal inputs (e.g. 1.0) match.
    score_to_weight = ((1, 2), (0, 1.5), (-1, 1))
    for score, mapped in score_to_weight:
        if original_weight == score:
            return mapped
    return 1  # default value


# Graph description
# Nodes: comments and videos
# Edges: comment -> video, and comment -> comment (replies)
def build_comment2_video_graph(csv_path, feature = 'sentiment_Bert'):
    """Build a directed comment/video graph from a CSV of comments.

    Every distinct ``video_id`` becomes a node with ``type='video'`` and every
    ``comment_id`` a node with ``type='comment'``. Each row contributes one
    outgoing edge from its comment: to the comment named in
    ``reply_to_comment_id`` when the row is flagged ``is_reply`` and that id is
    not missing, otherwise to the row's video.

    Edges carry two attributes:
      * ``sentiment_score``: 1 / 0 / -1 for 'Positive' / 'Neutral' /
        'Negative'; any other label counts as 0.
      * ``weight``: ``remap_weight(sentiment_score)``.

    Note: a reply target that never appears as a ``comment_id`` row is still
    created as a node by ``add_edge``, but without a ``type`` attribute.

    Args:
        csv_path: Path handed to ``pandas.read_csv``. Columns read are
            ``video_id``, ``comment_id``, ``is_reply``, the ``feature`` column
            and, when present, ``reply_to_comment_id``.
        feature: Name of the column holding the sentiment label.

    Returns:
        networkx.DiGraph: the assembled graph.
    """
    # Label -> signed score. Built once: it is identical for every row.
    sentiment_map = {'Positive': 1, 'Neutral': 0, 'Negative': -1}

    # Load the data
    df = pd.read_csv(csv_path)

    # Initialise the directed graph
    G = nx.DiGraph()

    # One node per distinct video
    for video_id in df['video_id'].unique():
        G.add_node(video_id, type='video')

    # One node and one outgoing edge per comment row
    for _, row in df.iterrows():
        comment_id = row['comment_id']
        # ``.get`` tolerates CSVs that lack the reply column altogether.
        replied_comment_id = row.get('reply_to_comment_id', None)
        weight = sentiment_map.get(row[feature], 0)

        G.add_node(comment_id, type='comment')

        if row['is_reply'] and pd.notna(replied_comment_id):
            # Reply: point at the comment being answered
            target = replied_comment_id
        else:
            # Top-level comment (or reply with no known parent): point at the video
            target = row['video_id']

        G.add_edge(comment_id, target, weight=remap_weight(weight), sentiment_score=weight)

    return G


# Graph description (planned; not implemented yet)
# Nodes: comments and videos
# Edges: comment -> video, comment -> comment (replies), video -> video
def build_comment2_video2_graph(csv_path, feature = 'sentiment_Bert'):
    """Placeholder for the graph variant that also links videos to videos.

    Not implemented yet: both arguments are ignored and ``None`` is returned.
    """
    # TODO: implementation pending.
    return None



def save_graph(G, output_path='../data/comments_videos_Bert_graph.graphml'):
    """Write ``G`` to ``output_path`` in GraphML format and print a confirmation."""
    nx.write_graphml(G, output_path)
    confirmation = f"Grafo guardado en: {output_path}"
    print(confirmation)

In [None]:
# Build the comment -> video/comment graph from the BERT sentiment labels and persist it.
graph = build_comment2_video_graph(
    "../data/comments_with_sentiment.csv",
    feature='sentiment_Bert',
)
save_graph(graph, output_path='../data/comments_videos_Bert_graph.graphml')

In [None]:
import statistics as stat
from collections import defaultdict
# Graph description
# Nodes: users (comment authors) and videos
# Edges: user -> video, and user -> user (replies)
def build_user2_video_graph(csv_path, feature = 'sentiment_Bert'):
    """Build a directed author/video graph from a CSV of comments.

    Every distinct ``video_id`` becomes a node with ``type='video'`` and every
    distinct ``author_id`` a node with ``type='author'``.

    An author may comment several times on the same video, so the sentiment
    used for that (author, video) pair is the *mode* (most frequent label) of
    the ``feature`` column over the pair's rows, computed with
    ``statistics.mode``. On Python 3.8+ ties resolve to the first label
    encountered; older versions raise ``StatisticsError`` on ties.

    Each row then contributes one edge from its author:
      * to the author of the replied-to comment, when the row is flagged
        ``is_reply``, ``reply_to_comment_id`` is not missing and that comment
        id exists in the CSV;
      * to the row's video otherwise.

    Edges carry ``sentiment_score`` (1 / 0 / -1 for 'Positive' / 'Neutral' /
    'Negative'; any other label counts as 0) and ``weight``
    (``remap_weight(sentiment_score)``). The graph is a ``DiGraph``, so several
    rows producing the same (source, target) pair share a single edge whose
    attributes are those of the last such row.

    Args:
        csv_path: Path handed to ``pandas.read_csv``. Columns read are
            ``video_id``, ``author_id``, ``comment_id``, ``is_reply``, the
            ``feature`` column and, when present, ``reply_to_comment_id``.
        feature: Name of the column holding the sentiment label.

    Returns:
        networkx.DiGraph: the assembled graph.
    """
    # Label -> signed score. Kept local so the function does not depend on a
    # notebook-level ``sentiment_map`` global (which is never defined).
    sentiment_map = {'Positive': 1, 'Neutral': 0, 'Negative': -1}

    # Load the data
    df = pd.read_csv(csv_path)

    # Initialise the directed graph
    G = nx.DiGraph()

    # One node per distinct video
    for video_id in df['video_id'].unique():
        G.add_node(video_id, type='video')

    # One node per distinct author
    for author_id in df['author_id'].unique():
        G.add_node(author_id, type='author')

    # (author_id, video_id) -> list of sentiment labels, in row order
    author_video_sentiments = defaultdict(list)
    for author_id, video_id, label in zip(df['author_id'], df['video_id'], df[feature]):
        author_video_sentiments[(author_id, video_id)].append(label)

    # Most frequent label per (author_id, video_id)
    author_video_mode_sentiment = {
        pair: stat.mode(labels) for pair, labels in author_video_sentiments.items()
    }

    # comment_id -> author_id, used to resolve who a reply is addressed to
    comment_to_author = dict(zip(df['comment_id'], df['author_id']))

    for _, row in df.iterrows():
        author_id = row['author_id']
        video_id = row['video_id']
        # ``.get`` tolerates CSVs that lack the reply column altogether.
        reply_id = row.get('reply_to_comment_id')

        sentiment = author_video_mode_sentiment.get((author_id, video_id), 'Neutral')
        weight = sentiment_map.get(sentiment, 0)

        if row['is_reply'] and pd.notna(reply_id) and reply_id in comment_to_author:
            # Reply to a known comment: point at that comment's author
            target = comment_to_author[reply_id]
        else:
            # Top-level comment (or reply whose parent is unknown): point at the video
            target = video_id

        G.add_edge(author_id, target, weight=remap_weight(weight), sentiment_score=weight)

    return G

In [None]:
# Build the user -> video/user graph from the BERT sentiment labels and persist it.
graph = build_user2_video_graph(
    "../data/comments_with_sentiment.csv",
    feature='sentiment_Bert',
)
save_graph(graph, output_path='../data/user2_video_Bert_graph.graphml')