In [None]:
import os

import networkx as nx
import pandas as pd


# Remap the weight of the edges based on the sentiment score
def remap_weight(original_weight, default, only_default=False):
    """Translate a sentiment score into an edge weight.

    Args:
        original_weight: Sentiment score; 1, 0 and -1 have dedicated weights.
        default: Weight returned for any other score, or for every score when
            ``only_default`` is true.
        only_default: When true, skip the score table and return ``default``.

    Returns:
        2 for a score of 1, 1.5 for 0, 1 for -1, otherwise ``default``.
    """
    if only_default:
        return default

    # (score, edge weight) pairs, checked in the same order as before
    for score, edge_weight in ((1, 2), (0, 1.5), (-1, 1)):
        if original_weight == score:
            return edge_weight

    return default


# Graph Description
# Nodes: videos and comments
# Edges: comments to videos, comments to comments (Replies)
def build_comment2_video_graph(csv_path, feature = 'sentiment_Bert'):
    """Build a directed graph linking comments to videos and to replied comments.

    Every distinct ``video_id`` becomes a node with ``type='video'`` and every
    row's ``comment_id`` becomes a node with ``type='comment'``. A row whose
    ``is_reply`` is truthy and whose ``reply_to_comment_id`` is non-null gets
    an edge to the replied comment; every other row gets an edge to its video.

    Each edge carries two attributes:
      * ``sentiment_score``: 1 / 0 / -1 for 'Positive' / 'Neutral' /
        'Negative'; any other label (including missing values) maps to 0.
      * ``weight``: the result of ``remap_weight(score, 1.5, True)``.

    Args:
        csv_path: Location of the CSV, passed straight to ``pandas.read_csv``.
            The columns ``comment_id``, ``video_id``, ``is_reply`` and
            ``feature`` are required; ``reply_to_comment_id`` is optional.
        feature: Name of the column that holds the sentiment label.

    Returns:
        The populated ``networkx.DiGraph``.

    Raises:
        KeyError: If a required column is missing from the CSV.
    """
    # Load the data
    df = pd.read_csv(csv_path)

    # Initialize the directed graph
    G = nx.DiGraph()

    # Add video nodes with their unique IDs
    for video_id in df['video_id'].unique():
        G.add_node(video_id, type='video')

    # Label -> numeric score; built once instead of once per row
    sentiment_map = {'Positive': 1, 'Neutral': 0, 'Negative': -1}

    # The reply column is optional: without it every comment links to its video
    if 'reply_to_comment_id' in df.columns:
        reply_targets = df['reply_to_comment_id']
    else:
        reply_targets = [None] * len(df)

    # Walk the needed columns in lockstep; this avoids the per-row Series that
    # DataFrame.iterrows() allocates
    rows = zip(df['comment_id'], df['video_id'], df['is_reply'], df[feature], reply_targets)
    for comment_id, video_id, is_reply, sentiment, replied_comment_id in rows:
        score = sentiment_map.get(sentiment, 0)
        edge_weight = remap_weight(score, 1.5, True)

        # Add the comment node with its unique ID
        G.add_node(comment_id, type='comment')

        if is_reply and pd.notna(replied_comment_id):
            # A reply target that never appears as a row would otherwise be
            # created implicitly by add_edge with no 'type' attribute
            if replied_comment_id not in G:
                G.add_node(replied_comment_id, type='comment')
            # Connect the comment to the replied comment
            G.add_edge(comment_id, replied_comment_id, weight=edge_weight, sentiment_score=score)
        else:
            # Connect the comment to the video
            G.add_edge(comment_id, video_id, weight=edge_weight, sentiment_score=score)

    return G


# Graph Description
# Nodes: videos and comments
# Edges: comments to videos, comments to comments (Replies), videos to videos
def build_comment2_video2_graph(csv_path, feature = 'sentiment_Bert'):
    """Placeholder for the graph variant that also links videos to videos.

    Not implemented yet: both arguments are ignored and ``None`` is returned.
    """
    # TODO: pending implementation
    return None



def save_graph(G, output_path='../data/comments_videos_Bert_graph.graphml'):
    """Write a graph to disk in GraphML format and print a confirmation.

    Args:
        G: The networkx graph to serialise.
        output_path: Destination handed to ``networkx.write_graphml``. When it
            is a string or path-like object, missing parent directories are
            created first so a fresh checkout does not fail on the write.
    """
    # Only path-like targets have a directory to prepare; anything else
    # (e.g. an open file handle) is passed through untouched
    if isinstance(output_path, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(output_path))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    nx.write_graphml(G, output_path)
    print(f"Grafo guardado en: {output_path}")

In [None]:
# Build one comment/video graph per sentiment model from the same CSV
graph_bert = build_comment2_video_graph(
    csv_path="../data/comments_with_sentiment_class.csv",
    feature='sentiment_Bert',
)
graph_gpt = build_comment2_video_graph(
    csv_path="../data/comments_with_sentiment_class.csv",
    feature='sentiment_gpt',
)

# Persist each graph as GraphML
save_graph(graph_bert, output_path='../data/graphs/comments_videos_Bert_graph.graphml')
save_graph(graph_gpt, output_path='../data/graphs/comments_videos_GPT_graph.graphml')

Grafo guardado en: ../data/graphs/comments_videos_Bert_graph.graphml
Grafo guardado en: ../data/graphs/comments_videos_GPT_graph.graphml


In [None]:
import statistics as stat
from collections import defaultdict
# Graph Description
# Nodes: videos and users (comment authors)
# Edges: users to videos, users to users (Replies)
def build_user2_video_graph(csv_path, feature = 'sentiment_gpt'):
    """Build a directed graph linking users to videos and to the users they reply to.

    Every distinct ``video_id`` becomes a node with ``type='video'`` and every
    distinct ``author_id`` a node with ``type='author'``. For each row:

      * if ``is_reply`` is truthy, ``reply_to_comment_id`` is non-null and that
        comment is present in the CSV, an edge goes from the row's author to
        the author of the replied comment;
      * otherwise an edge goes from the author to the video.

    The sentiment attached to an edge is the mode (most frequent label) of all
    labels the author left on the row's video, not the label of the single
    comment. Edges carry ``sentiment_score`` (1 / 0 / -1 for 'Positive' /
    'Neutral' / 'Negative', 0 for anything else) and ``weight``
    (``remap_weight(score, 1.5, True)``). A ``DiGraph`` keeps one edge per
    ordered node pair, so a later row for the same pair overwrites the
    attributes written by an earlier one.

    Args:
        csv_path: Location of the CSV, passed straight to ``pandas.read_csv``.
            The columns ``author_id``, ``video_id``, ``comment_id``,
            ``is_reply`` and ``feature`` are required; ``reply_to_comment_id``
            is optional.
        feature: Name of the column that holds the sentiment label.

    Returns:
        The populated ``networkx.DiGraph``.

    Raises:
        KeyError: If a required column is missing from the CSV.
    """
    # Load the data
    df = pd.read_csv(csv_path)

    # Initialize the directed graph
    G = nx.DiGraph()

    # Add video nodes (grouping by video_id)
    for video_id in df['video_id'].unique():
        G.add_node(video_id, type='video')

    # Add user nodes
    for author_id in df['author_id'].unique():
        G.add_node(author_id, type='author')

    # (author_id, video_id) -> every sentiment label that author left on that video
    author_video_sentiments = defaultdict(list)
    for author_id, video_id, sentiment in zip(df['author_id'], df['video_id'], df[feature]):
        author_video_sentiments[(author_id, video_id)].append(sentiment)

    print(f"Total unique author-video pairs: {len(author_video_sentiments)}")

    # A user can have many comments on the same video, so the author's opinion
    # of the video is summarised as the mode of those labels.
    # NOTE: on ties statistics.mode returns the first mode encountered on
    # Python 3.8+ (older versions raise StatisticsError).
    author_video_mode_sentiment = {
        pair: stat.mode(labels) for pair, labels in author_video_sentiments.items()
    }

    # comment_id -> author_id, used to resolve who a reply is addressed to
    comment_to_author = dict(zip(df['comment_id'], df['author_id']))

    # Label -> numeric score; built once instead of once per row
    sentiment_map = {'Positive': 1, 'Neutral': 0, 'Negative': -1}

    # The reply column is optional: without it every row links author -> video
    if 'reply_to_comment_id' in df.columns:
        reply_targets = df['reply_to_comment_id']
    else:
        reply_targets = [None] * len(df)

    # Walk the needed columns in lockstep; this avoids the per-row Series that
    # DataFrame.iterrows() allocates
    rows = zip(df['author_id'], df['video_id'], df['is_reply'], reply_targets)
    for author_id, video_id, is_reply, reply_id in rows:
        sentiment = author_video_mode_sentiment.get((author_id, video_id), 'Neutral')
        score = sentiment_map.get(sentiment, 0)
        edge_weight = remap_weight(score, 1.5, True)

        if is_reply and pd.notna(reply_id) and reply_id in comment_to_author:
            # Connect the author to the replied comment's author
            replied_author_id = comment_to_author[reply_id]
            G.add_edge(author_id, replied_author_id, weight=edge_weight, sentiment_score=score)
        else:
            # Connect the author to the video
            G.add_edge(author_id, video_id, weight=edge_weight, sentiment_score=score)

    return G

In [None]:

# Build one user/video graph per sentiment model from the same CSV
graph_bert = build_user2_video_graph(
    csv_path="../data/comments_with_sentiment_class.csv",
    feature='sentiment_Bert',
)
graph_gpt = build_user2_video_graph(
    csv_path="../data/comments_with_sentiment_class.csv",
    feature='sentiment_gpt',
)

# Persist each graph as GraphML
save_graph(graph_bert, output_path='../data/graphs/user2_video_Bert_graph.graphml')
save_graph(graph_gpt, output_path='../data/graphs/user2_video_GPT_graph.graphml')

Total unique author-video pairs: 49880
Total unique author-video pairs: 49880
Grafo guardado en: ../data/graphs/user2_video_Bert_graph.graphml
Grafo guardado en: ../data/graphs/user2_video_GPT_graph.graphml
