In [45]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

df = pd.read_csv('youtube_comments_final.csv')
print(df.head(2))

   rank     video_id                                              title  \
0     1  rzIVd35dGZ4  Terror and Tragedy: IDF releases shocking unse...   
1     1  rzIVd35dGZ4  Terror and Tragedy: IDF releases shocking unse...   

  channel_name                  comment_id  \
0     ANI News  UgwFO1jEbDcigT0orYl4AaABAg   
1     ANI News  UgxkHDJ2nYj3Y4rTGX14AaABAg   

                                                text parent_id  \
0  Bismilahrahmanrahim İslam'ın en azından çocukl...      root   
1                     STUPID LOOP that shows NOTHING      root   

           author         author_channel_id  likes  ... compound  anger_score  \
0  @necdettok2935  UCJtu4N2m5RKD22Hf7T0Efcw      0  ...   0.0000     0.131780   
1      @punapeter  UCCyq9ZDntJqdbrLaVepVaMg      1  ...  -0.6289     0.064972   

   disgust_score  fear_score  joy_score  neutral_score  sadness_score  \
0       0.047135    0.540039   0.020078       0.193388       0.038085   
1       0.045874    0.007189   0.001827       

Now that the csv file has been read, we can build a graph using the information gathered


In [46]:

G = nx.DiGraph()
for _, row in df.iterrows():
    comment_id = row['comment_id']
    sentiment = row['sentiment_class']
    emotion = row['emotion']
    G.add_node(comment_id, sentiment=sentiment, emotion=emotion)

for _, row in df.iterrows():
    comment_id = row['comment_id']
    parent_id = row['parent_id']
    if pd.notna(parent_id):
        # If parent isn't in the dataset, add it with placeholder attributes
        if parent_id not in G:
            G.add_node(parent_id, sentiment=None, emotion=None)
        G.add_edge(parent_id, comment_id)

Now we can analyze the propagation of the sentiment


In [47]:
for root in [n for n, d in G.in_degree() if d == 0]:
    descendants = nx.descendants(G, root)
    root_sentiment = G.nodes[root]['sentiment']
    root_emotion = G.nodes[root]['emotion']
    
    # Skip roots with missing sentiment
    if root_sentiment is None and root_emotion is None:
        continue
    
    sentiments = [G.nodes[n]['sentiment'] for n in descendants if G.nodes[n]['sentiment'] is not None]
    emotions = [G.nodes[n]['emotion'] for n in descendants if G.nodes[n]['emotion'] is not None]

    print(f"\nRoot Comment {root} (Sentiment: {root_sentiment}, Emotion: {root_emotion}):")
    print("Sentiment distribution among replies:")
    print(pd.Series(sentiments).value_counts())
    print("Emotion distribution among replies:")
    print(pd.Series(emotions).value_counts())
    

Now that we have all of the sentiments and emotions analyzed, we can visualize them using matplotlib


In [48]:
plt.figure(figsize=(12,8))
pos = nx.spring_layout(G, k=0.5)

# Color nodes by emotion
emotion_colors = {
    'anger': 'red',
    'disgust': 'brown',
    'fear': 'purple',
    'joy': 'yellow',
    'neutral': 'gray',
    'sadness': 'blue',
    'surprise': 'orange'
}

colors = []
for n in G.nodes:
    emotion = G.nodes[n]['emotion']
    if emotion in emotion_colors:
        colors.append(emotion_colors[emotion])
    else:
        colors.append('lightgray')  # For missing or unknown emotion

nx.draw(G, pos, with_labels=False, node_color=colors, node_size=50, edge_color='lightgray', alpha=0.7)
plt.title("Comment-Reply Graph Colored by Emotion")
plt.show()

ModuleNotFoundError: No module named 'scipy'

<Figure size 1200x800 with 0 Axes>

Social Network Analysis


In [None]:
centrality = nx.degree_centrality(G)
betweenness = nx.betweenness_centrality(G)
print("\nTop 5 comments by degree centrality:")
for node, cent in sorted(centrality.items(), key=lambda x: x[1], reverse=True)[:5]:
    print(f"{node}: {cent:.4f}")


Top 5 comments by degree centrality:
UgzUeyxWQV2TlIvJcgx4AaABAg: 0.6250
UgzM6Sz9zSf3OT-ywj54AaABAg: 0.3750
root: 0.1250
UgzUeyxWQV2TlIvJcgx4AaABAg.A9Inx9ddziEA9IoKwrD5v1: 0.0625
UgzUeyxWQV2TlIvJcgx4AaABAg.A9Inx9ddziEA9IzbcBgg7p: 0.0625
