# Network Centrality Calculation Script

In [None]:
# Author : Saksham Arora

In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 
import networkx as nx 
import time
from collections import defaultdict
from itertools import combinations

### Network Creation 

In [2]:
# read in df
cornell_dtype = {'id':'str', 'author':'str', 'score':'float', 'upvote_ratio':'float', 'num_comments':'int', 'subreddit':'str', 'selftext':'str', 'comments':'object'}
berkeley_df = pd.read_csv('berkeley_top_337_new.csv', dtype=cornell_dtype, usecols=cornell_dtype.keys())

In [82]:
cornell_df = pd.read_csv('cornell_top_307.csv', dtype=cornell_dtype, usecols=cornell_dtype.keys())
harvard_df = pd.read_csv('harvard_top_494.csv', dtype=cornell_dtype, usecols=cornell_dtype.keys())
uiuc_df = pd.read_csv('uiuc_top_215.csv', dtype=cornell_dtype, usecols=cornell_dtype.keys())

In [4]:
berkeley_df['comments'] = berkeley_df['comments'].apply(lambda x: eval(x))

In [89]:
berkeley_df['comments']

0      [{'author': 'buckyspunisher', 'body': 'Haha ye...
1      [{'author': 'compstomper1', 'body': 'or flying...
2      [{'author': 'BrainyCardinal45', 'body': '@Deba...
3      [{'author': 'Money_Equivalent6439', 'body': 'A...
4      [{'author': 'Important_Ad_3688', 'body': 'This...
                             ...                        
333    [{'author': 'sunoosupremacy', 'body': 'that is...
334    [{'author': 'PotentiallyExplosive', 'body': "i...
335    [{'author': 'LugnutsK', 'body': 'B for Berkele...
336    [{'author': 'gotransfersgobears', 'body': "The...
337    [{'author': 'None', 'body': 'Number 1 in the w...
Name: comments, Length: 338, dtype: object

In [5]:
cornell_df['comments'] = cornell_df['comments'].apply(lambda x: eval(x))
harvard_df['comments'] = harvard_df['comments'].apply(lambda x: eval(x))
uiuc_df['comments'] = uiuc_df['comments'].apply(lambda x: eval(x))

In [96]:
def process_comments(row):
    comments_list = eval(row['comments'])
    interactions = []
    # authors = [comment['author'] for comment in comments_list if comment['author'] != '[deleted]']
    # # Create a list of all pairs of authors that have commented on the same post
    # interactions = list(combinations(authors, 2))
    for comment in comments_list:
        if comment['author'] != '[deleted]':
            interactions.append((row['author'], comment['author']))
    return interactions

In [97]:
def create_interactions(df):
    all_interactions = []
    for index, row in df.iterrows():
        all_interactions.extend(row['interactions'])
        
    return all_interactions

In [98]:
def initialize_populate_network(all_interactions):
    graph = nx.Graph()
    for interaction in all_interactions:
        parent, child = interaction
        if parent == child:
            continue
        if graph.has_edge(parent, child):
            continue
        elif graph.has_edge(child, parent):
            continue
        else:
            graph.add_edge(parent, child)
            
    return graph 

### Write created network as edgelists

In [99]:
berkeley_df['interactions'] = berkeley_df.apply(lambda x: process_comments(x), axis = 1)
all_interactions_berkeley = create_interactions(berkeley_df)
berkeley_graph = initialize_populate_network(all_interactions_berkeley)

In [100]:
nx.write_edgelist(berkeley_graph, "berkeley_graph_sentiment.edgelist")

In [101]:
cornell_df['interactions'] = cornell_df.apply(lambda x: process_comments(x), axis = 1)
all_interactions_cornell = create_interactions(cornell_df)
cornell_graph = initialize_populate_network(all_interactions_cornell)

In [102]:
nx.write_edgelist(cornell_graph, "cornell_graph_sentiment.edgelist")

In [103]:
harvard_df['interactions'] = harvard_df.apply(lambda x: process_comments(x), axis = 1)
all_interactions_harvard = create_interactions(harvard_df)
harvard_graph = initialize_populate_network(all_interactions_harvard)

In [104]:
nx.write_edgelist(harvard_graph, "harvard_graph_sentiment.edgelist")

In [105]:
uiuc_df['interactions'] = uiuc_df.apply(lambda x: process_comments(x), axis = 1)
all_interactions_uiuc = create_interactions(uiuc_df)
uiuc_graph = initialize_populate_network(all_interactions_uiuc)

In [106]:
nx.write_edgelist(uiuc_graph, "uiuc_graph_sentiment.edgelist")

In [107]:
len(uiuc_graph.nodes)

1507

### Compute Centrality Measures

In [108]:
# compute centrality measures
deg_centrality = nx.degree_centrality(berkeley_graph)
btw_centrality = nx.betweenness_centrality(berkeley_graph, normalized = True, 
                                              endpoints = False)
eig_centrality = nx.eigenvector_centrality(berkeley_graph)
clustering_coefficients = nx.clustering(berkeley_graph)
pagerank = nx.pagerank(berkeley_graph, 0)

In [111]:
# compute centrality measures
deg_centrality = nx.degree_centrality(cornell_graph)
btw_centrality = nx.betweenness_centrality(cornell_graph, normalized = True, 
                                              endpoints = False)
eig_centrality = nx.eigenvector_centrality(cornell_graph)
clustering_coefficients = nx.clustering(cornell_graph)
pagerank = nx.pagerank(cornell_graph, 0)

In [114]:
# compute centrality measures
deg_centrality = nx.degree_centrality(harvard_graph)
btw_centrality = nx.betweenness_centrality(harvard_graph, normalized = True, 
                                              endpoints = False)
eig_centrality = nx.eigenvector_centrality(harvard_graph)
clustering_coefficients = nx.clustering(harvard_graph)
pagerank = nx.pagerank(harvard_graph, 0)

In [117]:
# compute centrality measures
deg_centrality = nx.degree_centrality(uiuc_graph)
btw_centrality = nx.betweenness_centrality(uiuc_graph, normalized = True, 
                                              endpoints = False)
eig_centrality = nx.eigenvector_centrality(uiuc_graph)
clustering_coefficients = nx.clustering(uiuc_graph)
pagerank = nx.pagerank(uiuc_graph, 0)

### Save Centrality Data into DataFrame

In [118]:
row = defaultdict(list)
for entry in deg_centrality:
    row['author'].append(entry) 
    row['btw_centrality'].append(btw_centrality[entry])
    row['deg_centrality'].append(deg_centrality[entry])
    row['eig_centrality'].append(eig_centrality[entry])
    row['pagerank'].append(pagerank[entry])
    row['clustering_coeff'].append(clustering_coefficients[entry])
# berkeley_centrality_df = pd.DataFrame(row)
# cornell_centrality_df = pd.DataFrame(row)
# harvard_centrality_df = pd.DataFrame(row)
uiuc_centrality_df = pd.DataFrame(row)

In [110]:
berkeley_centrality_df.to_csv('data/berkeley_centrality_poster.csv')

In [119]:
uiuc_centrality_df.to_csv('data/uiuc_centrality_sentiment.csv')

In [113]:
cornell_centrality_df.to_csv('data/cornell_centrality_poster.csv')

In [116]:
harvard_centrality_df.to_csv('data/harvard_centrality_poster.csv')

In [12]:
berkeley_centrality_df

Unnamed: 0,author,btw_centrality,deg_centrality,eig_centrality,pagerank,clustering_coeff
0,yoyoyaass,0.004013,0.005352,0.034852,0.000167,0.038306
1,buckyspunisher,0.007701,0.008028,0.058060,0.000167,0.039894
2,funkyfaithy,0.000008,0.000502,0.002205,0.000167,0.000000
3,novared19,0.004117,0.005519,0.034038,0.000167,0.026515
4,DragoSphere,0.000456,0.002007,0.031810,0.000167,0.181818
...,...,...,...,...,...,...
5975,Bruinburner_1919,0.000000,0.000167,0.001251,0.000167,0.000000
5976,octobersotherveryown,0.000000,0.000167,0.001251,0.000167,0.000000
5977,withluna,0.000000,0.000167,0.001251,0.000167,0.000000
5978,SchwartzBwifU,0.000000,0.000167,0.001251,0.000167,0.000000
