# negative_graph
0. (sentiment_graphs copy)
1. import gml file
2. write to directed and undirected gml file
3. component and connectivity analysis
4. plot graph components and subgraphs

In [1]:
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os
from glob import glob

# Notebook-wide pandas display settings.
# 'display.mpl_style' has been dropped from newer pandas releases; there
# set_option raises OptionError (a KeyError subclass), which used to abort
# the cell before the remaining options were applied.  The option is purely
# cosmetic, so skip it when it is unavailable.
try:
    pd.set_option('display.mpl_style', 'default')
except KeyError:
    pass
pd.set_option('display.width', 5000)
pd.set_option('display.max_columns', 60)

#gml_files = glob('../output/network/*/*.gml')

def calculate_graph_inf(graph, name=None):
    """Optionally name a graph, then print its networkx summary.

    Parameters
    ----------
    graph : networkx graph
        Graph to summarise.  Its ``name`` attribute is overwritten when a
        name is available.
    name : str, optional
        Name to store on the graph.  When omitted, a notebook-level
        ``filename`` variable is used if one exists (the original
        behaviour); otherwise the graph keeps its current name.
    """
    if name is None:
        # Backward compatibility: this function used to read a global
        # `filename` unconditionally and raised NameError when no cell had
        # defined it.
        name = globals().get('filename')
    if name is not None:
        graph.name = name
    print(nx.info(graph))

def plot_graph(graph):
    """Print the networkx summary of `graph` and draw it with a spring layout.

    The drawing goes onto a new 10x10 inch figure with node labels enabled.
    """
    print(nx.info(graph))
    plt.figure(figsize=(10, 10))
    nx.draw_spring(graph, with_labels=True)

# start here

In [2]:
# Load the article graph from its GML file.
# Presumably a directed graph, since to_undirected(reciprocal=True) below is
# a DiGraph option -- TODO confirm against the file.
graph = nx.read_gml("../output/network/article_neg1.gml")
# Undirected version of the loaded graph; used by the component analysis below.
ugraph = graph.to_undirected()
# Second undirected version; per the networkx docs, reciprocal=True keeps
# only edges that appear in both directions in the original graph.
U = graph.to_undirected(reciprocal=True)
e = U.edges()
# NOTE(review): the reciprocal edges should already be present in ugraph, so
# this merge looks like a no-op -- confirm before relying on or removing it.
ugraph.add_edges_from(e)

In [3]:
def drawIt(graph, what = 'graph'):
    """Announce and draw `graph` with a spring layout, then show the figure.

    Parameters
    ----------
    graph : networkx graph
        Graph to draw.
    what : str
        Word used in the printed "Drawing ... of size ..." line.

    Graphs with more than 20 nodes are drawn on a new 10x10 inch figure;
    above 40 nodes the node size is set to 70 and the font size to 12.
    Node labels are always drawn.
    """
    node_count = graph.number_of_nodes()
    print("Drawing %s of size %s:" % (what, node_count))

    draw_options = {'with_labels': True}
    if node_count > 20:
        plt.figure(figsize=(10, 10))
    if node_count > 40:
        draw_options.update(node_size=70, font_size=12)

    nx.draw_spring(graph, **draw_options)
    plt.show()

def describeGraph(graph):
    """Print node/edge/component counts for `graph`, then draw it and each component.

    Parameters
    ----------
    graph : networkx graph
        Graph to describe.  It is passed to nx.connected_components, which
        networkx documents for undirected graphs.

    The whole graph is drawn first, followed by one drawing per connected
    component, largest component first.
    """
    # Node sets of the connected components, largest first.
    components = sorted(nx.connected_components(graph), key = len, reverse = True)
    # Tuple order must follow the message: nodes, then edges, then components
    # (the counts were previously passed as edges, nodes and so printed swapped).
    params = (graph.number_of_nodes(), graph.number_of_edges(), len(components))
    print("Graph has %s nodes, %s edges, %s connected components\n" % params)
    drawIt(graph)
    for sub in components:
        drawIt(graph.subgraph(sub), what = 'component')

## Components & connectivity

In [4]:
# sizes of the connected components (sets of nodes) of ugraph, largest first
component_sizes = sorted((len(c) for c in nx.connected_components(ugraph)), reverse=True)
print("List of connected components = %s" % (component_sizes,))

# every connected component as its own independent graph.
# G.subgraph(c).copy() is what nx.connected_component_subgraphs produced by
# default; spelling it out keeps this cell working on networkx releases
# where that helper has been removed.
subgraphs = [ugraph.subgraph(c).copy() for c in nx.connected_components(ugraph)]

List of connected components = [1140, 7, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]


### Greatest component

In [5]:
# Largest connected component of ugraph, as an independent graph.
# Choose the biggest node set first and copy only that subgraph, instead of
# materialising a copy of every component just to keep one of them.  This
# also avoids nx.connected_component_subgraphs, which newer networkx
# releases no longer provide.
largest_nodes = max(nx.connected_components(ugraph), key=len)
Gc = ugraph.subgraph(largest_nodes).copy()
print("Size of greatest component = %s" % len(Gc))

Size of greatest component = 1140


Moody and White provide an algorithm for identifying k-components in a graph. It is based on Kanevsky’s algorithm for finding all minimum-size node cut-sets of a graph (implemented in the `all_node_cuts()` function):

1. Compute node connectivity, k, of the input graph G.
2. Identify all k-cutsets at the current level of connectivity using Kanevsky’s algorithm.
3. Generate new graph components based on the removal of these cutsets. Nodes in a cutset belong to both sides of the induced cut.
4. If the graph is neither complete nor trivial, return to 1; else end.

In [6]:
# All minimum-size node cutsets of the largest component Gc, i.e. the node
# sets whose cardinality equals the node connectivity of Gc and whose
# removal would break Gc into two or more connected components.
cutsets = list(nx.all_node_cuts(Gc))
cutset_count = len(cutsets)
print("# of cutsets = %d" % cutset_count)

# of cutsets = 226


In [7]:
# Minimum-cardinality cuts of the largest component Gc:
#   node cut -- requested for the pair s='vaccines', t='autism'
#   edge cut -- requested for Gc as a whole (no s/t given)
node_cut = nx.minimum_node_cut(Gc, s='vaccines', t='autism')
print("Min node cut = %s" % (node_cut,))
edge_cut = nx.minimum_edge_cut(Gc)
print("Min edge cut = %s" % (edge_cut,))

Min node cut = set([u'mercury', u'toxic chemical ingredients', u'vaccine safety', u"alzheimer's", u'measles', u'CDC whistleblower', u'CDC', u'vaccine-injured children', u'thimerosal', u'vaccination', u'public', u'rates', u'encephalopathy', u'Vaccine Injury Compensation Program', u'MMR', u'studies', u'children', u'hepatitis B vaccine'])
Min edge cut = set([(u'influenza', u'childhood diseases')])


In [8]:
# Same 'vaccines'/'autism' minimum node cut as requested in the previous
# cell, left as a bare expression so the notebook displays the returned set.
nx.minimum_node_cut(Gc, s='vaccines', t='autism')

{u'CDC',
 u'CDC whistleblower',
 u'MMR',
 u'Vaccine Injury Compensation Program',
 u"alzheimer's",
 u'children',
 u'encephalopathy',
 u'hepatitis B vaccine',
 u'measles',
 u'mercury',
 u'public',
 u'rates',
 u'studies',
 u'thimerosal',
 u'toxic chemical ingredients',
 u'vaccination',
 u'vaccine safety',
 u'vaccine-injured children'}

In [9]:
# Minimum edge cut between 'autism' and 'vaccines' in the largest component
# Gc.  Kept in `a` because a later cell iterates over it; the bare `a`
# displays the result.
a = nx.minimum_edge_cut(Gc, s='autism', t='vaccines')
a

{(u'African American males', u'MMR'),
 (u'CDC whistleblower', u'mainstream media'),
 (u'autism', u'CDC'),
 (u'autism', u'MMR'),
 (u'autism', u'Ryan Mojabi'),
 (u'autism', u'Vaccine Injury Compensation Program'),
 (u'autism', u"alzheimer's"),
 (u'autism', u'children'),
 (u'autism', u'encephalopathy'),
 (u'autism', u'glutathione'),
 (u'autism', u'hepatitis B vaccine'),
 (u'autism', u'measles'),
 (u'autism', u'preservative'),
 (u'autism', u'public'),
 (u'autism', u'studies'),
 (u'autism', u'thimerosal'),
 (u'autism', u'toxic chemical ingredients'),
 (u'autism', u'vaccination'),
 (u'autism', u'vaccine safety'),
 (u'autism', u'vaccine-injured children'),
 (u'autism', u'vaccines'),
 (u'link', u'CDC'),
 (u'military records', u'measles decline'),
 (u'newborn boys', u'hepatitis B vaccine')}

In [10]:
# Map each node pair of Gc to the value of its 'edge' attribute.
# Every key is cut down to its first two items, so anything that follows
# the two endpoints in a key is dropped.
labels = nx.get_edge_attributes(Gc,'edge')
edgelabels = dict((pair[0:2], text) for pair, text in labels.items())
edgelabels

{(u'labor and delivery floor', u'parents'): u'walking into the',
 (u'public', u'autism'): u'wonders why they have',
 (u'Merck', u'lies'): u'exposed for',
 (u'patients', u'payment'): u'fight for',
 (u'holocaust',
  u'vaccine apologists'): u'relish in aiding and abetting a real-life',
 (u'Prevnar', u'vaccination'): u'added to childhood',
 (u'infectious diseases',
  u'people'): u'more than 500 million have died as a result of',
 (u'sicker children',
  u'vaccination'): u'dramatically increasing the number during the past 30 years led to',
 (u'chicken kidney cells',
  u'flu shots'): u'contain questionable ingredients such as',
 (u'newborns', u'hepatitis B vaccine'): u'routine use began 1992 for all',
 (u'government healthcare reform',
  u'mandatory vaccines'): u'may include trying to make',
 (u'safe', u'evidence'): u'may not prove the flu shot is',
 (u'drug companies',
  u'national vaccination policy'): u'astoundingly effective at manipulating',
 (u'higher doses', u'profits'): u'to maximize

In [11]:
# Print every edge of the cut `a` together with its label from edgelabels.
# An edge may be stored in edgelabels under either orientation, so try the
# pair as given first and the reversed pair second.  Edges with no label in
# either orientation are reported instead of raising KeyError.
for e in a:
    rev_e = e[::-1]
    if e in edgelabels:
        print("%s %s" % (e, edgelabels[e]))
    elif rev_e in edgelabels:
        print("%s %s" % (rev_e, edgelabels[rev_e]))
    else:
        print("%s (no label)" % (e,))

(u"alzheimer's", u'autism') brain cells same patterns with
(u'Vaccine Injury Compensation Program', u'autism') found in two kids after being vaccinated awarded by
(u'toxic chemical ingredients', u'autism') may be significant contributing factor to
(u'studies', u'autism') but where are of all the others in relation to
(u'vaccine safety', u'autism') questionable in relation to
(u'vaccination', u'autism') caused
(u'MMR', u'African American males') were at increased risk for autism who received before age 36 months
(u'CDC', u'link') obscure an existing
(u'autism', u'vaccine-injured children') often revealed to have
(u'vaccines', u'autism') debate rages on
(u'newborn boys', u'hepatitis B vaccine') more than tripled their risk of developing autism spectrum disorder in
(u'public', u'autism') wonders why they have
(u'preservative', u'autism') in vaccine causes
(u'autism', u'measles') some parents feel more dangerous than
(u'thimerosal', u'autism') was directly caused by
(u'glutathione', u'auti

In [None]:
# this takes forever
# average connectivity k of a graph G is the average of local node connectivity over all pairs of nodes of G
#nx.average_node_connectivity(Gc)

In [None]:
# NEW SUMMARY
# Re-prints the headline numbers of the analysis in one place.  Relies on
# ugraph, Gc and cutsets from the cells above.

component_sizes = sorted((len(c) for c in nx.connected_components(ugraph)), reverse=True)
print("List of connected components = %s" % (component_sizes,))
print("Size of greatest component = %s" % len(Gc))
print("# of cutsets = %s" % len(cutsets))
# No s/t given here: both cuts are requested for Gc as a whole.
print("Min node cut = %s" % (nx.minimum_node_cut(Gc),))
print("Min edge cut = %s" % (nx.minimum_edge_cut(Gc),))