In [1]:
import networkx as nx

In [2]:
from prettytable import PrettyTable

The data is a network of political blogs that link to each other. Most of these blogs' links are now dead.

Lada A. Adamic and Natalie Glance, "The
political blogosphere and the 2004 US Election", in Proceedings of the
WWW-2005 Workshop on the Weblogging Ecosystem (2005)

##### First we will load the data and look at some of its properties

In [3]:
G = nx.read_gml("polblogs.gml")

##### The label is the website, and the value is 0 or 1 depending on whether the political blog is left- or right-leaning, respectively

In [4]:
G.node[345]

{'id': 345,
 'label': u'lawdork.blogspot.com',
 'source': u'LeftyDirectory,eTalkingHead',
 'value': 0}

In [5]:
nx.is_directed(G)

True

In [6]:
G.number_of_edges()

19090

In [7]:
G.number_of_nodes()

1490

In [8]:
nx.is_strongly_connected(G)

False

In [9]:
nx.is_weakly_connected(G)

False

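##### We will only concern ourselves with the largest weakly connected component

Note: the centrality measures in the sections below are computed on the full graph `G`, not on this component.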

In [10]:
i=nx.weakly_connected_component_subgraphs(G)

In [11]:
l = max(i, key = len)

In [12]:
l.number_of_nodes()

1222

In [13]:
nx.is_weakly_connected(l)

True

## First we will look at the centrality of all the nodes in the graph

### Degree Centrality

In [14]:
deg=nx.degree(G)

In [15]:
deg_cent=nx.degree_centrality(G)

In [16]:
# Rank nodes by degree centrality, highest first; ties are broken by ascending node id.
# Written without tuple-parameter lambdas or iteritems() so it runs on Python 2 and 3 alike.
deg_sort = sorted(deg_cent.items(), key=lambda item: (-item[1], item[0]))

In [17]:
deg_sort[0:9]

[(855, 0.3143049026192075),
 (155, 0.25789120214909333),
 (1051, 0.24378777703156482),
 (55, 0.23572867696440566),
 (641, 0.1900604432505037),
 (729, 0.17192746809939558),
 (963, 0.16453995970449967),
 (1245, 0.1584956346541303),
 (1153, 0.15379449294828745)]

### Betweenness Centrality

In [18]:
bet_cent=nx.betweenness_centrality(G)

In [19]:
# Rank nodes by betweenness centrality, highest first; ties are broken by ascending node id.
# Written without tuple-parameter lambdas or iteritems() so it runs on Python 2 and 3 alike.
bet_sort = sorted(bet_cent.items(), key=lambda item: (-item[1], item[0]))

In [20]:
bet_sort[0:9]

[(855, 0.09860123355546516),
 (55, 0.041065409701381644),
 (1051, 0.034423597988754114),
 (155, 0.024815500156320013),
 (454, 0.020714412538725437),
 (387, 0.020319988222406524),
 (1479, 0.018325573772945088),
 (1101, 0.016309365690314936),
 (1041, 0.015458192296913366)]

### Closeness Centrality


In [21]:
clo_cent=nx.closeness_centrality(G)

In [22]:
# Rank nodes by closeness centrality, highest first; ties are broken by ascending node id.
# Written without tuple-parameter lambdas or iteritems() so it runs on Python 2 and 3 alike.
clo_sort = sorted(clo_cent.items(), key=lambda item: (-item[1], item[0]))

In [23]:
clo_sort[0:9]

[(855, 0.2707203175398935),
 (880, 0.26707623163293015),
 (387, 0.26523353232024066),
 (935, 0.26523353232024066),
 (927, 0.2635289466369486),
 (856, 0.25996473434092904),
 (1427, 0.2583269892694826),
 (1351, 0.25596194816922097),
 (467, 0.25521849022848053)]

### As in the book, we will look at a table of the blogs that rank highest on each measure

In [24]:
# Take the node ids of the ten best-ranked entries from each sorted
# (node, score) list, then merge the three selections into one
# duplicate-free list of ids.
names1 = [node for node, _score in deg_sort[:10]]
names2 = [node for node, _score in bet_sort[:10]]
names3 = [node for node, _score in clo_sort[:10]]
names = list(set(names1) | set(names2) | set(names3))

In [67]:
# One row per selected node:
# [id, label, value, degree, degree cent., betweenness, closeness],
# with the three centrality scores rounded to four decimals.
table = []
for name in names:
    attrs = G.node[name]
    table.append([
        name,
        attrs['label'],
        attrs['value'],
        deg[name],
        round(deg_cent[name], 4),
        round(bet_cent[name], 4),
        round(clo_cent[name], 4),
    ])

In [68]:
# Order the rows by raw degree (column index 3), largest first.
# Negating the key keeps the sort stable for rows with equal degree.
table = sorted(table, key=lambda row: -row[3])

In [69]:
# Render the centrality rows (already sorted by degree) as a text table.
t = PrettyTable(['ID','Link','Value','Degree','Degree Cent','Betweenness','Closeness'])
# Iterate over the rows directly instead of indexing through range(len(...)).
for row in table:
    t.add_row(row)

In [70]:
print(t)

+------+-----------------------------------+-------+--------+-------------+-------------+-----------+
|  ID  |                Link               | Value | Degree | Degree Cent | Betweenness | Closeness |
+------+-----------------------------------+-------+--------+-------------+-------------+-----------+
| 855  |          blogsforbush.com         |   1   |  468   |    0.3143   |    0.0986   |   0.2707  |
| 155  |            dailykos.com           |   0   |  384   |    0.2579   |    0.0248   |   0.2037  |
| 1051 |          instapundit.com          |   1   |  363   |    0.2438   |    0.0344   |   0.242   |
|  55  |        atrios.blogspot.com        |   0   |  351   |    0.2357   |    0.0411   |   0.2368  |
| 641  |       talkingpointsmemo.com       |   0   |  283   |    0.1901   |    0.0055   |   0.1875  |
| 729  |       washingtonmonthly.com       |   0   |  256   |    0.1719   |    0.0147   |   0.2196  |
| 963  |          drudgereport.com         |   1   |  245   |    0.1645   |    0.0

# Now let us look at each of the left- and right-leaning groups

## First we will create subgraphs for each of the groups

In [52]:
nodes0=[]
nodes1=[]

In [53]:
# Split the node ids by their 'value' attribute: 0 goes to nodes0, anything
# else goes to nodes1. Both lists are rebuilt from scratch here, so re-running
# this cell does not append duplicate ids to lists left over from a prior run.
# G.nodes(data=True) yields (node_id, attribute_dict) pairs and is used in
# place of nodes_iter(), which is only available in networkx 1.x.
nodes0 = [node_id for node_id, attrs in G.nodes(data=True) if attrs['value'] == 0]
nodes1 = [node_id for node_id, attrs in G.nodes(data=True) if attrs['value'] != 0]

In [56]:
G_node0 = G.subgraph(nodes0)
G_node1 = G.subgraph(nodes1)

In [60]:
G_node0.number_of_nodes()

758

In [61]:
G_node1.number_of_nodes()

732

In [64]:
def _top_nodes(centrality, count):
    """Return the ids of the ``count`` highest-scoring nodes in ``centrality``.

    ``centrality`` maps node id -> score. Nodes are ranked by descending
    score; ties are broken by ascending node id.
    """
    ranked = sorted(centrality.items(), key=lambda item: (-item[1], item[0]))
    return [item[0] for item in ranked[:count]]


def createTable(graph, top_n=10):
    """Build a PrettyTable summarising the most central nodes of ``graph``.

    Degree, betweenness and closeness centrality are computed for every node.
    The ``top_n`` best-ranked nodes under each measure are collected, and the
    union of those three selections forms the rows of the table, ordered by
    raw degree (largest first).

    Parameters
    ----------
    graph : networkx graph
        Each node must carry 'label' and 'value' attributes.
    top_n : int, optional
        Number of top nodes taken from each centrality ranking. Defaults to
        10, which reproduces the original hard-coded behaviour.

    Returns
    -------
    PrettyTable
        Columns: ID, Link, Value, Degree, Degree Cent, Betweenness, Closeness.
        Centrality values are rounded to four decimals.
    """
    deg = nx.degree(graph)
    deg_cent = nx.degree_centrality(graph)
    bet_cent = nx.betweenness_centrality(graph)
    clo_cent = nx.closeness_centrality(graph)

    # Union of the top-ranked nodes under each of the three measures.
    names = (set(_top_nodes(deg_cent, top_n))
             | set(_top_nodes(bet_cent, top_n))
             | set(_top_nodes(clo_cent, top_n)))

    # NOTE(review): graph.node is the attribute mapping used throughout this
    # notebook; newer networkx releases spell it graph.nodes -- confirm
    # against the installed version before changing.
    rows = [
        [name,
         graph.node[name]['label'],
         graph.node[name]['value'],
         deg[name],
         round(deg_cent[name], 4),
         round(bet_cent[name], 4),
         round(clo_cent[name], 4)]
        for name in names
    ]
    # Largest degree first (column index 3); the sort is stable for ties.
    rows.sort(key=lambda row: -row[3])

    t = PrettyTable(['ID','Link','Value','Degree','Degree Cent','Betweenness','Closeness'])
    for row in rows:
        t.add_row(row)
    return t

In [65]:
print(createTable(G_node0))

+-----+-----------------------------+-------+--------+-------------+-------------+-----------+
|  ID |             Link            | Value | Degree | Degree Cent | Betweenness | Closeness |
+-----+-----------------------------+-------+--------+-------------+-------------+-----------+
| 155 |         dailykos.com        |   0   |  338   |    0.4465   |    0.0386   |   0.2132  |
|  55 |     atrios.blogspot.com     |   0   |  329   |    0.4346   |    0.064    |   0.2524  |
| 641 |    talkingpointsmemo.com    |   0   |  242   |    0.3197   |    0.0083   |   0.1851  |
| 363 |       liberaloasis.com      |   0   |  214   |    0.2827   |    0.0307   |   0.2609  |
| 729 |    washingtonmonthly.com    |   0   |  205   |    0.2708   |    0.012    |   0.206   |
| 180 |   digbysblog.blogspot.com   |   0   |  182   |    0.2404   |    0.0135   |   0.2462  |
|  99 |   bodyandsoul.typepad.com   |   0   |  180   |    0.2378   |    0.0136   |   0.2588  |
| 144 |    corrente.blogspot.com    |   0   |  177

In [66]:
print(createTable(G_node1))

+------+---------------------------------------+-------+--------+-------------+-------------+-----------+
|  ID  |                  Link                 | Value | Degree | Degree Cent | Betweenness | Closeness |
+------+---------------------------------------+-------+--------+-------------+-------------+-----------+
| 855  |            blogsforbush.com           |   1   |  463   |    0.6334   |    0.2088   |   0.4447  |
| 1051 |            instapundit.com            |   1   |  297   |    0.4063   |    0.0237   |   0.2888  |
| 1153 |           michellemalkin.com          |   1   |  215   |    0.2941   |    0.015    |   0.2778  |
| 963  |            drudgereport.com           |   1   |  212   |     0.29    |    0.0008   |   0.0055  |
| 1101 |           lashawnbarber.com           |   1   |  212   |     0.29    |    0.0353   |   0.3668  |
| 1245 |           powerlineblog.com           |   1   |  210   |    0.2873   |    0.0065   |   0.2476  |
| 1041 |             hughhewitt.com           

### It seems that the right-leaning blogs have higher degree and closeness centrality within their own group

### Two blogs stand out: ID 363 and ID 1000 have relatively high degree and centrality values within their respective subgraphs (0 and 1), but are not among the top-ranked nodes for the whole graph

In [83]:
# Show the degree centrality of node 363 in the full graph.
# A direct dictionary lookup replaces the scan over every entry; the
# membership guard keeps the original behaviour of printing nothing
# when the node is absent.
if 363 in deg_cent:
    print(deg_cent[363])

0.145063801209


In [85]:
def getDetails(G, deg, deg_cent, bet_cent, clo_cent, name):
    """Build a one-row PrettyTable describing a single node.

    Parameters
    ----------
    G : networkx graph
        Graph whose node ``name`` carries 'label' and 'value' attributes.
    deg : mapping
        Node id -> raw degree.
    deg_cent, bet_cent, clo_cent : mapping
        Node id -> degree / betweenness / closeness centrality.
    name : hashable
        Id of the node to report on.

    Returns
    -------
    PrettyTable
        Columns: ID, Link, Value, Degree, Degree Cent, Betweenness, Closeness.
        Centrality values are rounded to four decimals.
    """
    attrs = G.node[name]
    t = PrettyTable(['ID','Link','Value','Degree','Degree Cent','Betweenness','Closeness'])
    # Only one row is ever produced, so no sorting or looping is needed.
    t.add_row([
        name,
        attrs['label'],
        attrs['value'],
        deg[name],
        round(deg_cent[name], 4),
        round(bet_cent[name], 4),
        round(clo_cent[name], 4),
    ])
    return t

In [86]:
print(getDetails(G,deg,deg_cent,bet_cent,clo_cent,363))

+-----+------------------+-------+--------+-------------+-------------+-----------+
|  ID |       Link       | Value | Degree | Degree Cent | Betweenness | Closeness |
+-----+------------------+-------+--------+-------------+-------------+-----------+
| 363 | liberaloasis.com |   0   |  216   |    0.1451   |    0.0143   |   0.2235  |
+-----+------------------+-------+--------+-------------+-------------+-----------+


In [87]:
print(getDetails(G,deg,deg_cent,bet_cent,clo_cent,1000))

+------+---------------------------------------+-------+--------+-------------+-------------+-----------+
|  ID  |                  Link                 | Value | Degree | Degree Cent | Betweenness | Closeness |
+------+---------------------------------------+-------+--------+-------------+-------------+-----------+
| 1000 | gevkaffeegal.typepad.com/the_alliance |   1   |  212   |    0.1424   |    0.0142   |   0.2374  |
+------+---------------------------------------+-------+--------+-------------+-------------+-----------+
