In [35]:
# Loading required packages
import operator           # Used to sort the values of closeness centrality and page rank.
import tabulate as tb     # Used to show the results in tabular format

In [37]:
# Build an undirected adjacency map from a whitespace-separated edge list.
# Every edge "a b" is stored in both directions as graph[a][b] and graph[b][a];
# the inner values are empty placeholder dicts, so only the keys carry information.
# NOTE(review): absolute, machine-specific path; the networkx cell further down
# reads the edge list from a different directory - confirm which one is intended.
with open("C:/Users/samiu/polblogs.edgelist.simple_format_unweighted", "r") as dataset:
    # The context manager closes the file even if a malformed line raises below.
    graph = {}
    for line in dataset:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at the end of the file).
            continue
        # Any line without exactly two columns still raises ValueError.
        firstColumn, secondColumn = line.split()
        graph.setdefault(firstColumn, {})[secondColumn] = {}
        graph.setdefault(secondColumn, {})[firstColumn] = {}


# Implementation of Shortest Path Function
def findShortestPath(graph,source):
    """
    Breadth-first search over an adjacency mapping.

    graph  -- mapping of node -> mapping whose keys are that node's neighbours
    source -- node the search starts from

    Returns a dict mapping every node reachable from `source` (including
    `source` itself, at distance 0) to its hop count from `source`.
    Raises KeyError when a visited node has no entry in `graph`.
    """
    distances = {}
    depth = 0
    frontier = {source: 1}
    while frontier:
        upcoming = {}
        for node in frontier:
            if node in distances:
                # Already reached on an earlier (shorter or equal) level.
                continue
            distances[node] = depth
            # Only the keys matter: they become the candidates for the next level.
            upcoming.update(graph[node])
        frontier = upcoming
        depth += 1
    return distances


# Implementation of Closeness Centrality Function
def ClosenessCentralityFunction(Graf, u=None):
    """
    Closeness centrality of the nodes of an adjacency mapping.

    For a node with r other reachable nodes and total hop distance d to them,
    the score is (r / d) * (r / (n - 1)), where n is the number of nodes in
    `Graf`. The second factor scales the score by the fraction of the graph
    that is reachable. Nodes that reach nothing score 0.0.

    Graf -- mapping of node -> mapping whose keys are the node's neighbours
    u    -- optional node; when given, only that node's score is computed

    Returns a float when `u` is given, otherwise a dict of node -> score.
    Raises KeyError when `u` is not a node of `Graf`.
    """
    node_count = len(Graf)

    def _score(node):
        # Hop distances from `node` to everything it can reach (itself at 0).
        distances = findShortestPath(Graf, node)
        total = sum(distances.values())
        if total > 0.0 and node_count > 1:
            reachable = len(distances) - 1.0
            return (reachable / total) * (reachable / (node_count - 1))
        return 0.0

    if u is not None:
        if u not in Graf:
            raise KeyError(u)
        # A single-node query needs one search, not one search per node.
        return _score(u)

    return {node: _score(node) for node in Graf}

def top25pages(scores):
    """
    Print an RST-formatted table of the 25 best-scoring pages.

    `scores` maps each page to its closeness centrality score; rows run
    from the highest score to the lowest. Nothing is returned.
    """
    column_titles = ["Pages", "Closeness Centrality Scores"]
    ranking = list(scores.items())
    # In-place stable sort on the score, highest first.
    ranking.sort(key = lambda entry: entry[1], reverse = True)
    leaders = ranking[:25]
    print (tb.tabulate(leaders, column_titles, tablefmt = "rst"))

"""
Finding the Closeness centrality values of the polblogs graph 
using ClosenessCentralityFunction.     
"""
# Score every node, then list all (page, score) pairs from the highest score down.
ClosenessValue = ClosenessCentralityFunction(graph)
sort = sorted(
    ClosenessValue.items(),
    key=lambda pair: pair[-1],
    reverse=True,
)
for ranked_pair in sort:
    print(ranked_pair)

('instapundit.com', 0.5185041559531912)
('dailykos.com', 0.5178433605122993)
('talkingpointsmemo.com', 0.5022675198376402)
('atrios.blogspot.com', 0.4975523553656949)
('littlegreenfootballs.com/weblog', 0.493723479402978)
('washingtonmonthly.com', 0.48936301511278707)
('drudgereport.com', 0.48430801376478055)
('michellemalkin.com', 0.48334784720299473)
('truthlaidbear.com', 0.48030073705514287)
('blogsforbush.com', 0.47766585840358644)
('hughhewitt.com', 0.4759872200882283)
('blogsofwar.com', 0.4743203387727442)
('powerlineblog.com', 0.47376730301047515)
('captainsquartersblog.com/mt', 0.4733993284061952)
('lashawnbarber.com', 0.47321555537498167)
('madkane.com/notable.html', 0.47229882628669223)
('nationalreview.com/thecorner', 0.4693890144959386)
('balloon-juice.com', 0.4690278070973269)
('acertainslantoflight.blogspot.com', 0.46848703714294876)
('wizbangblog.com', 0.4675885196187007)
('talkleft.com', 0.4670510615501734)
('vodkapundit.com', 0.4670510615501734)
('buzzmachine.com', 0.4

('zeph1z.tripod.com/blog', 0.273625874443536)
('boycottsbg.com', 0.2731966092886492)
('bradcarson.com/blog', 0.27264667202996035)
('bushlies.net/pages/10/index.htm', 0.2710703292519352)
('thunewatch.squarespace.com', 0.2699298650677486)
('markwarnerforpresident2008.blogspot.com', 0.2696313361304916)
('andrightlyso.blogspot.com', 0.26832561537441174)
('yoder.ru', 0.26744257802675575)
('truedemocrat.blogspot.com', 0.26373934890652373)
('charlineandjamie.com/dotnetweb01a/blogdisplay.aspx?logname=jamie&logcatid=48', 0.26373934890652373)
('julietterossant.com/superchefblog/superchefblog.html', 0.26203853625235435)
('democratreport.blogspot.com', 0.2593623980097772)
('meanspirit.blogspot.com', 0.2593623980097772)
('amliberal.com/blog', 0.2593623980097772)
('winnegar.blog-city.com', 0.2593623980097772)
('mcwil.blogspot.com', 0.2593623980097772)
('endthenightmare.blogspot.com', 0.2576629191811356)
('depressedlonelybored.typepad.com', 0.2576084680147829)
('margieburns.com', 0.25544913467014935)

In [38]:
# Show the 25 highest closeness centrality scores as a table.
top25pages(ClosenessValue)

Pages                                Closeness Centrality Scores
instapundit.com                                         0.518504
dailykos.com                                            0.517843
talkingpointsmemo.com                                   0.502268
atrios.blogspot.com                                     0.497552
littlegreenfootballs.com/weblog                         0.493723
washingtonmonthly.com                                   0.489363
drudgereport.com                                        0.484308
michellemalkin.com                                      0.483348
truthlaidbear.com                                       0.480301
blogsforbush.com                                        0.477666
hughhewitt.com                                          0.475987
blogsofwar.com                                          0.47432
powerlineblog.com                                       0.473767
captainsquartersblog.com/mt                             0.473399
lashawnbarber.com         

In [39]:
"""
To compare/validate our result with the closeness_centrality function 
from networkx package, we can run this test chunk.  
"""
import networkx as nx
# Read the edge list into an undirected networkx graph (nx.Graph).
# NOTE(review): this path points at a different directory than the one opened
# by the hand-written loader earlier in the notebook - confirm both are the same file.
polblog_graph = nx.read_edgelist(
    'C:/Users/samiu/Desktop/8009 LAB/polblogs.edgelist.simple_format_unweighted',
    create_using=nx.Graph(),
)
# Library closeness scores, listed from the highest score down for comparison.
closeness = nx.closeness_centrality(polblog_graph)
sort = sorted(closeness.items(), key=lambda pair: pair[-1], reverse=True)
for ranked_pair in sort:
    print(ranked_pair)

('instapundit.com', 0.5185041559531912)
('dailykos.com', 0.5178433605122993)
('talkingpointsmemo.com', 0.5022675198376402)
('atrios.blogspot.com', 0.4975523553656949)
('littlegreenfootballs.com/weblog', 0.493723479402978)
('washingtonmonthly.com', 0.48936301511278707)
('drudgereport.com', 0.48430801376478055)
('michellemalkin.com', 0.48334784720299473)
('truthlaidbear.com', 0.48030073705514287)
('blogsforbush.com', 0.47766585840358644)
('hughhewitt.com', 0.4759872200882283)
('blogsofwar.com', 0.4743203387727442)
('powerlineblog.com', 0.47376730301047515)
('captainsquartersblog.com/mt', 0.4733993284061952)
('lashawnbarber.com', 0.47321555537498167)
('madkane.com/notable.html', 0.47229882628669223)
('nationalreview.com/thecorner', 0.4693890144959386)
('balloon-juice.com', 0.4690278070973269)
('acertainslantoflight.blogspot.com', 0.46848703714294876)
('wizbangblog.com', 0.4675885196187007)
('talkleft.com', 0.4670510615501734)
('vodkapundit.com', 0.4670510615501734)
('buzzmachine.com', 0.4

In [32]:

# Adjacency of the networkx graph as a plain dict of lists; it is handed to
# DirectedGraphADT.setGraph() further down as the input of computePageRank.
nodeDictList = nx.to_dict_of_lists(polblog_graph)
class DirectedGraphADT(object):
    """
    Minimal holder for an adjacency mapping of the form {vertex: neighbours}.

    The mapping is stored as-is (no copy) and exposed through vertices()
    and getGraph().
    """

    def __init__(self):
        # Adjacency mapping; empty until setGraph() is called.
        self.graph = {}
        # Initialised here but never updated by any method of this class.
        self.noOfEdges = 0.0

    def vertices(self):
        """Return a view of the vertices (the keys of the adjacency mapping)."""
        return self.graph.keys()

    def getGraph(self):
        """
        Return the stored adjacency mapping.

        Raises ValueError (an Exception subclass, so existing
        `except Exception` handlers still match) when no graph has been
        set or the stored graph has no vertices.
        """
        # Identity check: `== None` would call a user-defined __eq__ on the graph object.
        if self.graph is None or len(self.graph) == 0:
            raise ValueError("Graph is empty")
        return self.graph

    def setGraph(self, graph):
        """Store `graph` (a {vertex: neighbours} mapping) without copying it."""
        self.graph = graph
        
"""Implementation of Page Rank Function  
"""
def computePageRank(graph, damping_factor = 0.80, eps = 0.00001, max_iter = 100, verbose = True):
    """
    Iterative PageRank over a graph object.

    graph          -- object exposing vertices() (iterable of vertices) and
                      getGraph() (mapping vertex -> collection of out-neighbours)
    damping_factor -- weight of the rank that flows along links; the remaining
                      (1 - damping_factor) is spread evenly over all vertices
    eps            -- stop once the summed absolute rank change of one
                      iteration falls below this value
    max_iter       -- upper bound on the number of iterations
    verbose        -- when true, print the iteration progress lines

    Returns a dict mapping every vertex to its rank. Vertices without
    out-links pass no rank on, so the ranks only sum to 1 when every vertex
    has at least one out-link.
    """
    nodes = list(graph.vertices())
    vertices = len(nodes)
    # getGraph() may refuse an empty graph, so only ask when there is something to rank.
    adjacency = graph.getGraph() if vertices else {}

    # Index the in-links once instead of scanning every vertex pair on each
    # iteration. dict.fromkeys drops repeated targets (a link counts once per
    # source) while len() keeps the full length as the out-degree.
    incoming = {node: [] for node in nodes}
    for src in nodes:
        targets = adjacency[src]
        out_degree = len(targets)
        for dst in dict.fromkeys(targets):
            if dst in incoming:
                incoming[dst].append((src, out_degree))

    old_prank = {node: 1.0/vertices for node in nodes}
    for iter_ in range(1, max_iter + 1):
        if verbose:
            print ("Iteration: %d" % iter_)
        new_prank = {}
        change = 0
        for u in nodes:
            rank = (1-damping_factor)/vertices
            for v, out_degree in incoming[u]:
                rank += damping_factor*old_prank[v]/out_degree
            change += abs(old_prank[u] - rank)
            new_prank[u] = rank
        # A fresh dict is built every iteration, so rebinding replaces the old deep copy.
        old_prank = new_prank
        if change < eps:
            if verbose:
                print ("Total iterations: %d" % iter_)
            break
    return old_prank

def top25pages(scores):
    """
    Print an RST-formatted table of the 25 pages with the highest
    page rank scores.

    `scores` maps each page to its score; rows run from the highest score
    to the lowest. Nothing is returned.
    """
    column_titles = ["Pages", "Page-Rank Scores"]
    ranking = list(scores.items())
    # In-place stable sort on the score, highest first.
    ranking.sort(key = lambda entry: entry[1], reverse = True)
    leaders = ranking[:25]
    print (tb.tabulate(leaders, column_titles, tablefmt = "rst"))

"""Finding the page rank values of the polblogs graph 
    using computePageRank function.
"""
# Wrap the adjacency lists in the graph ADT and rank every page with a
# damping factor of 0.8.
# NOTE(review): this rebinds `graph`, which the closeness cell uses as its
# adjacency dict - re-running that cell afterwards needs the loader cell first.
graph = DirectedGraphADT()
graph.setGraph(nodeDictList)
ranks = computePageRank(graph, 0.8)
# No reverse flag here: pairs are listed from the lowest score upwards.
sort = sorted(ranks.items(), key=lambda pair: pair[-1])
for ranked_pair in sort:
    print(ranked_pair)

Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 7
Iteration: 8
Iteration: 9
Iteration: 10
Iteration: 11
Iteration: 12
Iteration: 13
Iteration: 14
Iteration: 15
Iteration: 16
Iteration: 17
Iteration: 18
Iteration: 19
Iteration: 20
Total iterations: 20
('matthewyglesias.com', 0.00018171713898444404)
('philvbblog.blogspot.com', 0.00018243281452626531)
('nebursworld.blogspot.com', 0.00018251408491991086)
('blog01.kintera.com/emilysblog', 0.00018287643403506774)
('grownups.blogspot.com', 0.00018295331219588839)
('duckdaotsu.blogspot.com', 0.00018297773874325393)
('askhoudari.blogspot.com', 0.00018312617368021684)
('peskytherat.com/pesky', 0.00018315036375639483)
('mediaprima.com/nv1962', 0.0001837863239878241)
('trainorphans.org', 0.00018382980081721331)
('blog.nam.org', 0.00018385055850264593)
('justmark.com', 0.00018394659318312823)
('hasidicgentile.org', 0.00018401940779583473)
('orthogonian.blogspot.com', 0.0001841003557363544)
('delraysteve.com/

In [33]:
# Show the 25 highest page rank scores as a table.
top25pages(ranks) 

Pages                                    Page-Rank Scores
blogsforbush.com                               0.0130169
dailykos.com                                   0.0100489
drudgereport.com                               0.00878696
instapundit.com                                0.00734596
talkingpointsmemo.com                          0.00716451
atrios.blogspot.com                            0.00674555
powerlineblog.com                              0.00570092
michellemalkin.com                             0.00546002
truthlaidbear.com                              0.00545502
washingtonmonthly.com                          0.00518684
littlegreenfootballs.com/weblog                0.00505204
newleftblogs.blogspot.com                      0.00473009
wizbangblog.com                                0.00468642
lashawnbarber.com                              0.00446137
hughhewitt.com                                 0.00444864
liberaloasis.com                               0.00429988
juancole.com    

In [34]:
"""
To compare/validate our result with the page rank function from networkx package, we can run this test chunk.
"""
# Reference ranks from networkx; the second positional argument (0.8) is
# the damping parameter `alpha`. Pairs are listed from the highest score down.
pr = nx.pagerank(polblog_graph, 0.8)
sort = sorted(pr.items(), key=lambda pair: pair[-1], reverse=True)
for ranked_pair in sort:
    print(ranked_pair)

('blogsforbush.com', 0.01301895781552029)
('dailykos.com', 0.0100413888806916)
('drudgereport.com', 0.00878715956670799)
('instapundit.com', 0.007346461630721889)
('talkingpointsmemo.com', 0.0071586272781508425)
('atrios.blogspot.com', 0.006739354972683495)
('powerlineblog.com', 0.005701141968403332)
('michellemalkin.com', 0.005460823619533248)
('truthlaidbear.com', 0.005455412237247717)
('washingtonmonthly.com', 0.005182694273126984)
('littlegreenfootballs.com/weblog', 0.005052865834291799)
('newleftblogs.blogspot.com', 0.004727153323002762)
('wizbangblog.com', 0.0046873582215709465)
('lashawnbarber.com', 0.004463380488201566)
('hughhewitt.com', 0.004449352535004244)
('liberaloasis.com', 0.00429603750272129)
('juancole.com', 0.004157171214577217)
('etalkinghead.com', 0.0038948522326822076)
('gevkaffeegal.typepad.com/the_alliance', 0.0038352652114142)
('andrewsullivan.com', 0.0036716549573829723)
('wonkette.com', 0.0036120066769204364)
('georgewbush.com', 0.0035821345714611564)
('talkl

In [None]:
#####------------------------------END-------------------------------###