In [3]:
from operator import itemgetter
import networkx as nx
import wikipedia
import matplotlib.pyplot as plt

# Breadth-first crawl of Wikipedia links starting from START, two layers deep.
# Every followed link becomes a directed edge page -> link in F.
START = "Armenia"

# Links are title-cased with str.title() before filtering, hence "Of" (capital O).
SKIP_PREFIXES = ("List Of", "Economy Of")

F = nx.DiGraph()

to_scrape_list = [(0, START)]   # FIFO queue of (layer, page title)
to_scrape_set = {START}         # titles ever queued (titles only, so membership tests work)
done_set = set()                # titles already attempted

# Stop when the queue runs dry or the next queued page belongs to layer 2.
while to_scrape_list and to_scrape_list[0][0] < 2:
    layer, page = to_scrape_list.pop(0)
    done_set.add(page)
    print(layer, page) # Show progress

    try:
        wiki = wikipedia.page(page)
    except Exception:
        # Best effort: skip pages that cannot be loaded. KeyboardInterrupt is
        # not an Exception subclass, so Ctrl-C still aborts the crawl.
        print("Could not load", page)
        continue

    for link in wiki.links:
        link = link.title()
        # Skip a link when it starts with ANY of the unwanted prefixes.
        if link.startswith(SKIP_PREFIXES):
            continue
        if link not in to_scrape_set and link not in done_set:
            to_scrape_list.append((layer + 1, link))
            to_scrape_set.add(link)
        F.add_edge(page, link)

    
    
# Restrict the crawl graph to nodes whose total degree is at least 300.
core = [name for name, degree in F.degree() if degree >= 300]
G = F.subgraph(core)
print("{} nodes, {} edges".format(G.number_of_nodes(), G.number_of_edges()))

# Round-trip the subgraph through GraphML so it can be reloaded later.
nx.write_graphml(G, "wiki.graphml")
G = nx.read_graphml("wiki.graphml")

# Up to 100 nodes with the highest in-degree, printed as "<in-degree> <node>".
top_indegree = sorted(G.in_degree(), key=itemgetter(1), reverse=True)[:100]
print("\n".join("{} {}".format(indeg, name) for name, indeg in top_indegree))

# In-degree of every node, in node order.
size_ = [indeg for _, indeg in G.in_degree()]

# Draw the labelled graph and save it as a high-resolution PNG.
nx.draw(
    G,
    with_labels=True,
    font_size=8,
    linewidths=2,
    edgecolors="white",
    width=0.2,
    edge_color="b",
)
plt.savefig("wiki.png", dpi=900)

In [56]:
from operator import itemgetter
import networkx as nx
import wikipedia
import matplotlib.pyplot as plt

from plotly.offline import download_plotlyjs, init_notebook_mode,  iplot, plot
# Enable inline plotly rendering in the notebook.
init_notebook_mode(connected=True)

# NOTE(review): the first cell writes "wiki.graphml" but this cell reads
# "wiki.graphml.xml" - confirm which file is intended before a clean re-run.
F = nx.read_graphml("wiki.graphml.xml")

# Keep only nodes whose total degree is at least 600.
core = [name for name, degree in F.degree() if degree >= 600]
G = F.subgraph(core)
print("{} nodes, {} edges".format(G.number_of_nodes(), G.number_of_edges()))

15 nodes, 171 edges


In [67]:
# Display the in-degree of every node in the core subgraph G.
G.in_degree()

InDegreeView({'Bibliography Of Armenia': 5, 'International Standard Book Number': 14, 'Economy Of Armenia': 14, 'China': 10, 'Switzerland': 13, 'Poland': 13, 'Italy': 13, 'Russia': 14, 'Armenia': 14, 'Hayastan': 4, 'Turkey': 14, 'Hungary': 12, 'Germany': 14, 'Lithuania': 12, 'Infrastructure Of Armenia': 5})

In [61]:
# Five nodes of G with the highest in-degree, as (node, in-degree) pairs.
top_indegree = sorted(G.in_degree(), key=lambda pair: pair[1], reverse=True)[:5]
top_indegree

[('International Standard Book Number', 14),
 ('Economy Of Armenia', 14),
 ('Russia', 14),
 ('Armenia', 14),
 ('Turkey', 14)]

In [62]:
# Five nodes of G with the highest out-degree, as (node, out-degree) pairs.
top_outdegree = sorted(G.out_degree(), key=lambda pair: pair[1], reverse=True)[:5]
top_outdegree

[('Bibliography Of Armenia', 15),
 ('Armenia', 15),
 ('Hayastan', 15),
 ('Infrastructure Of Armenia', 15),
 ('Italy', 12)]

In [64]:
# Five nodes of G with the highest total (in + out) degree, as (node, degree) pairs.
top_degree = sorted(G.degree(), key=lambda pair: pair[1], reverse=True)[:5]
top_degree

[('Armenia', 29),
 ('Russia', 26),
 ('Turkey', 26),
 ('Germany', 26),
 ('Italy', 25)]

In [78]:
# Display the in-degree of every node in G (same expression as an earlier cell).
G.in_degree()

InDegreeView({'Bibliography Of Armenia': 5, 'International Standard Book Number': 14, 'Economy Of Armenia': 14, 'China': 10, 'Switzerland': 13, 'Poland': 13, 'Italy': 13, 'Russia': 14, 'Armenia': 14, 'Hayastan': 4, 'Turkey': 14, 'Hungary': 12, 'Germany': 14, 'Lithuania': 12, 'Infrastructure Of Armenia': 5})

In [None]:
# Reproducible force-directed layout of the core subgraph.
# The graph variable in this notebook is `G` (no lowercase `g` is defined in the
# visible cells), and current networkx releases name the RNG argument `seed`.
nx.spring_layout(G, seed=779)

In [92]:
# Force-directed layout: maps every node of G to an (x, y) coordinate pair.
pos = nx.fruchterman_reingold_layout(G)

# Node coordinates as parallel x / y lists, in the layout's node order.
Xn = [xy[0] for xy in pos.values()]
Yn = [xy[1] for xy in pos.values()]

# One marker per node; hovering shows the node name.
trace_nodes = {
    'type': 'scatter',
    'x': Xn,
    'y': Yn,
    'mode': 'markers',
    'marker': {'size': 28, 'color': 'rgb(0,240,0)'},
    'text': list(pos),
    'hoverinfo': 'text',
}

# Edge coordinates: each edge contributes (start, end, None); the None entry
# separates consecutive edges within the single 'lines' trace.
Xe, Ye = [], []
for src, dst in G.edges():
    Xe += [pos[src][0], pos[dst][0], None]
    Ye += [pos[src][1], pos[dst][1], None]

trace_edges = {
    'type': 'scatter',
    'mode': 'lines',
    'x': Xe,
    'y': Ye,
    'line': {'width': 1, 'color': 'rgb(25,25,25)'},
    'hoverinfo': 'none',
}

# Shared axis settings: hide axis line, zero line, grid, tick labels and title.
axis = {
    'showline': False,
    'zeroline': False,
    'showgrid': False,
    'showticklabels': False,
    'title': '',
}

layout = {
    'title': 'My Graph',
    'font': {'family': 'Balto'},
    'width': 600,
    'height': 600,
    'autosize': False,
    'showlegend': False,
    'xaxis': axis,
    'yaxis': axis,
    'margin': {'l': 40, 'r': 40, 'b': 85, 't': 100, 'pad': 0},
    'hovermode': 'closest',
    'plot_bgcolor': '#efecea',  # background colour of the plotting area
}

# Edges are listed first so the node markers are drawn on top of them.
fig = {'data': [trace_edges, trace_nodes], 'layout': layout}

def make_annotations(pos, anno_text, font_size=14, font_color='rgb(10,10,10)'):
    """Build one plotly text annotation per entry of ``pos``.

    Parameters
    ----------
    pos : mapping
        Maps each node to an indexable ``(x, y)`` pair; iterated in its own order.
    anno_text : sequence or None
        Label for each entry of ``pos``, in the same order. ``None`` falls back
        to using the keys of ``pos`` as labels.
    font_size : int
        Font size applied to every label.
    font_color : str
        Colour string applied to every label.

    Returns
    -------
    list of dict
        One annotation dict per node, placed exactly at the node's coordinates
        (no vertical offset) and drawn without an arrow.

    Raises
    ------
    ValueError
        If ``anno_text`` is given but its length differs from ``len(pos)``.
    """
    # Previously anno_text was accepted but ignored; honour it so callers can
    # supply labels that differ from the node keys.
    labels = list(pos) if anno_text is None else list(anno_text)
    if len(labels) != len(pos):
        raise ValueError('pos and anno_text must have the same length')

    return [
        dict(text=label,
             x=pos[node][0],
             y=pos[node][1],
             xref='x1', yref='y1',
             font=dict(color=font_color, size=font_size),
             showarrow=False)
        for node, label in zip(pos, labels)
    ]

# Label every node with its own name, then render the figure inline.
fig['layout']['annotations'] = make_annotations(pos, list(pos))

iplot(fig)

In [88]:
# nx.draw(G,linewidths = 2,width = 0.2, edgecolors = "white", font_size = 8, edge_color = "b", with_labels = True)