In [None]:
#NETWORKX / JSON / DATAFRAME / ADJMX / METRICS+SCATTER / VIZ+GEPHI

In [None]:
%pylab inline
import numpy as np
import pandas as pd
import networkx as nx

# NetworkX

In [None]:
#create
# Toy graph: one node per letter, with random edges drawn separately inside the
# consonant group and inside the vowel group.
demo = nx.Graph()

consonants = list(set("bcdfghjklmnpqrstvwxyz"))
vowels = list(set("aeiou"))

#add nodes (bulk insert: same nodes, same insertion order as adding one by one)
demo.add_nodes_from(consonants)
demo.add_nodes_from(vowels)

#add random edges
# Both endpoints are drawn independently, so the same pair can be drawn more than
# once and an edge from a letter to itself is possible.
for _ in range(50):
    demo.add_edge(np.random.choice(consonants), np.random.choice(consonants))
for _ in range(100):
    demo.add_edge(np.random.choice(vowels), np.random.choice(vowels))

#draw: vowels in red, consonants in cyan
# demo.nodes() is available on both NetworkX 1.x and 2.x, whereas nodes_iter()
# was removed in 2.0. The vowel set is built once instead of once per node.
vowel_set = set(vowels)
nx.draw(demo, with_labels=True,
        node_color=['red' if node in vowel_set else 'cyan' for node in demo.nodes()])

In [None]:
def _diameter_or_inf(graph):
    """Return the diameter of `graph`, or float('inf') if it is empty or disconnected.

    nx.diameter raises on a disconnected graph, and the randomly drawn edges of
    the demo graph do not guarantee that each letter group ends up connected.
    """
    if graph.number_of_nodes() == 0 or not nx.is_connected(graph):
        return float('inf')
    return nx.diameter(graph)

# Basic metrics of the demo graph; each print emits the same text as before.
print('connected components %s' % ([cc for cc in nx.connected_components(demo)],))
print('')
print('degrees %s' % (np.array(nx.degree(demo)),))
print('')
print('clustering_coefficients %s' % (np.array(nx.clustering(demo)),))
print('')
# Diameter is computed separately on the vowel-only and consonant-only subgraphs.
print('diameter_vowels: %s - dimameter_consonant: %s' % (
    _diameter_or_inf(demo.subgraph(vowels)),
    _diameter_or_inf(demo.subgraph(consonants))))

# Data Import

In [None]:
#get data, as jsons, from http://bank.top-ix.org/banks

In [None]:
import urllib2
import json
from contextlib import closing

# Fetch the list of banks. closing() releases the HTTP handle once the body is
# read (urllib2 responses are not context managers on their own).
with closing(urllib2.urlopen('http://bank.top-ix.org/banks')) as request:
    bank_list = json.loads(request.read())
#connect
# index holds one (BANK_ID, BANK) pair per entry of the bank list.
index = [(entry['BANK_ID'],entry['BANK']) for entry in bank_list['data']]
# Download the records of the first bank only, to discover the field names.
with closing(urllib2.urlopen('http://bank.top-ix.org/data/'+str(index[0][0]))) as response:
    payload = json.loads(response.read())
#init dataframe
fields = np.array(payload['data'][0].keys())
# Empty frame with one column per field; built from `fields` directly so it no
# longer assumes that a record has exactly six fields. `data` is only ever the
# DataFrame now (the parsed bank list lives in `bank_list`).
data = pd.DataFrame(columns=fields)
data

In [None]:
#EX_1: fill in

#download data for each bank

#to append a row to a DF, do
#data.loc[len(data)] = np.array(element.values())

In [None]:
# Relabel the columns by position, then reorder them for readability.
# NOTE(review): the names are attached positionally, so this assumes the incoming
# column order matches this list — confirm against the downloaded field names.
# NOTE: do not re-run this cell on its own — after the reorder below, repeating
# the positional rename would attach the names to the wrong columns.
data.columns = ['gdp','origin','millions','exposure','perc','bank']
rearrange_columns = ['bank','origin','exposure','gdp','millions','perc']
data = data[rearrange_columns]
data.head()

# Data Cleaning

In [None]:
#EX_2: poke around - any data cleaning?


# Make DataFrames

In [None]:
#data:all, nndata:nation->nation, bdata:bipartite

In [None]:
# Number of rows in the full table, followed by a preview of the first rows.
print(len(data.index))
data.head()

In [None]:
# Nation -> nation table: total 'millions' for every (origin, exposure) pair.
nation_pairs = data.groupby(['origin','exposure'], as_index=False)
nndata = pd.DataFrame(nation_pairs['millions'].sum())
print(len(nndata))
nndata.head()

In [None]:
# Bank -> nation table: only the three columns needed for the bipartite view.
bipartite_columns = ['bank','exposure','millions']
bdata = data.loc[:, bipartite_columns]
print(len(bdata))
bdata.head()

# Adjacency Matrices (nndata and bdata)

In [None]:
# Sorted, de-duplicated node lists. A nation is any value that appears as an
# origin or as an exposure; a bank is any value of the 'bank' column.
all_nation_names = pd.concat([data['origin'], data['exposure']])
nations = sorted(all_nation_names.unique())
banks = sorted(data['bank'].unique())
print('%d nations, %d banks' % (len(nations), len(banks)))

In [None]:
# Show the sorted list of nation names.
print(nations)

In [None]:
# Nation x nation adjacency matrices: rows are origins, columns are exposures,
# both in the order of `nations`.
#   adjm_w_nn[i,j] = summed 'millions' from nndata for (origin i, exposure j)
#   adjm_b_nn[i,j] = 1 where such a row exists, 0 otherwise
adjm_w_nn = np.zeros([len(nations),len(nations)])
adjm_b_nn = np.zeros([len(nations),len(nations)])

# Walk the rows of nndata once instead of re-scanning the whole frame for every
# (origin, exposure) cell, and store plain scalars rather than assigning a
# one-element Series into an array cell.
nation_position = {name: k for k, name in enumerate(nations)}
for origin, exposure, millions in zip(nndata['origin'], nndata['exposure'], nndata['millions']):
    i = nation_position[origin]
    j = nation_position[exposure]
    adjm_w_nn[i,j] = millions
    adjm_b_nn[i,j] = 1

figure(figsize=(5,5))
pcolormesh(adjm_b_nn, cmap=cm.binary)

In [None]:
# Bank x nation (bi-adjacency) matrices: rows follow `banks`, columns follow `nations`.
#   adjm_w_b[i,j] = 'millions' that bank i holds towards nation j
#   adjm_b_b[i,j] = 1 where bdata has such a row, 0 otherwise
adjm_w_b = np.zeros([len(banks),len(nations)])
adjm_b_b = np.zeros([len(banks),len(nations)])

# Aggregate first so that each (bank, exposure) pair yields exactly one scalar:
# repeated pairs are summed instead of breaking the cell assignment. The frame
# is then walked once rather than re-scanned for every matrix cell.
bank_position = {name: k for k, name in enumerate(banks)}
nation_position = {name: k for k, name in enumerate(nations)}
bank_exposure = bdata.groupby(['bank','exposure'], as_index=False)['millions'].sum()
for bank, exposure, millions in zip(bank_exposure['bank'], bank_exposure['exposure'], bank_exposure['millions']):
    i = bank_position[bank]
    j = nation_position[exposure]
    adjm_w_b[i,j] = millions
    adjm_b_b[i,j] = 1

figure(figsize=(5,10))
pcolormesh(adjm_b_b, cmap=cm.binary)
plt.axis([0, len(nations), 0, len(banks)])

In [None]:
#note: the row/column order of these matrices is arbitrary (here: alphabetical by name)

# Graph-Viz, part 1

In [None]:
#building graph, edge by edge
# One undirected edge, named after the two nations, for every cell of the binary
# matrix that equals 1. np.nonzero lists the cells row by row, i.e. in the same
# order a nested loop over rows and columns would visit them.
Gnn = nx.Graph()
for row, col in zip(*np.nonzero(adjm_b_nn == 1)):
    Gnn.add_edge(nations[row], nations[col])
figure(figsize=(10,10))
nx.draw(Gnn, with_labels=True)

In [None]:
#the easy way
# Build the graph straight from the binary matrix. Nodes are the integer
# row/column indices, and later cells rely on that (they look names up with
# nations[k]), so the node ids are left untouched.
Gnn = nx.from_numpy_matrix(adjm_b_nn)
figure(figsize=(10,10))

# Only the drawn labels are mapped to nation names, so the plot is readable
# instead of showing bare indices.
nx.draw(Gnn, with_labels=True, labels=dict(enumerate(nations)))

In [None]:
#and a bipartite graph

#building
from scipy.sparse import csr_matrix
from networkx.algorithms import bipartite
sparse_adj_matrix_b = csr_matrix(adjm_b_b)
Gb = bipartite.from_biadjacency_matrix(sparse_adj_matrix_b)

#drawing
figure(figsize=(5,15))
# from_biadjacency_matrix tags every node with a 'bipartite' attribute (0 for
# row nodes, 1 for column nodes). Reading that tag is reliable even when the
# graph is disconnected, whereas bipartite.sets() has to infer the two sides
# by colouring and can fail or split them arbitrarily in that case.
X = set(n for n, attrs in Gb.nodes(data=True) if attrs['bipartite'] == 0)
Y = set(n for n, attrs in Gb.nodes(data=True) if attrs['bipartite'] == 1)
# Two vertical columns: X at x=0, Y at x=0.5, nodes stacked in id order.
pos = dict()
pos.update((n, (0, i*10)) for i, n in enumerate(sorted(X)))
pos.update((n, (0.5, i*10)) for i, n in enumerate(sorted(Y)))
nx.draw(Gb, pos=pos)
plt.show()

In [None]:
#Awful, right? Visualisation is not necessarily enlightening.
#We'll see something nicer further on. For now we'll use the graphs to compute metrics.

# Metrics

In [None]:
#EX_3: Metrics! What can you obtain with groupby? What with nx methods? How can you visualise the results?

In [None]:
#out-degree (nation-clustered)
for name,group in nndata.groupby('origin'):
    print name,len(group.groupby('exposure')),

In [None]:
#in-degree (nation-clustered)
for name,group in nndata.groupby('exposure'):
    print name,len(group.groupby('origin')),

In [None]:
#out-degree (bipartite)
for name,group in bdata.groupby('bank'):
    print name,len(group.groupby('exposure')),

In [None]:
#in-degree (bipartite)
for name,group in bdata.groupby('exposure'):
    print name,len(group.groupby('bank')),

In [None]:
#what if I want to see them sorted?
# (count, bank) pairs in ascending order of the number of distinct exposures.
exposures_per_bank = bdata.groupby('bank')['exposure'].nunique()
sorted((int(n_exposures), bank) for bank, n_exposures in exposures_per_bank.items())

In [None]:
#degree_nn = in+out
# Row sums count outgoing links and column sums count incoming links; the
# diagonal is subtracted once so a self-link is not counted twice.
out_links = adjm_b_nn.sum(axis=1)
in_links = adjm_b_nn.sum(axis=0)
self_links = np.diagonal(adjm_b_nn)
degrees_nn = (out_links + in_links - self_links).astype('int')
print(zip(nations, degrees_nn))
plot(sorted(degrees_nn), 'bx')
xlabel('list of nations')
ylabel('degree')

In [None]:
# Histogram of the nation-graph degrees (in + out) computed above.
plt.hist(degrees_nn)
xlabel('degree')
ylabel('count')

In [None]:
#banks out_degree in bipartite graph
# Row sums of the binary bank x nation matrix = number of nations per bank.
# The values are sorted ascending, so positions no longer line up with `banks`.
degrees_b_banks = sorted(adjm_b_b.sum(axis=1).astype('int'))
print degrees_b_banks
plot(degrees_b_banks, 'bx')
xlabel('list of banks')
ylabel('degree')

In [None]:
# Histogram of the per-bank degrees in the bipartite graph.
plt.hist(degrees_b_banks)
xlabel('degree')
ylabel('count')

In [None]:
#nations in_degree in bipartite graph
# Column sums of the binary bank x nation matrix = number of banks per nation.
# The values are sorted ascending, so positions no longer line up with `nations`.
degrees_b_nations = sorted(adjm_b_b.sum(axis=0).astype('int'))
print degrees_b_nations
plot(degrees_b_nations, 'bx')
xlabel('list of nations')
ylabel('degree')

In [None]:
# Histogram of the per-nation degrees in the bipartite graph.
plt.hist(degrees_b_nations)
xlabel('degree')
ylabel('count')

In [None]:
# Re-plot the binary nation matrix with rows and columns permuted into
# ascending-degree order.
nu_order = np.argsort(degrees_nn)
# np.ix_ applies the same permutation to both axes and returns a new array,
# leaving adjm_b_nn untouched.
sorted_adjm_b_nn = adjm_b_nn[np.ix_(nu_order, nu_order)]
figure(figsize=(5,5))
pcolormesh(sorted_adjm_b_nn, cmap=cm.binary)

In [None]:
#now we finally use the graphs
# Betweenness centrality of every node of the nation graph (dict: node -> value),
# plotted in ascending order.
nn_betweenness = nx.betweenness_centrality(Gnn)
print(nn_betweenness)
betweenness_ascending = sorted(nn_betweenness.values())
plot(betweenness_ascending)
xlabel('list of nations')
ylabel('betweenness')

In [None]:
#let's have a closer look
# (betweenness, nation name) pairs in ascending order of betweenness.
# nations[k] requires the nodes of Gnn to be integer indices into `nations`.
# NOTE: this recomputes the centrality instead of reusing nn_betweenness.
sorted([(v,nations[k]) for k,v in nx.betweenness_centrality(Gnn).items()])

In [None]:
# Histogram of the betweenness values of the nation graph.
plt.hist(nn_betweenness.values())
xlabel('betweenness')
ylabel('count')

In [None]:
#Scatterplots!

In [None]:
# One row per nation, in the order of `nations`, gathering the metrics computed above.
nation_agg = pd.DataFrame()
nation_agg['name'] = nations
nation_agg['nn_degree'] = degrees_nn
nation_agg['nn_indegree'] = adjm_b_nn.sum(axis=0).astype('int')
nation_agg['nn_outdegree'] = adjm_b_nn.sum(axis=1).astype('int')
nation_agg['bank_indegree'] = adjm_b_b.sum(axis=0).astype('int')
# Total 'millions' per exposure nation, aligned explicitly on `nations`: a nation
# that only ever appears as an origin has no group here and gets 0, instead of
# causing a length mismatch or shifting every following value by one row.
debt_by_nation = data.groupby('exposure')['millions'].sum()
nation_agg['debt'] = debt_by_nation.reindex(nations).fillna(0).values
# Look betweenness up by node index rather than trusting dict iteration order.
# Requires the nodes of Gnn to be the integer positions 0..len(nations)-1.
nation_agg['betweenness'] = [nn_betweenness[node] for node in range(len(nations))]
nation_agg

In [None]:
# One row per bank, in the order of `banks`.
banks_agg = pd.DataFrame()
banks_agg['name'] = banks
# Row sums of the binary bank x nation matrix = number of nations per bank.
banks_agg['degree'] = adjm_b_b.sum(axis=1).astype('int')
# Total 'millions' per bank. The values are attached by position, not by name.
# NOTE(review): presumably the groupby output follows the same sorted order as
# `banks` — confirm, since nothing here aligns the two explicitly.
banks_agg['owned_debt'] = np.array(data.groupby('bank')['millions'].apply(sum))
banks_agg

In [None]:
def scatterplot(x,y,l,figsize=(15,15)):
    """Draw a scatter plot on a new figure and write a text label at each point.

    x, y    -- equally long sequences of coordinates (lists, arrays or Series)
    l       -- sequence of labels; label k is written at (x[k], y[k])
    figsize -- size passed to figure(); defaults to the former fixed (15,15)

    Returns nothing; the plot is left on the current figure so the caller can
    still add axis labels.
    """
    figure(figsize=figsize)
    plt.scatter(x,y)
    # Walk the three sequences in parallel so points are matched by position.
    # Indexing a pandas Series with x[i] is a label lookup, which breaks or
    # mislabels points when the Series does not have a default 0..n-1 index.
    for x_value, y_value, label in zip(x, y, l):
        plt.text(x_value, y_value, label)

In [None]:
# x: row sums of the weighted nation matrix (total 'millions' per origin nation);
# y: column sums (total 'millions' per exposure nation); one labelled point per nation.
scatterplot(adjm_w_nn.sum(axis=1),adjm_w_nn.sum(axis=0),nations)

In [None]:
# Preview the first rows of the per-bank summary table.
banks_agg.head()

In [None]:
# One labelled point per bank: number of nations it is exposed to vs. total 'millions'.
scatterplot(banks_agg['degree'],banks_agg['owned_debt'],banks)
xlabel('degree')
ylabel('owned_debt')

In [None]:
# Preview the first rows of the per-nation summary table.
nation_agg.head()

In [None]:
# One labelled point per nation: total 'millions' of exposure towards it vs.
# the number of banks exposed to it.
scatterplot(nation_agg['debt'],nation_agg['bank_indegree'],nations)
xlabel('debt')
ylabel('bank_indegree')

# Community Detection

In [None]:
# Partition the nation graph into communities. The result `c` is used further
# down as a mapping from node to community id.
# NOTE(review): `community` is a third-party module (presumably python-louvain) — confirm.
# NOTE: the same two statements are repeated in the drawing cell below.
import community
c = community.best_partition(Gnn)

In [None]:
# Scratch expression: evaluates to a two-colour list that is only displayed;
# nothing is assigned, so no other cell depends on it.
['red','blue']

In [None]:
import community
# Partition the nation graph into communities and colour each node by its community.
c = community.best_partition(Gnn)
colors = ['red','blue','green','cyan']
# The number of communities is not known in advance: ids beyond the palette
# wrap around (modulo) instead of raising IndexError, and colours are unchanged
# whenever there are at most len(colors) communities.
nx.draw(Gnn,
        pos=nx.fruchterman_reingold_layout(Gnn),
        cmap=plt.cm.hsv,
        node_color=[colors[c[node] % len(colors)] for node in Gnn.nodes()])

In [None]:
# Re-plot the binary nation matrix with nations of the same community placed next
# to each other.
# c.items() yields (node, community id) and nations[k] turns node k into its
# name, so sorting the (community id, name) pairs lists names grouped by community.
clustered_nations = [n for (v,n) in sorted([(v,nations[k]) for (k,v) in c.items()])]
# argsort(argsort(x)) gives each element's rank in sorted order. `nations` was
# built with sorted(), so — assuming every nation appears in c — the rank of a
# name equals its row index in adjm_b_nn, and nu_order therefore lists the matrix
# indices in community order.
nu_order = argsort(argsort(clustered_nations))
# Apply the same permutation to rows, then to columns, on a copy of the matrix.
sorted_adjm_b_nn = np.copy(adjm_b_nn)
sorted_adjm_b_nn = sorted_adjm_b_nn[nu_order,:]
sorted_adjm_b_nn = sorted_adjm_b_nn[:,nu_order]
figure(figsize=(5,5))
pcolormesh(sorted_adjm_b_nn, cmap=cm.binary)

# Graph-Viz, part 2

In [None]:
# Directed, weighted nation -> nation graph, written to GML.
# Every strictly positive cell (i, j) of the weighted matrix becomes an edge from
# nations[i] to nations[j]; np.nonzero lists the cells row by row.
G_nn_exp = nx.DiGraph()
for row, col in zip(*np.nonzero(adjm_w_nn > 0)):
    G_nn_exp.add_edge(nations[row], nations[col], weight=adjm_w_nn[row, col])
nx.write_gml(G_nn_exp, 'graph_nn.gml')

# Directed, weighted bank -> nation graph, written to GML.
# Every node carries a 'bipartite' attribute saying which side it belongs to;
# nations are added second, as before.
G_b_exp = nx.DiGraph()
G_b_exp.add_nodes_from(banks, bipartite='bank')
G_b_exp.add_nodes_from(nations, bipartite='nation')
# Every strictly positive cell (i, j) becomes an edge from banks[i] to nations[j].
for row, col in zip(*np.nonzero(adjm_w_b > 0)):
    G_b_exp.add_edge(banks[row], nations[col], weight=adjm_w_b[row, col])
nx.write_gml(G_b_exp, 'graph_b_bip.gml')

In [None]:
#to gephi!