In [1]:
import os
import numpy as np
import pandas as pd
import re
from webweb import Web
import networkx as nx
import operator
import math
import seaborn as sns
import itertools
import matplotlib.pyplot as plt
from networkx.drawing.nx_agraph import graphviz_layout
from pandas import ExcelWriter
from pandas import ExcelFile
from collections import Counter
from networkx import edge_betweenness_centrality as betweenness
import random
from collections import Counter
from collections import defaultdict
from itertools import groupby

In [2]:
#UnDirected, Weighted
def data_undirected_weighted(df_trade,year):
    """Build the undirected, weighted trade edge list for a single year.

    Parameters
    ----------
    df_trade : pandas.DataFrame
        Must contain the columns ``year``, ``importer1``, ``importer2``,
        ``flow1`` and ``flow2``.
    year :
        Value of the ``year`` column to keep.

    Returns
    -------
    nodes_trade : list
        Sorted unique values found in ``importer1``/``importer2`` for that year.
    edges : list of [node_a, node_b, flow1, flow2]
        One entry per retained row, with the raw flow values.
    weighted_edges : list of [node_a, node_b, weight]
        Same rows and order as ``edges``. The weight is the mean of both flows
        when both are positive, otherwise the single positive flow; flows are
        multiplied by 1,000,000 first.

    Notes
    -----
    A flow counts as "not reported" when it equals 0 or -9. Rows where neither
    flow is positive are dropped.
    """
    not_reported = [0, -9]
    df_year = df_trade[df_trade['year']==year]
    nodes_trade = np.unique(df_year[['importer1','importer2']]).tolist()

    flow1_positive = df_year['flow1'] > 0
    flow2_positive = df_year['flow2'] > 0
    flow1_absent = df_year['flow1'].isin(not_reported)
    flow2_absent = df_year['flow2'].isin(not_reported)

    # Both flows reported: edge written as importer2 - importer1
    edges_21 = df_year.loc[
        flow1_positive & flow2_positive,
        ['importer2','importer1','flow1','flow2'],
    ].values.tolist()
    # Only flow1 reported (flow2 is 0 or -9): edge importer2 - importer1
    edges_21_single = df_year.loc[
        flow1_positive & flow2_absent,
        ['importer2','importer1','flow1','flow2'],
    ].values.tolist()
    # Only flow2 reported (flow1 is 0 or -9): edge importer1 - importer2
    edges_12_single = df_year.loc[
        flow1_absent & flow2_positive,
        ['importer1','importer2','flow1','flow2'],
    ].values.tolist()

    # Weight of a two-way dyad is the mean of the two scaled flows.
    weighted_edges = [
        [edge[0], edge[1], (edge[2]*1000000 + edge[3]*1000000)/2]
        for edge in edges_21
    ]
    # One-way dyads use whichever flow is positive.
    weighted_edges += [[edge[0], edge[1], edge[2]*1000000] for edge in edges_21_single]
    weighted_edges += [[edge[0], edge[1], edge[3]*1000000] for edge in edges_12_single]

    edges = edges_21+edges_21_single+edges_12_single

    return nodes_trade,edges,weighted_edges
    

In [None]:
#Directed, Weighted
def data_directed_weighted(df_trade,year):
    """Build the directed, weighted trade edge list for a single year.

    Parameters
    ----------
    df_trade : pandas.DataFrame
        Must contain the columns ``year``, ``importer1``, ``importer2``,
        ``flow1`` and ``flow2``.
    year :
        Value of the ``year`` column to keep.

    Returns
    -------
    nodes : list
        Sorted unique values found in ``importer1``/``importer2`` for that year.
    labels : dict
        ``{'nodes': {'0': {'name': nodes[0]}, '1': {'name': nodes[1]}, ...}}``.
    edges : list of [source, target, flow]
        ``flow1`` becomes an edge importer2 -> importer1 and ``flow2`` an edge
        importer1 -> importer2. Two-way dyads contribute both directions.
    weighted_edges : list of [source, target, weight]
        Same rows and order as ``edges`` with the flow multiplied by 1,000,000.

    Notes
    -----
    A flow counts as "not reported" when it equals 0 or -9. Rows where neither
    flow is positive are dropped.
    """
    not_reported = [0, -9]
    df_year = df_trade[df_trade['year']==year]
    nodes_trade = np.unique(df_year[['importer1','importer2']]).tolist()

    flow1_positive = df_year['flow1'] > 0
    flow2_positive = df_year['flow2'] > 0
    flow1_absent = df_year['flow1'].isin(not_reported)
    flow2_absent = df_year['flow2'].isin(not_reported)
    both_positive = flow1_positive & flow2_positive

    # Both flows reported: one edge per direction
    edges_21 = df_year.loc[both_positive, ['importer2','importer1','flow1']].values.tolist()
    edges_12 = df_year.loc[both_positive, ['importer1','importer2','flow2']].values.tolist()
    # Only flow1 reported (flow2 is 0 or -9): importer2 -> importer1
    edges_21_single = df_year.loc[
        flow1_positive & flow2_absent, ['importer2','importer1','flow1']
    ].values.tolist()
    # Only flow2 reported (flow1 is 0 or -9): importer1 -> importer2
    edges_12_single = df_year.loc[
        flow1_absent & flow2_positive, ['importer1','importer2','flow2']
    ].values.tolist()

    edges = edges_21+edges_12+edges_21_single+edges_12_single
    weighted_edges = [[edge[0], edge[1], edge[2]*1000000] for edge in edges]

    # Index -> name mapping keyed by the node's position (as a string).
    labels = {'nodes': {str(i): {'name': name} for i, name in enumerate(nodes_trade)}}

    return nodes_trade,labels,edges,weighted_edges

In [3]:
def density_undirected(nodes,edges):
    """Return the density of an undirected graph without self-loops.

    Density is ``len(edges) / (n*(n-1)/2)`` where ``n = len(nodes)``.
    A graph with fewer than two nodes has no possible edge, so its density
    is reported as 0.0 instead of dividing by zero.
    """
    n = len(nodes)
    if n < 2:
        return 0.0
    total_possible_edges = n*(n-1)/2
    return len(edges)/total_possible_edges

def density_directed(nodes,edges):
    """Return the density of a directed graph without self-loops.

    Density is ``len(edges) / (n*(n-1))`` where ``n = len(nodes)``: every
    ordered pair of distinct nodes is a possible edge. This mirrors
    ``density_undirected``, which also excludes self-loops.
    A graph with fewer than two nodes has no possible edge, so its density
    is reported as 0.0 instead of dividing by zero.
    """
    n = len(nodes)
    if n < 2:
        return 0.0
    total_possible_edges = n*(n-1)
    return len(edges)/total_possible_edges

In [4]:

def graph_ud_w(nodes,weighted_edges):
    """Build an undirected networkx graph from ``[u, v, weight]`` entries.

    ``nodes`` is accepted but not used: only nodes that appear in at least
    one edge end up in the graph. Each edge stores its third element under
    the ``weight`` attribute.
    """
    graph = nx.Graph()
    for edge in weighted_edges:
        endpoint_a, endpoint_b, edge_weight = edge[0], edge[1], edge[2]
        graph.add_edge(endpoint_a, endpoint_b, weight=edge_weight)
    return graph


In [5]:
def node_strengths_undirected(nodes, weighted_edges):
    """Return each node's share of the total undirected strength.

    A node's strength is the sum of the weights (``edge[2]``) of every edge
    that has the node as either endpoint; an edge whose two endpoints are the
    same node is counted once. The result maps each node in ``nodes`` to
    ``strength / sum of all strengths``.

    If the total strength is 0 (for example, no edges), every node maps to
    0.0 instead of raising ZeroDivisionError.
    """
    # One pass over the edges instead of rescanning them for every node.
    incident_weight = defaultdict(int)
    for edge in weighted_edges:
        incident_weight[edge[0]] += edge[2]
        if edge[1] != edge[0]:
            incident_weight[edge[1]] += edge[2]

    node_strength = {}
    total_weight = 0
    for node in nodes:
        strength = incident_weight.get(node, 0)
        node_strength[node] = strength
        total_weight += strength

    if total_weight == 0:
        return {node: 0.0 for node in node_strength}
    return {k:v/total_weight for k,v in node_strength.items()}

def node_strengths_directed_out(nodes,weighted_edges):
    """Return each node's share of the total outgoing strength, rounded to 3 places.

    A node's out-strength is the sum of the weights (``edge[2]``) of the
    edges whose first element (``edge[0]``) is that node. The result maps
    each node in ``nodes`` to ``round(out_strength / total, 3)``.

    If the total is 0 (for example, no edges), every node maps to 0.0
    instead of raising ZeroDivisionError.
    """
    # One pass over the edges instead of rescanning them for every node.
    outgoing_weight = defaultdict(int)
    for edge in weighted_edges:
        outgoing_weight[edge[0]] += edge[2]

    node_strength = {}
    total_weight = 0
    for node in nodes:
        strength = outgoing_weight.get(node, 0)
        node_strength[node] = strength
        total_weight += strength

    if total_weight == 0:
        return {node: 0.0 for node in node_strength}
    return {k:round(v/total_weight,3) for k,v in node_strength.items()}
    
    
def node_strengths_directed_in(nodes,weighted_edges):
    """Return each node's share of the total incoming strength, rounded to 3 places.

    A node's in-strength is the sum of the weights (``edge[2]``) of the
    edges whose second element (``edge[1]``) is that node. The result maps
    each node in ``nodes`` to ``round(in_strength / total, 3)``.

    If the total is 0 (for example, no edges), every node maps to 0.0
    instead of raising ZeroDivisionError.
    """
    # One pass over the edges instead of rescanning them for every node.
    incoming_weight = defaultdict(int)
    for edge in weighted_edges:
        incoming_weight[edge[1]] += edge[2]

    node_strength = {}
    total_weight = 0
    for node in nodes:
        strength = incoming_weight.get(node, 0)
        node_strength[node] = strength
        total_weight += strength

    if total_weight == 0:
        return {node: 0.0 for node in node_strength}
    return {k:round(v/total_weight,3) for k,v in node_strength.items()}

def exports_imports_degree(nodes, edges):
    """Count, per node, the edges it starts, the edges it ends, and the edges it touches.

    Returns three lists aligned with ``nodes``:

    * ``export_rate[i]`` - number of edges with ``edge[0] == nodes[i]``
    * ``import_rate[i]`` - number of edges with ``edge[1] == nodes[i]``
    * ``degree[i]``      - number of edges with ``nodes[i]`` at either end
      (an edge whose two ends are the same node counts once)

    Edge endpoints must be hashable.
    """
    # One pass over the edges instead of rescanning them for every node.
    as_source = Counter()
    as_target = Counter()
    as_endpoint = Counter()
    for edge in edges:
        first, second = edge[0], edge[1]
        as_source[first] += 1
        as_target[second] += 1
        as_endpoint[first] += 1
        if second != first:
            as_endpoint[second] += 1

    # Counter returns 0 for nodes that never appear in an edge.
    export_rate = [as_source[node] for node in nodes]
    import_rate = [as_target[node] for node in nodes]
    degree = [as_endpoint[node] for node in nodes]
    return export_rate,import_rate,degree

def avg_degree(nodes,edges):
    """Return the mean number of edges touching a node.

    For every node in ``nodes`` the edges having that node as ``edge[0]`` or
    ``edge[1]`` are counted (an edge whose two ends are the same node counts
    once); the counts are summed and divided by ``len(nodes)``.

    Returns 0.0 for an empty node list instead of raising ZeroDivisionError.
    Edge endpoints must be hashable.
    """
    if len(nodes) == 0:
        return 0.0

    # One pass over the edges instead of rescanning them for every node.
    as_endpoint = Counter()
    for edge in edges:
        as_endpoint[edge[0]] += 1
        if edge[1] != edge[0]:
            as_endpoint[edge[1]] += 1

    degree_total = sum(as_endpoint[node] for node in nodes)
    return degree_total/len(nodes)

def avg_in_out_degree(nodes,edges):
    """Return ``(average in-degree, average out-degree)`` over ``nodes``.

    In-degree counts edges with ``edge[1] == node``; out-degree counts edges
    with ``edge[0] == node``. Endpoints that are not in ``nodes`` do not
    contribute. Both sums are divided by ``len(nodes)``.

    Returns ``(0.0, 0.0)`` for an empty node list instead of raising
    ZeroDivisionError. Edge endpoints must be hashable.
    """
    node_count = len(nodes)
    if node_count == 0:
        return 0.0, 0.0

    # One pass over the edges instead of rescanning them for every node.
    as_source = Counter()
    as_target = Counter()
    for edge in edges:
        as_source[edge[0]] += 1
        as_target[edge[1]] += 1

    degree_in = sum(as_target[node] for node in nodes)
    degree_out = sum(as_source[node] for node in nodes)

    return (degree_in/node_count),(degree_out/node_count)
        
        
def cdf(freq):
    """Map every key of ``freq`` to the sum of the values whose key is >= it.

    ``freq`` is a mapping ``key -> count``. The sums are not normalised.
    """
    tail_sum = {}
    for threshold in freq:
        running = 0
        for key, count in freq.items():
            if key >= threshold:
                running += count
        tail_sum[threshold] = running
    return tail_sum

def plot_cdf(prob):
    """Split a mapping into two parallel lists ordered by ascending key.

    Returns ``(x, y)`` where ``x`` holds the keys of ``prob`` in sorted order
    and ``y`` the matching values. Nothing is drawn here.
    """
    ordered_pairs = sorted(prob.items(), key=lambda pair: pair[0])
    x = [key for key, _ in ordered_pairs]
    y = [value for _, value in ordered_pairs]
    return x,y

In [6]:
#DataFrame Trade
# Load the dyadic trade table and strip surrounding whitespace from both
# country-name columns so the same country always compares equal.
df_trade = pd.read_excel('Countries_Flows.xlsx')
for name_column in ('importer1', 'importer2'):
    df_trade[name_column] = df_trade[name_column].str.strip()

In [7]:
#Years of data in trade data
# Sorted unique years, skipping the first 50 entries.
# NOTE(review): later cells treat the first remaining year as 1920 - confirm
# that position 50 is 1920 in the loaded data.
years = np.unique(df_trade['year']).tolist()[50:]

In [9]:
#Calculate reciprocity over the years 1920-2014
# Maps each year to the overall reciprocity of that year's directed graph.
reci = {}
for year in years:
    nodes,labels,edges,weighted_edges = data_directed_weighted(df_trade,year)
    # NOTE(review): graph_d_w is not defined in the visible cells; it presumably
    # builds a directed graph from the weighted edge list - confirm it is
    # defined before this cell runs on a fresh kernel.
    G = graph_d_w(nodes,weighted_edges)

    reci[year] = nx.overall_reciprocity(G)

print(reci)

In [None]:
#Calculate average degree over the years 1920-2014
# Maps each year to the average degree of that year's undirected network.
degree_avg_dict = {}
for year in years:
    # data_undirected_weighted returns three values (nodes, edges, weighted_edges)
    nodes,edges,weighted_edges = data_undirected_weighted(df_trade,year)
    degree_avg = avg_degree(nodes,edges)
    degree_avg_dict[year] = degree_avg
print(degree_avg_dict)


In [None]:
#Evolution of trade network (edges), Preferential attachment like mechanism
# For each year after the first, compare the network with the previous year's
# and record which nodes and edges are new.
# NOTE(review): data_undirected_unweighted and cal_degree are not defined in
# the visible cells - confirm they exist before this cell runs.
new_nodes_count = []         # per year: number of nodes not present the year before
new_edges_count = []         # per year: number of edges not present the year before
new_nodes = []               # per year: the new nodes themselves
new_edges = []               # per year: the new edges themselves
edges_from_new_nodes = []    # per year: edges that touch at least one new node
edges_from_old_nodes = []    # per year: new edges minus edges touching a new node
source = []                  # per new edge: smaller of the two endpoint degrees
dest = []                    # per new edge: larger of the two endpoint degrees

# Baseline snapshot. The year is hard-coded; the loop below starts at years[1],
# so this assumes years[0] is 1920 - TODO confirm.
nodes_ud_uw_prev,edges_ud_uw_prev = data_undirected_unweighted(df_trade,1920)
print(len(edges_ud_uw_prev))

for year in years[1:]:
    nodes_ud_uw,edges_ud_uw = data_undirected_unweighted(df_trade,year)
    # Nodes present this year but not in the previous year
    nodes_diff = list(set(nodes_ud_uw)-set(nodes_ud_uw_prev))
    
    new_nodes_count.append(len(nodes_diff))
    new_nodes.append(nodes_diff)
    nodes_ud_uw_prev = nodes_ud_uw
    
    # Edges whose exact value is absent from the previous year's list.
    # NOTE(review): this is a list-membership test per edge, so it scales with
    # len(edges) * len(previous edges); an edge stored with its endpoints in
    # the other order would also count as new - depends on the helper's output.
    edges_diff = []
    for edge in edges_ud_uw:
        if edge not in edges_ud_uw_prev:
            edges_diff.append(edge)
    
    # Degrees are taken from this year's edge list via cal_degree.
    for edge in edges_diff:
        deg_n0 = cal_degree(edge[0],edges_ud_uw)
        deg_n1 = cal_degree(edge[1],edges_ud_uw)
        source.append(min(deg_n0,deg_n1))
        dest.append(max(deg_n0,deg_n1))
        
    new_edges.append(edges_diff)
    # Edges (from the full current-year list) with a new node at either end
    edge_new_nodes = []
    for edge in edges_ud_uw:
        if edge[0] in nodes_diff or edge[1] in nodes_diff:
            edge_new_nodes.append(edge)

    edges_from_new_nodes.append(len(edge_new_nodes))
    edges_from_old_nodes.append(len(edges_diff)-len(edge_new_nodes))    
    new_edges_count.append(len(edges_diff))
    # This year's edges become the reference for the next iteration
    edges_ud_uw_prev = edges_ud_uw
    

In [None]:
# Turn the per-edge degree lists into ``degree -> frequency`` dicts ordered by
# ascending degree.
source = dict(sorted(Counter(source).items(), key=lambda pair: pair[0]))
dest = dict(sorted(Counter(dest).items(), key=lambda pair: pair[0]))

In [None]:
# Frequency of each endpoint degree among newly created edges, one line per
# endpoint role; the figure is also written to PA.png.
for degree_freq, line_color, legend_text in (
    (source, 'g', 'Source nodes'),
    (dest, 'b', 'Destination nodes'),
):
    plt.plot(list(degree_freq), list(degree_freq.values()), color=line_color, label=legend_text)
plt.xlabel('Degree')
plt.ylabel('Frequency')
plt.legend()
plt.savefig('PA.png')
plt.show()

In [None]:
# Yearly counts of new nodes and new edges, starting from the second year in
# ``years``; the figure is written to distribution_edges.png.
plt.xlabel("year")
plt.ylabel("Distribution of new edges")
for yearly_counts, legend_text in (
    (new_nodes_count, 'Total new nodes'),
    (new_edges_count, 'Total new edges'),
    (edges_from_old_nodes, 'Edges from existing nodes'),
    (edges_from_new_nodes, 'Edges from new nodes'),
):
    plt.plot(years[1:], yearly_counts, '-', label=legend_text)
plt.legend()
plt.savefig('distribution_edges.png')

In [None]:
#Degree Centrality UnDirected Weighted
# For every year in ``years_``: normalise the edge weights, compute each
# node's share of total strength, print the ranking, and count in
# ``dict_count_`` how many years each node appeared in.
dict_count_ = {}
years_ = [2005]
for year in years_:
    print(year)
    nodes_ud_w,edges_ud_w,weighted_edges_ud_w = data_undirected_weighted(df_trade,year)
    # Take the maximum once, before mutating: recomputing it inside the loop
    # would divide later edges by a different value once the largest edge has
    # already been rescaled.
    max_weight = max((edge[2] for edge in weighted_edges_ud_w), default=1)
    for edge in weighted_edges_ud_w:
        edge[2] = edge[2]/max_weight
    node_strength_undirected = node_strengths_undirected(nodes_ud_w,weighted_edges_ud_w)
    # Highest strength first
    node_strength_undirected = dict(sorted(node_strength_undirected.items(), key=operator.itemgetter(1),reverse=True))
    print(node_strength_undirected)
    # NOTE(review): every node is counted, not only the top-ranked ones.
    for k in node_strength_undirected:
        dict_count_[k] = dict_count_.get(k, 0) + 1

In [None]:
# Order the counts from most to least frequent.
# NOTE(review): the visible cell above fills ``dict_count_`` (trailing
# underscore); ``dict_count`` is not assigned in the visible cells - confirm
# which one is meant to be ranked here.
dict_count = dict(sorted(dict_count.items(), key=operator.itemgetter(1),reverse=True))

In [None]:
# Keep the first ten entries of the ranking and draw them as a bar chart,
# also written to node_strength.png.
dict_count = dict(itertools.islice(dict_count.items(), 10))
fig, ax = plt.subplots()
ax.bar(list(dict_count.keys()),list(dict_count.values()))
# bar() already labels the ticks with the category names; only restyle them
# instead of overwriting the labels.
plt.setp(ax.get_xticklabels(), rotation=40, ha="right")
# Set the title before tight_layout so the layout reserves room for it.
ax.set_title('Degree Centrality')
plt.tight_layout()
plt.savefig('node_strength.png')
plt.show()

In [None]:
#Eigenvector centrality UnDirected weighted
# For every year in ``years_``: normalise the edge weights, compute weighted
# eigenvector centrality (rounded to 3 places), print the ranking, and count
# in ``dict_count_e_`` how many years each node appeared in.
dict_count_e_ = {}
for year in years_:
    # NOTE(review): 1942 is skipped with no stated reason - confirm why.
    if year==1942 :
        continue
    print(year)
    nodes_ud_w,edges_ud_w,weighted_edges_ud_w = data_undirected_weighted(df_trade,year)
    # Take the maximum once, before mutating: recomputing it inside the loop
    # would divide later edges by a different value once the largest edge has
    # already been rescaled.
    max_weight = max((edge[2] for edge in weighted_edges_ud_w), default=1)
    for edge in weighted_edges_ud_w:
        edge[2] = edge[2]/max_weight

    g_ud_w = graph_ud_w(nodes_ud_w,weighted_edges_ud_w)
    eigenvector_centrality_w = {k:round(v,3) for k,v in nx.eigenvector_centrality(g_ud_w,weight='weight').items()}
    # Highest centrality first
    eigenvector_centrality_w = dict(sorted(eigenvector_centrality_w.items(), key=operator.itemgetter(1),reverse=True))

    print(eigenvector_centrality_w)
    # NOTE(review): every node is counted, not only the top-ranked ones.
    for k in eigenvector_centrality_w:
        dict_count_e_[k] = dict_count_e_.get(k, 0) + 1


In [None]:
# Rank the counts from most to least frequent, keep the first ten, and draw
# them as a bar chart, also written to eigen.png.
# NOTE(review): the visible cell above fills ``dict_count_e_`` (trailing
# underscore); ``dict_count_e`` is not assigned in the visible cells - confirm
# which one is meant to be plotted.
dict_count_e = dict(sorted(dict_count_e.items(), key=operator.itemgetter(1),reverse=True))
dict_count_e = dict(itertools.islice(dict_count_e.items(), 10))

fig, ax = plt.subplots()
ax.bar(list(dict_count_e.keys()),list(dict_count_e.values()))
# bar() already labels the ticks with the category names; only restyle them
# instead of overwriting the labels.
plt.setp(ax.get_xticklabels(), rotation=40, ha="right")
# Set the title before tight_layout so the layout reserves room for it.
ax.set_title('Eigenvector Centrality')
plt.tight_layout()
plt.savefig('eigen.png')
plt.show()

In [None]:
#Closeness Centrality UnDirected weighted
# For every year in ``years_``: turn the normalised edge weights into
# distances (their inverse), compute closeness centrality over those
# distances, and count in ``dict_count_c_`` how many years each node appeared in.
dict_count_c_ = {}
for year in years_:
    print(year)
    nodes_ud_w,edges_ud_w,weighted_edges_ud_w = data_undirected_weighted(df_trade,year)

    # Take the maximum once, before mutating: recomputing it inside the loop
    # would mix raw weights with already-inverted ones when picking the divisor.
    max_weight = max((edge[2] for edge in weighted_edges_ud_w), default=1)
    for edge in weighted_edges_ud_w:
        normalised = edge[2]/max_weight
        # Larger weight -> shorter distance
        edge[2] = 1/normalised

    g_ud_w = graph_ud_w(nodes_ud_w,weighted_edges_ud_w)

    closeness_centrality_ud_w = nx.closeness_centrality(g_ud_w,distance = 'weight',wf_improved=True)
    # Highest centrality first
    closeness_centrality_ud_w = dict(sorted(closeness_centrality_ud_w.items(), key=operator.itemgetter(1),reverse=True))
    # NOTE(review): every node is counted, not only the top-ranked ones.
    for k in closeness_centrality_ud_w:
        dict_count_c_[k] = dict_count_c_.get(k, 0) + 1

In [None]:
# Rank the counts from most to least frequent, keep the first ten, and draw
# them as a bar chart, also written to closeness.png.
# NOTE(review): the visible cell above fills ``dict_count_c_`` (trailing
# underscore); ``dict_count_c`` is not assigned in the visible cells - confirm
# which one is meant to be plotted.
dict_count_c = dict(sorted(dict_count_c.items(), key=operator.itemgetter(1),reverse=True))
dict_count_c = dict(itertools.islice(dict_count_c.items(), 10))

fig, ax = plt.subplots()
ax.bar(list(dict_count_c.keys()),list(dict_count_c.values()))
# bar() already labels the ticks with the category names; only restyle them
# instead of overwriting the labels.
plt.setp(ax.get_xticklabels(), rotation=40, ha="right")
# Set the title before tight_layout so the layout reserves room for it.
ax.set_title('Closeness Centrality')
plt.tight_layout()
plt.savefig('closeness.png')
plt.show()