In [1]:
import pandas as pd
import numpy as np
import os
import networkx as nx
import operator
import matplotlib.pyplot as plt
import community
%matplotlib inline  
import plotly
from plotly.graph_objs import *
import plotly.plotly as py
import math


In [2]:
import copy
from bokeh.io import output_notebook
# Configure Bokeh so that subsequent show() calls render inline in the notebook.
output_notebook()


In [3]:
def loadTable(directory, country_code):
    """Load one 'TIV-Import-<code>-1950-2015.csv' file into a cleaned table.

    Parameters
    ----------
    directory : str
        Folder containing the CSV files. It is concatenated with the file name
        as-is, so it must end with a path separator.
    country_code : str
        Code embedded in the file name.

    Returns
    -------
    (pandas.DataFrame, str)
        The table (index: supplier names, columns: year labels as strings,
        missing cells replaced by 0) and the importing country's name parsed
        from the title cell 'TIV of arms exports to <name>, 1950-2015'.

    Notes
    -----
    The layout assumptions (9 preamble rows, one leading empty column, two
    trailing summary rows, one trailing total column) are taken from the
    slicing below, not from a specification -- TODO confirm against the data.
    """
    import re

    path = directory + 'TIV-Import-' + country_code + '-1950-2015.csv'
    df = pd.read_csv(path)

    # The importer's name is embedded in the title cell of the export.
    title = df.columns.values.tolist()[0]
    to_country = title.split('TIV of arms exports to ')[1].split(', 1950-2015')[0]

    # Skip the preamble. read_csv produces a default integer index, so the
    # label slice .loc[9:] selects the same rows the removed .ix[9:] did.
    df = df.loc[9:]

    # Drop the leading column, then promote the first remaining row to the
    # header. Building a list avoids mutating Index.values in place.
    df = df.drop(df.columns[[0]], axis=1)
    header = df.iloc[0].tolist()
    header[0] = 'country'
    df.columns = header
    df = df.set_index(df['country'])
    df = df.drop(df.columns[0], axis=1)

    # Take the data less the header row.
    df = df[1:]
    df.index.name = None
    df.columns.name = None

    # Year labels were parsed as floats ("1950.0"); strip the ".0" suffix.
    df.columns = [re.sub(r'\.0', '', str(label)) for label in df.columns]

    df = df.fillna(0)

    # Only remove the placeholder supplier when it is actually present,
    # instead of hiding every possible error behind a bare except.
    if 'Unknown country' in df.index:
        df = df.drop('Unknown country')

    # Last cleansing: drop the two trailing rows and the trailing column.
    df = df.drop(df.index[[-1, -2]])
    df = df.drop(df.columns[-1], axis=1)

    return df, to_country

In [4]:
# FORMAT
# onecountrydict[YEAR][TO][FROM] = value of the table cell (FROM row, YEAR column)
def convertTableToDict(df, onecountrydict, countryTo):
    """Merge one importer's table into the nested year/importer/supplier dict.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose index holds supplier names and whose columns hold years.
    onecountrydict : dict
        Accumulator, updated in place. Existing entries are never overwritten
        because every level is filled with ``setdefault``.
    countryTo : str
        Name of the importing country the table belongs to.

    Returns
    -------
    dict
        The same ``onecountrydict`` object, for call chaining.
    """
    for year in df.columns.values:
        suppliers = onecountrydict.setdefault(year, dict()).setdefault(countryTo, dict())
        for country in df.index:
            # DataFrame.at is the scalar accessor that replaces get_value,
            # which was removed in pandas 1.0.
            suppliers.setdefault(country, df.at[country, year])

    return onecountrydict

In [5]:
#delete empty cells from dict

def clean_empty(d):
    """Recursively strip falsy entries (0, None, '', empty containers).

    Lists and dicts are rebuilt without their falsy members after those
    members have themselves been cleaned; any other value is returned as-is.
    """
    if isinstance(d, list):
        kept_items = []
        for item in d:
            cleaned = clean_empty(item)
            if cleaned:
                kept_items.append(cleaned)
        return kept_items

    if isinstance(d, dict):
        kept_pairs = {}
        for key, value in d.items():
            cleaned = clean_empty(value)
            if cleaned:
                kept_pairs[key] = cleaned
        return kept_pairs

    # Scalars (and any other type) pass through untouched.
    return d


In [6]:
countryImportDict = dict()
PATH = '/Users/hai/Devproj/weaponstradeanalysis/data/'

countryCodeMap = dict()

# Load every import table in PATH, skipping hidden files and the code list.
for f in os.listdir(PATH):
    if f.startswith('.') or "country_codes.csv" in f:
        continue
    countryCode = f.replace('TIV-Import-', "").replace('-1950-2015.csv', "")
    df, to_country = loadTable(PATH, countryCode)
    countryImportDict = convertTableToDict(df, countryImportDict, to_country)
    countryCodeMap.setdefault(to_country, countryCode)

countryImportDict = clean_empty(countryImportDict)

# Re-key from [year][importer][exporter] to [year][exporter][importer],
# summing the values that land on the same exporter/importer pair.
MultiDiDict = dict()
for year, importsOfYear in countryImportDict.items():
    exportsOfYear = MultiDiDict.setdefault(year, dict())
    for countryImport, suppliers in importsOfYear.items():
        for countryExport, tradedValue in suppliers.items():
            deliveries = exportsOfYear.setdefault(countryExport, dict())
            deliveries[countryImport] = deliveries.get(countryImport, 0) + tradedValue

In [7]:
# Write the name -> code lookup collected above to disk, sorted by name.
df = pd.DataFrame.from_dict(countryCodeMap, orient="index").sort_index()
df.to_csv('countrymap.csv')


In [8]:
# Load the military expenditure workbook and trim it down to a table indexed by country.
militaryexpdf = pd.read_excel('/Users/hai/Devproj/weaponstradeanalysis/newdata/SIPRI extended milex database beta/Constant USD Beta.xlsx')
# Keep rows 2..174 and every column from position 2 onwards.
militaryexpdf = militaryexpdf.iloc[2:175, 2:]
# The first remaining row becomes the header; its first cell is renamed to 'Country'.
militaryexpdf.columns = militaryexpdf.iloc[0]
militaryexpdf.columns.values[0] = 'Country'
# Index by the 'Country' column, then remove that column from the data.
militaryexpdf = militaryexpdf.set_index((militaryexpdf['Country']))
militaryexpdf = militaryexpdf.drop(militaryexpdf.columns[0], axis=1)
# Drop the row that was promoted to the header.
militaryexpdf = militaryexpdf[1:] 
militaryexpdf.index.name = None #credit to ulynn
militaryexpdf.columns.name = None
# NOTE(review): the reason for excluding Montenegro is not visible here -- confirm.
militaryexpdf.drop(['Montenegro'], inplace = True)


# Semicolon-separated, header-less lookup with three columns per row.
namecorrectionmapping = pd.read_csv('/Users/hai/Devproj/weaponstradeanalysis/countrymapping.csv', delimiter = ';', header = None)
namecorrectionmapping.columns = ['countrycode','newname', 'oldname']

# Index by 'oldname' and keep only 'newname' (columns 0 and 2 are dropped),
# so to_dict() yields {'newname': {oldname: newname, ...}}.
namecorrectionmapping.set_index((namecorrectionmapping['oldname']), inplace = True)
namecorrectionmapping.drop(namecorrectionmapping.columns[[0,2]], axis=1, inplace = True)
namecorrectionmapping.index.name = None
namemappingdict = namecorrectionmapping.to_dict()


from decimal import Decimal


def convertmilitaryexpTableToDict(df, namemapping, inflationfactor=0.5520917815626):
    """Convert the military expenditure table into ``{year: {country: value}}``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table indexed by country name with one column per year.
    namemapping : dict
        Mapping of the form ``{'newname': {name_in_df: name_to_use}}``. A
        country missing from it raises ``KeyError``.
    inflationfactor : float, optional
        Multiplier applied to every float cell. The default is the inflation
        correction from constant 2014 to constant 1990 values.

    Returns
    -------
    dict
        ``{str(year): {mapped_country_name: value}}``. Float cells are scaled
        and rounded to one decimal place; non-float cells are stored unchanged.
        Existing entries are not overwritten when two rows map to the same name.
    """
    militaryexpdict = dict()
    for year in df.columns.values:
        expenditureOfYear = militaryexpdict.setdefault(str(year), dict())
        for country in df.index:
            # DataFrame.at replaces get_value, which was removed in pandas 1.0.
            value = df.at[country, year]
            if isinstance(value, float):
                # Plain float arithmetic keeps the result a float on every
                # Python version; round(Decimal(...), 1) returns a Decimal on
                # Python 3, which breaks later isinstance(..., float) checks.
                value = round(float(value) * inflationfactor, 1)
            expenditureOfYear.setdefault(namemapping['newname'][country], value)
    return militaryexpdict

# Build the {year: {country: expenditure}} lookup from the cleaned table and the name mapping.
militaryexpdict = convertmilitaryexpTableToDict(militaryexpdf, namemappingdict)

In [9]:
# Read the semicolon-separated list of entities to exclude and keep only
# their names, as an unnamed pandas Index.
excludedtable = pd.read_csv('excludedcountries.csv', delimiter=';', header = None)
excludedtable.columns = ['Name','Code']
excludedentities = excludedtable.set_index('Name').index.rename(None)


In [10]:
def clean_noncountries(d, excluded=None):
    """Recursively remove every dict key that names an excluded entity.

    Parameters
    ----------
    d : object
        Nested dict to filter. Anything that is not a dict is returned as-is.
    excluded : container, optional
        Keys to remove at every nesting level. Defaults to the module-level
        ``excludedentities``, which keeps existing one-argument calls working.

    Returns
    -------
    object
        A new dict without the excluded keys, or ``d`` itself for non-dicts.
    """
    if excluded is None:
        excluded = excludedentities
    if not isinstance(d, dict):
        return d
    # Filter first so that subtrees under excluded keys are never visited.
    return {key: clean_noncountries(value, excluded)
            for key, value in d.items()
            if key not in excluded}

In [11]:
def createNeighbourGraph(G, node):
    """Return an undirected graph holding only the edges of ``G`` that touch ``node``.

    Parameters
    ----------
    G : networkx graph
        Source graph; it is only read.
    node : hashable
        The node whose incident edges are copied.

    Returns
    -------
    networkx.Graph
        New graph with the incident edges and their attribute dicts. It is
        empty when ``node`` has no edges in ``G``.
    """
    newGraph = nx.Graph()

    for source, target, attributes in G.edges(data=True):
        if node in (source, target):
            # add_edges_from accepts (u, v, attr_dict) triples in networkx 1.x
            # and 2.x alike; add_edge(u, v, attr_dict) is rejected by 2.x.
            newGraph.add_edges_from([(source, target, attributes)])

    return newGraph

In [12]:
# Collapse directed trade into one undirected total per country pair.

def addUpDict(MasterDict):
    """Sum both trade directions of every country pair, per year.

    Parameters
    ----------
    MasterDict : dict
        ``{year: {exporter: {importer: value}}}``. It is not modified.

    Returns
    -------
    dict
        Same nesting as the input. For each pair the combined value
        (A->B plus B->A) is stored under the direction encountered first;
        the opposite direction, when present in the input, is kept with the
        placeholder value 0 so that the key layout mirrors the input.
        A self-trade entry (exporter == importer) is counted once.
    """
    AddedUpDict = dict()

    for year, exportsOfYear in MasterDict.items():
        totalsOfYear = AddedUpDict.setdefault(year, dict())
        settledPairs = set()  # unordered pairs that already received their total

        for countryExport, deliveries in exportsOfYear.items():
            for countryImport, forward in deliveries.items():
                totals = totalsOfYear.setdefault(countryExport, dict())
                totals.setdefault(countryImport, 0)

                pair = frozenset((countryExport, countryImport))
                if pair in settledPairs:
                    # The total lives under the opposite direction already.
                    continue
                settledPairs.add(pair)

                # Is the other direction available? For self-trade the
                # "other direction" is the same entry, so it must not be added.
                if countryImport == countryExport:
                    backward = 0
                else:
                    backward = exportsOfYear.get(countryImport, {}).get(countryExport, 0)

                if (forward + backward) != 0:
                    totals[countryImport] = forward + backward

    return AddedUpDict

In [13]:
def createYearGraph(AddedUpDict, year, militaryexpdict):
    """Build the undirected, weighted trade graph of one year.

    Parameters
    ----------
    AddedUpDict : dict
        ``{year: {country: {partner: value}}}``; zero values create no edge.
    year : hashable
        Key selecting the year in both dicts. A year missing from
        ``AddedUpDict`` raises ``KeyError``.
    militaryexpdict : dict
        ``{year: {country: expenditure}}``. A missing year or country results
        in the node attribute value ``'no data'``.

    Returns
    -------
    networkx.Graph
        Edges carry a ``'weight'`` attribute; every node carries a
        ``'military expenditure'`` attribute.
    """
    G = nx.Graph()
    for countryExport, partners in AddedUpDict[year].items():
        for countryImport, volume in partners.items():
            if volume != 0:
                G.add_weighted_edges_from([(countryExport, countryImport, volume)])

    expenditureOfYear = militaryexpdict.get(year, {})
    for node in list(G.nodes()):
        # Re-adding an existing node only updates its attributes. This form
        # works on networkx 1.x and 2.x, unlike nodes_iter() / G.node[...].
        G.add_node(node, **{'military expenditure': expenditureOfYear.get(node, 'no data')})

    return G

In [14]:
def createYearMultiDiGraph(AddedUpDict, year,militaryexpdict):
    """Build the directed, weighted trade multigraph of one year.

    Parameters
    ----------
    AddedUpDict : dict
        ``{year: {exporter: {importer: value}}}``; each non-zero value becomes
        an edge exporter -> importer.
    year : hashable
        Key selecting the year in both dicts. A year missing from
        ``AddedUpDict`` raises ``KeyError``.
    militaryexpdict : dict
        ``{year: {country: expenditure}}``. A missing year or country results
        in the node attribute value ``'no data'``.

    Returns
    -------
    networkx.MultiDiGraph
        Edges carry a ``'weight'`` attribute; every node carries a
        ``'military expenditure'`` attribute.
    """
    G = nx.MultiDiGraph()
    for countryExport, deliveries in AddedUpDict[year].items():
        for countryImport, volume in deliveries.items():
            if volume != 0:
                G.add_weighted_edges_from([(countryExport, countryImport, volume)])

    expenditureOfYear = militaryexpdict.get(year, {})
    for node in list(G.nodes()):
        # Re-adding an existing node only updates its attributes. This form
        # works on networkx 1.x and 2.x, unlike nodes_iter() / G.node[...].
        G.add_node(node, **{'military expenditure': expenditureOfYear.get(node, 'no data')})

    return G

# Setup Dicts

In [15]:
def _onlyCountries(tradeDict):
    # Drop excluded entities first, then prune whatever became empty.
    return clean_empty(clean_noncountries(tradeDict))

# CompleteDict starts from an independent copy of the unfiltered data.
CompleteDict = _onlyCountries(copy.deepcopy(MultiDiDict))
MultiDiDict = _onlyCountries(MultiDiDict)
AddedUpDict = _onlyCountries(addUpDict(MultiDiDict))

# Reference graphs for the year 1960: directed flows and undirected totals.
multigraph = createYearMultiDiGraph(MultiDiDict, '1960', militaryexpdict)
mastergraph = createYearGraph(AddedUpDict, '1960', militaryexpdict)

In [16]:
# Fill colour per community id (ids 0-7).
colormap = {
    0: 'rgb(91,147,204)',
    1: 'rgb(255,46,43)',
    2: 'rgb(145,58,31)',
    3: 'rgb(151,219,79)',
    4: 'rgb(41,255,25)',
    5: 'rgb(255,104,0)',
    6: 'rgb(44,20,204)',
    7: 'rgb(178,84,18)',
}

In [17]:
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource,HoverTool
from bokeh.io import curdoc
from bokeh.layouts import row, widgetbox
from bokeh.models.widgets import Slider, TextInput

In [18]:
def influenceOverTimeAll(countrydict,militaryexpdict):
    """Collect each country's betweenness centrality for every year.

    Builds the yearly graph with ``createYearGraph`` for each key of
    ``countrydict`` and returns ``{country: {year: centrality}}``.
    """
    influenceAll = dict()
    for year in countrydict:
        yearGraph = createYearGraph(countrydict, year, militaryexpdict)
        centrality = nx.betweenness_centrality(yearGraph)
        for country, score in centrality.items():
            # setdefault creates the per-country timeline on first sight.
            influenceAll.setdefault(country, dict())[year] = score
    return influenceAll

In [19]:
def generateTimelinesPlot(influencedict, countries, name):
    """Plot one line per country from a ``{country: {year: value}}`` dict.

    Parameters
    ----------
    influencedict : dict
        ``{country: {year: value}}``; every entry of ``countries`` must be a key.
    countries : iterable
        Countries to draw, in legend order.
    name : str
        Label shown for the value in the hover tooltip.

    Returns
    -------
    bokeh figure
        Figure with one line per country plus a scatter layer that carries
        the hover tooltips.
    """
    from bokeh.palettes import Spectral10

    plot = figure(plot_width=600, plot_height=400, tools=['pan','tap','box_zoom','reset','wheel_zoom','save'])

    # Index into the palette instead of removing entries from a copy: this
    # works whether the palette is a list or a tuple, and it cycles the
    # colours instead of raising IndexError beyond ten countries.
    palette = list(Spectral10)

    d = dict(x=[], y=[],country=[], color=[])
    for position, country in enumerate(countries):
        countrycolor = palette[position % len(palette)]
        timeline = sorted(influencedict[country].items(), key=operator.itemgetter(0))
        xe = [point[0] for point in timeline]
        ye = [point[1] for point in timeline]

        d['x'].extend(xe)
        d['y'].extend(ye)
        d['country'].extend([country] * len(timeline))
        d['color'].extend([countrycolor] * len(timeline))

        plot.line(xe, ye,legend=country, color=countrycolor)

    # The scatter layer exists so the hover tool has per-point data to show.
    s1 = plot.scatter('x', 'y', line_width=1, color = 'color', source=ColumnDataSource(d))
    plot.add_tools(HoverTool(renderers=[s1], tooltips=[('year','@x'),('country','@country'),(name,'@y')]))

    return plot

### Definition

In [20]:
def WeaponFlowTimeAll(MultiDiDict,militaryexpdict):
    """Total the incoming and outgoing edge weights per country and year.

    Parameters
    ----------
    MultiDiDict : dict
        ``{year: {exporter: {importer: value}}}``, passed to
        ``createYearMultiDiGraph`` for every year.
    militaryexpdict : dict
        Forwarded unchanged to ``createYearMultiDiGraph``.

    Returns
    -------
    (dict, dict)
        ``(weaponinflow, weaponoutflow)``, each ``{country: {year: total}}``.
        Only countries that are nodes of the year's graph get an entry for
        that year.
    """
    weaponinflow = dict()
    weaponoutflow = dict()
    for year in MultiDiDict:
        G = createYearMultiDiGraph(MultiDiDict, year,militaryexpdict)
        for country in list(G.nodes()):
            # Read the 'weight' attribute by name: dict.values()[0] only works
            # on Python 2, and the *_iter methods only exist in networkx 1.x.
            inflow = sum(data['weight']
                         for _, _, data in G.in_edges(nbunch=country, data=True))
            outflow = sum(data['weight']
                          for _, _, data in G.out_edges(nbunch=country, data=True))

            weaponinflow.setdefault(country, dict())[year] = inflow
            weaponoutflow.setdefault(country, dict())[year] = outflow

    return weaponinflow, weaponoutflow


In [21]:
# Per-country, per-year totals of incoming and outgoing edge weights.
weaponinflow, weaponoutflow = WeaponFlowTimeAll(MultiDiDict,militaryexpdict)

## New Visualization

In [22]:
# Node positions for mastergraph.
# NOTE(review): no random seed is set, so positions presumably differ between runs -- confirm.
layout = nx.spring_layout(mastergraph)

In [23]:
def get_nodes_specs(_network, _layout):
    """Assemble the per-node columns used to draw the network.

    Parameters
    ----------
    _network : networkx graph
        Graph whose edges carry ``'weight'`` and whose nodes carry
        ``'military expenditure'`` (a float, or a placeholder such as
        ``'no data'``).
    _layout : dict
        ``{node: (x, y)}`` positions.

    Returns
    -------
    dict
        Column name -> list, one entry per node: ``x``, ``y``, ``color``,
        ``country``, ``size``, ``totaltrade``, ``cluster``, ``alpha``,
        ``largestpartner`` and ``military_expenditure``.
    """
    d = dict(x=[], y=[], color=[], country=[], size=[], totaltrade =[],
             cluster=[], alpha=[], largestpartner=[], military_expenditure=[])

    partition = community.best_partition(_network)

    # One pass over the edges instead of rescanning all edges for every node.
    # Edges are visited in the same order as before, so ties for the largest
    # partner resolve identically (the last edge with the maximum weight wins).
    totals = dict()
    largest = dict()  # country -> (weight, partner)
    for source, target, attributes in _network.edges(data=True):
        weight = attributes['weight']
        endpoints = [(source, target)]
        if target != source:
            endpoints.append((target, source))
        for country, partner in endpoints:
            totals[country] = totals.get(country, 0) + weight
            if weight >= largest.get(country, (0, ''))[0]:
                largest[country] = (weight, partner)

    for country, attributes in _network.nodes(data=True):
        totaltrade = totals.get(country, 0)
        expenditure = attributes.get('military expenditure', 'no data')
        cluster = partition[country]

        d['x'].append(_layout[country][0])
        d['y'].append(_layout[country][1])
        # Wrap around so more communities than colours cannot raise KeyError.
        d['color'].append(colormap[cluster % len(colormap)])
        d['country'].append(country)

        if isinstance(expenditure, float) and not math.isnan(expenditure) and expenditure > 0:
            # Size by expenditure when a usable figure exists.
            d['size'].append((np.log(expenditure))*0.8)
            d['alpha'].append(0.6)
        else:
            # Otherwise size by trade volume and draw the node fainter;
            # log() is skipped for non-positive totals to avoid -inf / nan.
            d['size'].append(np.log(totaltrade) if totaltrade > 0 else 0)
            d['alpha'].append(0.2)

        d['totaltrade'].append(totaltrade)
        d['cluster'].append(cluster)

        d['largestpartner'].append(largest.get(country, (0, ''))[1])
        d['military_expenditure'].append(expenditure)

    return d

In [24]:
def get_edges_specs(_network, _layout):
    """Assemble the per-edge columns used to draw the network.

    Parameters
    ----------
    _network : networkx graph
        Graph whose edges carry a ``'weight'`` attribute.
    _layout : dict
        ``{node: (x, y)}`` positions.

    Returns
    -------
    dict
        ``xs`` / ``ys``: endpoint coordinates per edge; ``alphas``:
        ``0.1 + 0.6 * weight / max_weight``; ``width``: ``log(weight)``.
        All lists are empty when the graph has no edges.
    """
    d = dict(xs=[], ys=[], alphas=[], width=[])

    edgelist = list(_network.edges(data=True))
    if not edgelist:
        return d

    # Compare the weights themselves. Taking max() over the attribute dicts
    # and dividing by a .values() list only worked through numpy broadcasting
    # on Python 2 and raises TypeError on Python 3.
    max_weight = float(max(data['weight'] for _, _, data in edgelist))

    for u, v, data in edgelist:
        weight = data['weight']
        share = weight / max_weight if max_weight else 0.0
        d['xs'].append([_layout[u][0], _layout[v][0]])
        d['ys'].append([_layout[u][1], _layout[v][1]])
        d['alphas'].append(0.1 + 0.6 * share)
        d['width'].append(np.log(weight))

    return d

In [28]:
year = '1990'

#pregenerate all graphs

def buildAllGraphs(countrydict, militaryexpdict):
    """Build the graph and a spring layout for every year in ``countrydict``.

    Returns ``{year: [graph, layout]}`` where ``graph`` comes from
    ``createYearGraph`` and ``layout`` from ``nx.spring_layout``.
    """
    graphsByYear = dict()
    for graphyear in countrydict:
        G = createYearGraph(countrydict, graphyear, militaryexpdict)
        graphsByYear[graphyear] = [G, nx.spring_layout(G)]

    return graphsByYear

# The builder has its own name so that the result does not overwrite the
# function: the cell can be re-run without "dict is not callable".
AllGraphs = buildAllGraphs(AddedUpDict, militaryexpdict)


In [29]:
# One data source per year. The dicts are created once, outside the loop, so
# every year is kept; the loop variable has its own name so the selected
# `year` is not overwritten.
nodes = dict()
edges = dict()
for graphyear in AddedUpDict:
    nodes[graphyear] = ColumnDataSource(get_nodes_specs(AllGraphs[graphyear][0],AllGraphs[graphyear][1]))
    edges[graphyear] = ColumnDataSource(get_edges_specs(AllGraphs[graphyear][0],AllGraphs[graphyear][1]))

hover = HoverTool(tooltips=[('country','@country'),
                            ('cluster','@cluster'),
                            ('military expenditure','@military_expenditure'),
                            ('largest tradepartner', '@largestpartner'),
                            #('imports from', '@import'),
                            #('exports from', '@export')
                           ])

plot = figure(plot_width=800, plot_height=600, tools=['pan','tap',hover,'box_zoom','reset','wheel_zoom','save'])

# Glyphs need a single ColumnDataSource, so draw the currently selected year
# rather than passing the whole year-keyed dict.
r_circles = plot.circle('x','y', source=nodes[year], size='size', color='color', level='overlay',alpha='alpha')
r_lines = plot.multi_line('xs', 'ys', source=edges[year], line_width='width', alpha='alphas', color='navy')

In [None]:
#widget
# Slider for choosing a year between 1950 and 2015 in steps of 1.
# NOTE(review): this rebinds `year`, which earlier cells use as a string key
# such as '1990' -- confirm that no later cell still expects the string.
year = Slider(title="year", value=1950, start=1950, end=2015, step = 1)

In [None]:
#callback

In [27]:
# Render the network figure built above.
show(plot)