### Webscraping
------------------------

In [1]:
import requests
from bs4 import BeautifulSoup
import json

result = requests.get('https://github.com/factbook/factbook.json')

content = result.content

bsoup = BeautifulSoup(content, 'html5lib')

tds = bsoup.find_all('td', class_='content')

continents = []
for td in tds[1:14]:
    continents.append(td.text.split('\n')[1].split(' ')[-1])

code_to_continent = {}

for continent in continents: 
    if continent != 'meta':
        url = 'https://github.com/factbook/factbook.json/tree/master/{c}'.format(c=continent)
        countries_page = requests.get(url)
        content = countries_page.content
        bsoup = BeautifulSoup(content, 'html5lib')
        tds = bsoup.find_all('td', class_='content')
        for td in tds[1:]:
            code_to_continent[td.text.split('\n')[1].split(' ')[-1].split('.')[0]] = continent

In [2]:
class Country:
    def __init__(self, name, code):
        self.name = name.lower()
        self.code = code
        self.exporting_partners = {}
    def add_partner(self, p, v):
        self.exporting_partners[p] = v

In [3]:
idx = 0

countries = {}

for code in code_to_continent:
    print(code)
    url = 'https://raw.githubusercontent.com/factbook/factbook.json/master/{continent}/{code}.json'.format(continent=code_to_continent[code], code=code)
    print(url)
    json = requests.get(url).json()
    if 'Country name' in json['Government']:
        if 'conventional short form' in json['Government']['Country name']:
            name = json['Government']['Country name']['conventional short form']['text']
            if 'Exports - partners' in json['Economy']:
                partners = json['Economy']['Exports - partners']['text'].split(',')
                #print(partners)
                country = Country(name, code)
                for partner in partners:
                    p = partner.split()
                    if p[-1][0] == '(':
                        country.add_partner(' '.join(p[0:-2]).lower(), float(p[-2].split('%')[0]))
                    elif p[-1][0] == 'e':
                        country.add_partner(' '.join(p[0:-3]).lower(), float(p[-3].split('%')[0]))  
                        #print(p)
                    else:
                        country.add_partner(' '.join(p[0:-1]).lower(), float(p[-1].split('%')[0]))
                countries[code] = country

ag
https://raw.githubusercontent.com/factbook/factbook.json/master/africa/ag.json
ao
https://raw.githubusercontent.com/factbook/factbook.json/master/africa/ao.json
bc
https://raw.githubusercontent.com/factbook/factbook.json/master/africa/bc.json
bn
https://raw.githubusercontent.com/factbook/factbook.json/master/africa/bn.json
by
https://raw.githubusercontent.com/factbook/factbook.json/master/africa/by.json
cd
https://raw.githubusercontent.com/factbook/factbook.json/master/africa/cd.json
cf
https://raw.githubusercontent.com/factbook/factbook.json/master/africa/cf.json
cg
https://raw.githubusercontent.com/factbook/factbook.json/master/africa/cg.json
cm
https://raw.githubusercontent.com/factbook/factbook.json/master/africa/cm.json
cn
https://raw.githubusercontent.com/factbook/factbook.json/master/africa/cn.json
ct
https://raw.githubusercontent.com/factbook/factbook.json/master/africa/ct.json
cv
https://raw.githubusercontent.com/factbook/factbook.json/master/africa/cv.json
dj
https://raw.g

bh
https://raw.githubusercontent.com/factbook/factbook.json/master/central-america-n-caribbean/bh.json
bq
https://raw.githubusercontent.com/factbook/factbook.json/master/central-america-n-caribbean/bq.json
cc
https://raw.githubusercontent.com/factbook/factbook.json/master/central-america-n-caribbean/cc.json
cj
https://raw.githubusercontent.com/factbook/factbook.json/master/central-america-n-caribbean/cj.json
cs
https://raw.githubusercontent.com/factbook/factbook.json/master/central-america-n-caribbean/cs.json
cu
https://raw.githubusercontent.com/factbook/factbook.json/master/central-america-n-caribbean/cu.json
do
https://raw.githubusercontent.com/factbook/factbook.json/master/central-america-n-caribbean/do.json
dr
https://raw.githubusercontent.com/factbook/factbook.json/master/central-america-n-caribbean/dr.json
es
https://raw.githubusercontent.com/factbook/factbook.json/master/central-america-n-caribbean/es.json
gj
https://raw.githubusercontent.com/factbook/factbook.json/master/centra

ls
https://raw.githubusercontent.com/factbook/factbook.json/master/europe/ls.json
lu
https://raw.githubusercontent.com/factbook/factbook.json/master/europe/lu.json
md
https://raw.githubusercontent.com/factbook/factbook.json/master/europe/md.json
mj
https://raw.githubusercontent.com/factbook/factbook.json/master/europe/mj.json
mk
https://raw.githubusercontent.com/factbook/factbook.json/master/europe/mk.json
mn
https://raw.githubusercontent.com/factbook/factbook.json/master/europe/mn.json
mt
https://raw.githubusercontent.com/factbook/factbook.json/master/europe/mt.json
nl
https://raw.githubusercontent.com/factbook/factbook.json/master/europe/nl.json
no
https://raw.githubusercontent.com/factbook/factbook.json/master/europe/no.json
pl
https://raw.githubusercontent.com/factbook/factbook.json/master/europe/pl.json
po
https://raw.githubusercontent.com/factbook/factbook.json/master/europe/po.json
ri
https://raw.githubusercontent.com/factbook/factbook.json/master/europe/ri.json
ro
https://raw.g

In [4]:
import pandas as pd
import networkx as nx
import matplotlib as plt
G = nx.DiGraph()

for country in countries:
    for partner in countries[country].exporting_partners: 
        G.add_edge(countries[country].name, partner, weight=countries[country].exporting_partners[partner]/100)

In [5]:
from networkx.readwrite import json_graph
data = json_graph.node_link_data(G)

In [6]:
import json
with open('graph.json', 'w') as fp:
    json.dump(data, fp)

In [40]:
degrees = nx.degree(G)

ds = {}
for name, d in degrees: 
    ds[name] = d

nx.set_node_attributes(G, ds)

{'algeria': 0.0, 'spain': 0.020689727753186317, 'france': 0.012846427618835109, 'us': 0.0, 'italy': 0.013409068509673546, 'uk': 0.0, 'brazil': 0.002438463308028526, 'tunisia': 0.0004924722104966933, 'germany': 0.045759863374539064, 'angola': 0.010763253756077732, 'china': 0.02295494464834553, 'india': 0.00552647939379331, 'south africa': 0.010672175330258908, 'benin': 0.0010502994392117791, 'gabon': 0.0016672399147423772, 'niger': 4.375161966269325e-05, 'bangladesh': 0.0008871707689774383, 'nigeria': 0.005325938230117208, 'vietnam': 0.014758471364634302, 'burundi': 0.0, 'pakistan': 0.007227960286564478, 'democratic republic of the congo': 0.0, 'uganda': 0.00046885364529940303, 'sweden': 0.0031633522505198314, 'belgium': 0.0016792073989203577, 'rwanda': 0.0006967755912458319, 'chad': 0.0, 'japan': 0.021431861448746214, 'congo (brazzaville)': 0.0, 'portugal': 0.0149881851338162, 'drc': 0.0, 'zambia': 0.0001407063458561981, 'south korea': 0.014753465530868086, 'cameroon': 0.0, 'netherland

TypeError: 'int' object is not iterable

In [30]:
data = json_graph.node_link_data(G)
data['nodes']

[{'d': DiDegreeView({'algeria': 8, 'spain': 31, 'france': 57, 'us': 103, 'italy': 52, 'uk': 49, 'brazil': 13, 'tunisia': 6, 'germany': 72, 'angola': 7, 'china': 95, 'india': 53, 'south africa': 18, 'benin': 8, 'gabon': 9, 'niger': 5, 'bangladesh': 9, 'nigeria': 18, 'vietnam': 11, 'burundi': 9, 'pakistan': 12, 'democratic republic of the congo': 6, 'uganda': 10, 'sweden': 15, 'belgium': 26, 'rwanda': 10, 'chad': 4, 'japan': 44, 'congo (brazzaville)': 5, 'portugal': 10, 'drc': 4, 'zambia': 6, 'south korea': 26, 'cameroon': 8, 'netherlands': 41, 'comoros': 7, 'saudi arabia': 15, 'singapore': 16, 'mauritius': 11, 'none': 7, 'norway': 13, 'cabo verde': 2, 'australia': 16, 'djibouti': 4, 'somalia': 4, 'yemen': 8, 'uae': 18, 'egypt': 9, 'turkey': 21, 'equatorial guinea': 9, 'ethiopia': 6, 'switzerland': 27, 'the gambia': 4, 'trinidad and tobago': 12, 'ghana': 7, 'guinea': 7, 'ireland': 11, 'ukraine': 9, "cote d'ivoire": 10, 'burkina faso': 4, 'kenya': 8, 'tanzania': 6, 'liberia': 6, 'poland':