### Webscraping
------------------------

In [1]:
import requests
from bs4 import BeautifulSoup
import json

def _listing_names(url, start, stop=None):
    """Fetch a GitHub directory listing page and return the entry names it shows.

    Parameters
    ----------
    url : str
        Address of the listing page to download.
    start, stop : int or None
        Slice bounds applied to the ``<td class="content">`` cells before
        any text is extracted, so cells outside the slice are never parsed.

    Returns
    -------
    list of str
        For each selected cell: the last space-separated token on the
        second line of the cell's text.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    response = requests.get(url)
    # Fail loudly on an HTTP error instead of parsing an error page and
    # silently ending up with an empty (or wrong) mapping.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html5lib')
    cells = soup.find_all('td', class_='content')
    return [cell.text.split('\n')[1].split(' ')[-1] for cell in cells[start:stop]]


# Entries of the repository root listing. Only cells 1..13 are used;
# NOTE(review): the bounds are hard-coded to the page layout at the time of
# writing -- confirm they still select exactly the region directories.
continents = _listing_names('https://github.com/factbook/factbook.json', 1, 14)

# Maps each file stem found in a region directory (e.g. 'ag') to that
# directory's name (e.g. 'africa').
code_to_continent = {}

for continent in continents:
    # The 'meta' entry is skipped; presumably it is not a region directory.
    if continent == 'meta':
        continue
    url = 'https://github.com/factbook/factbook.json/tree/master/{c}'.format(c=continent)
    # The first cell of each region listing is skipped, as in the root listing.
    for file_name in _listing_names(url, 1):
        # 'ag.json' -> 'ag'
        code_to_continent[file_name.split('.')[0]] = continent

In [2]:
class Country:
    """A country together with the destinations of its exports.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, name, code):
        """Create a country with no export partners recorded yet.

        ``name`` is stored lower-cased; ``code`` is stored unchanged.
        """
        # Lower-cased display name.
        self.name = name.lower()
        # Identifier passed in by the caller, kept verbatim.
        self.code = code
        # partner -> value, filled through add_partner().
        self.exporting_partners = dict()

    def add_partner(self, p, v):
        """Record value ``v`` for partner ``p``, replacing any earlier value."""
        self.exporting_partners.update({p: v})

In [3]:
def _parse_export_partner(entry):
    """Split one comma-separated export-partner fragment into ``(name, percent)``.

    The position of the percentage token is decided by the last
    whitespace-separated token of ``entry``:

    * last token starts with ``'('``  -> percentage is the token before it
      (e.g. ``'Germany 4.5% (2017)'``);
    * last token starts with ``'e'``  -> percentage is two tokens before it
      (e.g. ``'Germany 4.5% (2017 est.)'``);
    * otherwise                       -> the last token is the percentage
      (e.g. ``'Spain 18.8%'``).

    Returns
    -------
    tuple (str, float) or None
        Lower-cased partner name and the number in front of ``'%'``;
        ``None`` when ``entry`` contains no tokens at all.

    Raises
    ------
    ValueError
        If the selected token does not start with a number.
    """
    tokens = entry.split()
    if not tokens:
        # e.g. a trailing comma in the source text; nothing to record.
        return None
    marker = tokens[-1][0]
    if marker == '(':
        value_pos = -2
    elif marker == 'e':
        value_pos = -3
    else:
        value_pos = -1
    partner_name = ' '.join(tokens[:value_pos]).lower()
    return partner_name, float(tokens[value_pos].split('%')[0])


# NOTE(review): idx is not read anywhere in this cell; kept in case another
# cell relies on it.
idx = 0

# code -> Country, only for entries that have both a conventional short
# name and an 'Exports - partners' field.
countries = {}

for code in code_to_continent:
    print(code)
    url = 'https://raw.githubusercontent.com/factbook/factbook.json/master/{continent}/{code}.json'.format(continent=code_to_continent[code], code=code)
    print(url)
    # Deliberately not called `json`: that name is the imported module and
    # rebinding it would break any later json.* call in the same kernel.
    factbook = requests.get(url).json()

    government = factbook['Government']
    if 'Country name' not in government:
        continue
    if 'conventional short form' not in government['Country name']:
        continue
    name = government['Country name']['conventional short form']['text']

    if 'Exports - partners' not in factbook['Economy']:
        continue
    partners = factbook['Economy']['Exports - partners']['text'].split(',')

    country = Country(name, code)
    for partner in partners:
        parsed = _parse_export_partner(partner)
        if parsed is None:
            continue
        country.add_partner(*parsed)
    countries[code] = country

ag
https://raw.githubusercontent.com/factbook/factbook.json/master/africa/ag.json
ao
https://raw.githubusercontent.com/factbook/factbook.json/master/africa/ao.json
bc
https://raw.githubusercontent.com/factbook/factbook.json/master/africa/bc.json
bn
https://raw.githubusercontent.com/factbook/factbook.json/master/africa/bn.json
by
https://raw.githubusercontent.com/factbook/factbook.json/master/africa/by.json
cd
https://raw.githubusercontent.com/factbook/factbook.json/master/africa/cd.json
cf
https://raw.githubusercontent.com/factbook/factbook.json/master/africa/cf.json
cg
https://raw.githubusercontent.com/factbook/factbook.json/master/africa/cg.json
cm
https://raw.githubusercontent.com/factbook/factbook.json/master/africa/cm.json
cn
https://raw.githubusercontent.com/factbook/factbook.json/master/africa/cn.json
ct
https://raw.githubusercontent.com/factbook/factbook.json/master/africa/ct.json
cv
https://raw.githubusercontent.com/factbook/factbook.json/master/africa/cv.json
dj
https://raw.g

bh
https://raw.githubusercontent.com/factbook/factbook.json/master/central-america-n-caribbean/bh.json
bq
https://raw.githubusercontent.com/factbook/factbook.json/master/central-america-n-caribbean/bq.json
cc
https://raw.githubusercontent.com/factbook/factbook.json/master/central-america-n-caribbean/cc.json
cj
https://raw.githubusercontent.com/factbook/factbook.json/master/central-america-n-caribbean/cj.json
cs
https://raw.githubusercontent.com/factbook/factbook.json/master/central-america-n-caribbean/cs.json
cu
https://raw.githubusercontent.com/factbook/factbook.json/master/central-america-n-caribbean/cu.json
do
https://raw.githubusercontent.com/factbook/factbook.json/master/central-america-n-caribbean/do.json
dr
https://raw.githubusercontent.com/factbook/factbook.json/master/central-america-n-caribbean/dr.json
es
https://raw.githubusercontent.com/factbook/factbook.json/master/central-america-n-caribbean/es.json
gj
https://raw.githubusercontent.com/factbook/factbook.json/master/centra

ls
https://raw.githubusercontent.com/factbook/factbook.json/master/europe/ls.json
lu
https://raw.githubusercontent.com/factbook/factbook.json/master/europe/lu.json
md
https://raw.githubusercontent.com/factbook/factbook.json/master/europe/md.json
mj
https://raw.githubusercontent.com/factbook/factbook.json/master/europe/mj.json
mk
https://raw.githubusercontent.com/factbook/factbook.json/master/europe/mk.json
mn
https://raw.githubusercontent.com/factbook/factbook.json/master/europe/mn.json
mt
https://raw.githubusercontent.com/factbook/factbook.json/master/europe/mt.json
nl
https://raw.githubusercontent.com/factbook/factbook.json/master/europe/nl.json
no
https://raw.githubusercontent.com/factbook/factbook.json/master/europe/no.json
pl
https://raw.githubusercontent.com/factbook/factbook.json/master/europe/pl.json
po
https://raw.githubusercontent.com/factbook/factbook.json/master/europe/po.json
ri
https://raw.githubusercontent.com/factbook/factbook.json/master/europe/ri.json
ro
https://raw.g

In [56]:
import pandas as pd
import networkx as nx
import matplotlib as plt
# Directed graph with one edge per (country, export partner) pair. The edge
# runs from the country's stored name to the partner key, and its weight is
# the recorded partner value divided by 100.
G = nx.DiGraph()

for exporter in countries.values():
    for partner_name, percent in exporter.exporting_partners.items():
        G.add_edge(exporter.name, partner_name, weight=percent / 100)

In [57]:
# Store every node's degree (as reported by nx.degree) on the node itself
# under the 'degree' attribute.
degrees = nx.degree(G)

ds = {node: node_degree for node, node_degree in degrees}

nx.set_node_attributes(G, ds, 'degree')

In [58]:
from networkx.readwrite import json_graph
# Convert the graph into networkx's node-link form, a plain dict.
# NOTE(review): the exact key names in this dict depend on the installed
# networkx version -- confirm against whatever consumes graph.json.
data = json_graph.node_link_data(G)

In [59]:
import json
# Write the node-link dict to graph.json in the current working directory,
# overwriting any existing file.
with open('graph.json', 'w') as out_file:
    out_file.write(json.dumps(data))

In [60]:
# Show the whole node-link dict as the cell output for a visual check.
# NOTE(review): this prints every node and edge; consider displaying only a
# slice (e.g. data['links'][:10]) to keep the notebook small.
data

{'directed': True,
 'graph': {},
 'links': [{'source': 'algeria', 'target': 'spain', 'weight': 0.188},
  {'source': 'algeria', 'target': 'france', 'weight': 0.11199999999999999},
  {'source': 'algeria', 'target': 'us', 'weight': 0.08800000000000001},
  {'source': 'algeria', 'target': 'italy', 'weight': 0.087},
  {'source': 'algeria', 'target': 'uk', 'weight': 0.071},
  {'source': 'algeria', 'target': 'brazil', 'weight': 0.052000000000000005},
  {'source': 'algeria', 'target': 'tunisia', 'weight': 0.049},
  {'source': 'algeria', 'target': 'germany', 'weight': 0.045},
  {'source': 'spain', 'target': 'france', 'weight': 0.157},
  {'source': 'spain', 'target': 'germany', 'weight': 0.11},
  {'source': 'spain', 'target': 'italy', 'weight': 0.07400000000000001},
  {'source': 'spain', 'target': 'uk', 'weight': 0.07400000000000001},
  {'source': 'spain', 'target': 'portugal', 'weight': 0.071},
  {'source': 'spain', 'target': 'us', 'weight': 0.045},
  {'source': 'france', 'target': 'germany', 'w