### Webscraping
------------------------

In [4]:
import requests
from bs4 import BeautifulSoup
import json

# Scrape the factbook.json repository listing on GitHub to find out which
# top-level folder (called a "continent" here) holds each country-code file.
FACTBOOK_REPO_URL = 'https://github.com/factbook/factbook.json'
REQUEST_TIMEOUT = 30  # seconds; keeps a stalled connection from hanging the cell


def _listing_cells(url):
    """Fetch `url` and return every <td class="content"> element on the page.

    Raises requests.HTTPError when the server answers with an error status,
    so an error page is never parsed as if it were a directory listing.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html5lib')
    return soup.find_all('td', class_='content')


def _entry_name(td):
    """Return the last space-separated word on the second line of the cell text."""
    return td.text.split('\n')[1].split(' ')[-1]


# Folder names shown on the repository front page. Only cells 1..13 are used.
# NOTE(review): the slice bounds depend on the page layout that was inspected
# with Chrome Dev Tools when this was written - confirm they still hold.
continents = [_entry_name(td) for td in _listing_cells(FACTBOOK_REPO_URL)[1:14]]

# Key-value pairs of country code -> the folder that contains its JSON file.
code_to_continent = {}
for continent in continents:
    if continent == 'meta':
        # 'meta' is excluded from the country lookup.
        continue
    url = 'https://github.com/factbook/factbook.json/tree/master/{c}'.format(c=continent)
    # The first cell of each folder page is skipped; for the rest, the file
    # name minus everything after the first '.' is used as the country code.
    for td in _listing_cells(url)[1:]:
        code_to_continent[_entry_name(td).split('.')[0]] = continent

In [5]:
class Country:
    """A country and the partners it exports to.

    The constructor takes the display name and the country code; partners are
    added afterwards, one at a time, through add_partner().
    """

    def __init__(self, name, code):
        """Create a country with no exporting partners yet."""
        # partner key -> value supplied to add_partner()
        self.exporting_partners = dict()
        self.code = code
        # The name is always stored lower-cased.
        self.name = name.lower()

    def add_partner(self, p, v):
        """Record value `v` for partner `p`, replacing any earlier value."""
        self.exporting_partners.update({p: v})

In [6]:
# Build a lookup from each known spelling of a country - its code plus the
# lower-cased conventional short and long forms - to its country code.
names_to_code = {}

for code in code_to_continent:
    url = 'https://raw.githubusercontent.com/factbook/factbook.json/master/{continent}/{code}.json'.format(continent=code_to_continent[code], code=code)
    # Map the code to itself so that a lookup by code also succeeds.
    names_to_code[code] = code
    # A timeout and status check make a failed download raise here instead of
    # hanging or surfacing later as a confusing JSON decode error.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    # Bound to `profile`, not `json`: reusing the name `json` would replace the
    # imported json module with a dict for the rest of the kernel session.
    profile = response.json()
    country_name = profile['Government'].get('Country name', {})
    # Short form first, then long form, so the long form wins if both
    # lower-case to the same string (same order as before).
    for form in ('conventional short form', 'conventional long form'):
        if form in country_name:
            names_to_code[country_name[form]['text'].lower()] = code
            

In [7]:
# code -> Country, for every entry that has both a conventional short form
# and an 'Exports - partners' field.
countries = {}

for code in code_to_continent:
    # Get the JSON data for this country code.
    url = 'https://raw.githubusercontent.com/factbook/factbook.json/master/{continent}/{code}.json'.format(continent=code_to_continent[code], code=code)
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    # Bound to `profile`, not `json`: reusing the name `json` would replace the
    # imported json module with a dict for the rest of the kernel session.
    profile = response.json()

    # We might miss a few entries if they don't have these fields.
    country_name = profile['Government'].get('Country name', {})
    if 'conventional short form' not in country_name:
        continue
    name = country_name['conventional short form']['text']
    if 'Exports - partners' not in profile['Economy']:
        continue

    country = Country(name, code)
    for partner in profile['Economy']['Exports - partners']['text'].split(','):
        p = partner.split()
        if not p:
            # Blank fragment (empty text or a stray comma): nothing to parse,
            # and indexing p[-1] below would raise IndexError.
            continue
        # Locate the percentage token by looking at how the fragment ends.
        # NOTE(review): the trailing tokens are presumably a year such as
        # "(2017)" or "(2017 est.)" - confirm against the live data.
        if p[-1][0] == '(':
            value_at = -2   # one trailing token after the percentage
        elif p[-1][0] == 'e':
            value_at = -3   # two trailing tokens after the percentage
        else:
            value_at = -1   # the percentage is the last token
        # Everything before the percentage is the partner name; the number is
        # whatever precedes the first '%' in the percentage token.
        country.add_partner(' '.join(p[:value_at]).lower(), float(p[value_at].split('%')[0]))
    countries[code] = country

In [8]:
import pandas as pd
import networkx as nx
# Directed graph of exports: an edge runs from the exporting country's code to
# the partner's code, weighted by the partner's share as a fraction (pct / 100).
G = nx.DiGraph()

# Extra spellings that appear in the partner lists but are not produced by the
# short/long-form lookup. The last four map region names to themselves, so
# they become nodes of their own.
names_to_code['uae'] = 'ae'
names_to_code['cote divoire'] = 'iv'
names_to_code['netherlands antilles'] = 'an'
names_to_code['st. lucia'] = 'lc'
names_to_code['st. vincent and the grenadines'] = 'vc'
names_to_code['st. kitts and nevis'] = 'kn'
names_to_code['europe'] = 'europe'
names_to_code['africa'] = 'africa'
names_to_code['america'] = 'america'
names_to_code['asia'] = 'asia'

for exporter_code, exporter in countries.items():
    for partner, share in exporter.exporting_partners.items():
        if partner not in names_to_code:
            # Same exception type as the bare lookup, but it says which
            # spelling needs an alias.
            raise KeyError('no country code known for export partner {!r} of {!r}; '
                           'add an alias to names_to_code'.format(partner, exporter_code))
        # The source node is the key of `countries`, i.e. the country's own
        # code. Looking it up through names_to_code[exporter.name] is unsafe:
        # names_to_code keeps only the last code registered for a given name,
        # so entries sharing a short-form string would have their edges
        # attached to the wrong node.
        G.add_edge(exporter_code, names_to_code[partner], weight=share / 100)

In [9]:
# Annotate every node with its degree and, where known, its country name.
degrees = nx.degree(G)

# country code -> name as stored on the Country object (lower-cased there)
names = {code: entry.name for code, entry in countries.items()}

# node -> degree, copied out of the (node, degree) pairs into a plain dict
ds = {node: degree for node, degree in degrees}

# 'degree' is set before 'name', keeping the attribute order on each node.
nx.set_node_attributes(G, ds, 'degree')
nx.set_node_attributes(G, names, 'name')

In [10]:
from networkx.readwrite import json_graph
# Convert the graph into node-link form; `data` is what gets written to
# graph.json further down.
data = json_graph.node_link_data(G)

In [11]:
import json
# Serialise `data` to JSON text and write it to graph.json in the current
# working directory, replacing any existing file.
with open('graph.json', mode='w') as fp:
    fp.write(json.dumps(data))

In [12]:
# Shortest paths between all pairs of nodes (no source or target given).
# dict() pins the printed result to a {source: {target: path}} mapping:
# newer networkx releases warn that this call will return an iterator of
# (node, paths) pairs instead of a dict, which would print as an opaque
# generator object. On versions that already return a dict this is a no-op copy.
print(dict(nx.shortest_path(G)))

{'ag': {'ag': ['ag'], 'sp': ['ag', 'sp'], 'fr': ['ag', 'fr'], 'us': ['ag', 'us'], 'it': ['ag', 'it'], 'uk': ['ag', 'uk'], 'br': ['ag', 'br'], 'ts': ['ag', 'ts'], 'gm': ['ag', 'gm'], 'po': ['ag', 'sp', 'po'], 'be': ['ag', 'fr', 'be'], 'ca': ['ag', 'us', 'ca'], 'mx': ['ag', 'us', 'mx'], 'ch': ['ag', 'us', 'ch'], 'ja': ['ag', 'us', 'ja'], 'sz': ['ag', 'it', 'sz'], 'nl': ['ag', 'uk', 'nl'], 'ei': ['ag', 'uk', 'ei'], 'ar': ['ag', 'br', 'ar'], 'ly': ['ag', 'ts', 'ly'], 'au': ['ag', 'gm', 'au'], 'pl': ['ag', 'gm', 'pl'], 'ao': ['ag', 'sp', 'po', 'ao'], 'hk': ['ag', 'us', 'ch', 'hk'], 'ks': ['ag', 'us', 'ch', 'ks'], 'th': ['ag', 'us', 'ja', 'th'], 'in': ['ag', 'it', 'sz', 'in'], 'sy': ['ag', 'ts', 'ly', 'sy'], 'lo': ['ag', 'gm', 'au', 'lo'], 'ez': ['ag', 'gm', 'pl', 'ez'], 'sf': ['ag', 'sp', 'po', 'ao', 'sf'], 'vm': ['ag', 'us', 'ch', 'ks', 'vm'], 'my': ['ag', 'us', 'ja', 'th', 'my'], 'as': ['ag', 'us', 'ja', 'th', 'as'], 'sn': ['ag', 'us', 'ja', 'th', 'sn'], 'ae': ['ag', 'it', 'sz', 'in', 'ae