# Embassies
Get all of the addresses of the embassies.

In [1]:
import bs4
import requests

In [2]:
# Load countries.txt into a list holding one entry per line (newlines dropped).
with open('countries.txt') as countries_file:
    raw_countries = countries_file.read()
countries = raw_countries.splitlines()

In [3]:
# Download the embassy listing page and parse it with the lxml parser.
url = 'https://embassy.goabroad.com/embassies-in/india'
# Without a timeout, requests may wait indefinitely on an unresponsive server.
r = requests.get(url, timeout=30)
# Stop here on an HTTP error status instead of parsing an error page.
r.raise_for_status()
soup = bs4.BeautifulSoup(r.content, 'lxml')

In [4]:
def parse_info(info):
    """Parse the labelled rows of an embassy info section into a dict.

    Every ``div`` found under *info* by the ``class=None`` filter is treated
    as one row: its first child supplies the field label and the remaining
    children are handed to the parser registered for that label in the
    module-level ``parsers`` table.

    Returns a dict mapping each label to the text produced by its parser.
    Raises ``KeyError`` for a label with no entry in ``parsers`` and
    ``ValueError`` for a row that has no children to unpack.
    """
    parsed = {}
    # The class=None filter is meant to skip the "info-row" divs.
    for row in info.find_all('div', {'class': None}):
        label_node, *detail_nodes = row
        label = str(label_node.string)
        parsed[label] = parsers[label](detail_nodes)
    return parsed
        
def parse_simple_details(details):
    """Return the single entry of *details* with surrounding whitespace removed.

    Raises ``ValueError`` unless *details* contains exactly one element.
    """
    [only_detail] = details
    return only_detail.strip()

def parse_details_with_breaks(details):
    """Join the plain-text nodes of *details* into one ``', '``-separated string.

    Only nodes whose type is exactly ``bs4.element.NavigableString`` are kept,
    so tags (such as line breaks) and NavigableString subclasses (for example
    HTML comments) are ignored.  Each kept node is stripped of surrounding
    whitespace before joining.
    """
    text_nodes = (node for node in details
                  if type(node) is bs4.element.NavigableString)
    return ', '.join(node.strip() for node in text_nodes)

def parse_details_with_links(details):
    """Join the text of every ``<a>`` node in *details* with commas.

    Nodes whose ``name`` is not ``'a'`` are skipped.  The separator is a bare
    ``','`` with no following space.
    """
    return ','.join(node.text for node in details if node.name == 'a')

# Dispatch table used by parse_info: field label -> function that turns the
# nodes following the label into text.
parsers = {
    'City': parse_simple_details,
    'Phone': parse_details_with_breaks,
    'Fax': parse_details_with_breaks,
    'Details': parse_details_with_breaks,
    'Office Hours': parse_details_with_breaks,
    'Email': parse_details_with_links,
    'Website': parse_details_with_links,
}

In [5]:
# Collect the details of every embassy listed on the page, keyed by the
# listing title taken from each entry's <h6> heading.
embassies_info = {}
embassies = soup.find('section', {'class': 'search-embassies program-listing'})
for embassy in embassies.find_all('div', {'class': 'body'}):
    title = str(embassy.h6.string)
    # TODO: parse the embassy title into country and type (consulate vs embassy).
    address = embassy.find('span', 'embassy-address').string.strip()
    info = embassy.find('section', {'class': 'embassy-info'})
    # Store the street address alongside the labelled fields from parse_info.
    embassy_info = {'Address': address}
    embassy_info.update(parse_info(info))
    embassies_info[title] = embassy_info