## Notebook to create a map with PDBe-KB/FunPDBe partners

In [37]:
### Uncomment to install the required packages into the kernel's environment
# %pip install folium
# %pip install beautifulsoup4  # provides the bs4 module
# %pip install geocoder

In [38]:
import html
import json
import time

import bs4
import folium
import geocoder
import pandas as pd
import requests
from folium import plugins
from folium.features import CustomIcon

#### Get the list of current partners from the PDBe-KB/FunPDBe website

In [60]:
def get_pdbekb_partners(url="https://www.ebi.ac.uk/pdbe/pdbe-kb/funpdbe", timeout=30):
    """Scrape the partner table from the PDBe-KB/FunPDBe web page.

    Parameters
    ----------
    url : str
        Address of the page that holds a ``<div id="partners">`` element
        containing the partner table.
    timeout : float
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook.

    Returns
    -------
    pandas.DataFrame
        One row per table row that has ``<td>`` cells, with the stripped cell
        texts in the columns ``Resource``, ``PI`` and ``URL``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status code.
    ValueError
        If the page contains no ``<div id="partners">`` element.
    """
    response = requests.get(url, timeout=timeout)
    # Stop here on 4xx/5xx instead of trying to parse an error page
    response.raise_for_status()

    page = bs4.BeautifulSoup(response.text, "html.parser")
    partner_divs = page.find_all("div", attrs={"id": "partners"})
    if not partner_divs:
        raise ValueError(f'No <div id="partners"> element found at {url}')

    rows = []
    for table_row in partner_divs[0].find_all("tr"):
        cells = [cell.get_text().strip() for cell in table_row.find_all("td")]
        # Rows without <td> cells (e.g. header rows) carry no partner data
        if cells:
            rows.append(cells)

    return pd.DataFrame(columns=["Resource", "PI", "URL"], data=rows)

In [61]:
# Fetch the current partner table from the website
partners = get_pdbekb_partners()
# Cut the last six characters off every PI entry
# (presumably trailing link text scraped together with the name - TODO confirm)
partners["PI"] = list(map(lambda pi_text: pi_text[:-6], partners["PI"]))
partners

Unnamed: 0,Resource,PI,URL
0,14-3-3-Pred,Geoff Barton,http://www.compbio.dundee.ac.uk/1433pred
1,3D Complex,Emmanuel Levy,https://shmoo.weizmann.ac.il/elevy/3dcomplexV6...
2,3DLigandSite,Mark Wass,http://www.sbg.bio.ic.ac.uk/~3dligandsite/
3,AKID,Manuela Helmer Citterich,http://akid.bio.uniroma2.it/
4,Arpeggio,Tom Blundell,http://biosig.unimelb.edu.au/arpeggioweb/
5,CamKinet,Toby Gibson,http://camkinet.embl.de/v2/home/
6,canSAR,Bissan Al-Lazikani,https://cansar.icr.ac.uk/
7,CATH-FunSites,Christine Orengo,http://www.cathdb.info/
8,ChannelsDB,Jaroslav Koca,http://ncbr.muni.cz/ChannelsDB/
9,Covalentizer,Nir London,https://covalentizer.weizmann.ac.il/covb/main


#### Add locations to each resource
Ideally, the website would list each partner's location itself; until it does, the locations are maintained by hand here.

In [62]:
# Resource name -> one-element list holding the city of that resource.
# Each city is wrapped in a list so the dict can be passed straight to
# pd.DataFrame as "one column per resource".
city_dict = {
    resource: [city]
    for resource, city in (
        ('14-3-3-Pred', 'Dundee, UK'),
        ('3D Complex', 'Rehovot, Israel'),
        ('3DLigandSite', 'Canterbury, UK'),
        ('AKID', 'Rome, Italy'),
        ('Arpeggio', 'Cambridge, UK'),
        ('CamKinet', 'Heidelberg, Germany'),
        ('canSAR', 'Sutton, UK'),
        ('CATH-FunSites', 'London, UK'),
        ('ChannelsDB', 'Brno, Czech Republic'),
        ('Covalentizer', 'Rehovot, Israel'),
        ('COSPI-Depth', 'Pune, India'),
        ('DynaMine', 'Brussels, Belgium'),
        ('EFoldMine', 'Brussels, Belgium'),
        ('EVcouplings', 'Boston, MA, USA'),
        ('FireProt DB', 'Brno, Czech Republic'),
        ('FoldX', 'Barcelona, Spain'),
        ('KinCore', 'Philadelphia, PA, USA'),
        ('KnotProt', 'Warsaw, Poland'),
        ('M-CSA', 'Hinxton, UK'),
        ('MetalPDB', 'Florence, Italy'),
        ('Missense3D', 'London, UK'),
        ('P2Rank', 'Prague, Czech Republic'),
        ('POPSCOMP', 'London, UK'),
        ('ProKinO', 'Athens, GA, USA'),
        ('SKEMPI', 'Barcelona, Spain'),
        ('WEBnm@', 'Bergen, Norway'),
    )
}

In [63]:
# One-column frame of locations, indexed by resource name
locations = pd.DataFrame.from_dict(city_dict, orient="index", columns=["Location"])

# Keep the cell safe to re-run: only move "Resource" into the index the first
# time, and drop a "Location" column left over from an earlier run before joining
if "Resource" in partners.columns:
    partners = partners.set_index("Resource")
partners = partners.drop(columns="Location", errors="ignore").join(locations)

# A partner without a city_dict entry would otherwise only fail later, during
# geocoding, with an unhelpful error - report the missing names right away
missing_locations = partners.index[partners["Location"].isna()].tolist()
if missing_locations:
    raise ValueError(f"No location in city_dict for: {missing_locations}")
partners

Unnamed: 0_level_0,PI,URL,Location
Resource,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
14-3-3-Pred,Geoff Barton,http://www.compbio.dundee.ac.uk/1433pred,"Dundee, UK"
3D Complex,Emmanuel Levy,https://shmoo.weizmann.ac.il/elevy/3dcomplexV6...,"Rehovot, Israel"
3DLigandSite,Mark Wass,http://www.sbg.bio.ic.ac.uk/~3dligandsite/,"Canterbury, UK"
AKID,Manuela Helmer Citterich,http://akid.bio.uniroma2.it/,"Rome, Italy"
Arpeggio,Tom Blundell,http://biosig.unimelb.edu.au/arpeggioweb/,"Cambridge, UK"
CamKinet,Toby Gibson,http://camkinet.embl.de/v2/home/,"Heidelberg, Germany"
canSAR,Bissan Al-Lazikani,https://cansar.icr.ac.uk/,"Sutton, UK"
CATH-FunSites,Christine Orengo,http://www.cathdb.info/,"London, UK"
ChannelsDB,Jaroslav Koca,http://ncbr.muni.cz/ChannelsDB/,"Brno, Czech Republic"
Covalentizer,Nir London,https://covalentizer.weizmann.ac.il/covb/main,"Rehovot, Israel"


#### Get geolocation data for each resource
The geocoding data provider (OpenStreetMap) enforces a strict limit of at most 1 request per second.

In [64]:
def get_lat_lng(locality):
    """Look up the coordinates of a place name through the OSM geocoder.

    Parameters
    ----------
    locality : str
        Free-text place description, e.g. ``"Dundee, UK"``.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` taken from the geocoder's JSON payload.

    Raises
    ------
    ValueError
        If the geocoder returned no payload for ``locality``.
    """
    geocoded = geocoder.osm(locality).json
    # Without a payload there is nothing to subscript; name the offending
    # locality instead of failing with an opaque TypeError
    if not geocoded:
        raise ValueError(f"Could not geocode {locality!r}")
    return (geocoded["lat"], geocoded["lng"])

In [65]:
latitudes = []
longitudes = []

# Several partners share a city; geocode each distinct locality only once so
# the rate-limited service is not asked the same question repeatedly
geocode_cache = {}

for locality in partners["Location"]:
    if locality not in geocode_cache:
        geocode_cache[locality] = get_lat_lng(locality)
        # Stay below the provider's limit of one request per second
        time.sleep(1)
    lat, lng = geocode_cache[locality]
    latitudes.append(lat)
    longitudes.append(lng)

#### Add geolocation data to the dataframe
Also add country as a separate column, and replace abbreviations with the full name of the country

In [66]:
# Abbreviations used in the location strings -> full country names
country_names = {
    "UK": "United Kingdom",
    "USA": "United States of America",
}

partners["Latitude"] = latitudes
partners["Longitude"] = longitudes
# The country is the last comma-separated part of the location. Expand the
# abbreviations only here, rather than with a frame-wide replace that would
# also rewrite any other cell that happens to equal "UK" or "USA"
partners["Country"] = [
    country_names.get(country, country)
    for country in (location.split(", ")[-1] for location in partners["Location"])
]
partners

Unnamed: 0_level_0,PI,URL,Location,Latitude,Longitude,Country
Resource,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
14-3-3-Pred,Geoff Barton,http://www.compbio.dundee.ac.uk/1433pred,"Dundee, UK",56.460594,-2.97019,United Kingdom
3D Complex,Emmanuel Levy,https://shmoo.weizmann.ac.il/elevy/3dcomplexV6...,"Rehovot, Israel",31.893064,34.803409,Israel
3DLigandSite,Mark Wass,http://www.sbg.bio.ic.ac.uk/~3dligandsite/,"Canterbury, UK",51.280028,1.080253,United Kingdom
AKID,Manuela Helmer Citterich,http://akid.bio.uniroma2.it/,"Rome, Italy",41.89332,12.482932,Italy
Arpeggio,Tom Blundell,http://biosig.unimelb.edu.au/arpeggioweb/,"Cambridge, UK",52.197585,0.139154,United Kingdom
CamKinet,Toby Gibson,http://camkinet.embl.de/v2/home/,"Heidelberg, Germany",49.409358,8.694724,Germany
canSAR,Bissan Al-Lazikani,https://cansar.icr.ac.uk/,"Sutton, UK",51.357464,-0.173627,United Kingdom
CATH-FunSites,Christine Orengo,http://www.cathdb.info/,"London, UK",51.507322,-0.127647,United Kingdom
ChannelsDB,Jaroslav Koca,http://ncbr.muni.cz/ChannelsDB/,"Brno, Czech Republic",49.192244,16.611338,Czech Republic
Covalentizer,Nir London,https://covalentizer.weizmann.ac.il/covb/main,"Rehovot, Israel",31.893064,34.803409,Israel


#### The countries.geojson file is downloaded from https://datahub.io/core/geo-countries
This file is licensed under the "Open Data Commons Public Domain Dedication and License", while the original data comes from http://www.naturalearthdata.com/, a public domain dataset.
Essentially, it contains polygons describing country outlines.

In [67]:
countries_file = "https://datahub.io/core/geo-countries/r/countries.geojson"

# Bound the wait and stop on an HTTP error status, so that an error page is
# never handed to the JSON parser
countries_response = requests.get(countries_file, timeout=60)
countries_response.raise_for_status()
countries_geo = countries_response.json()

#### Actual map plotting

In [68]:
# Base map. The tiles parameter defines what the map looks like. Good options for this use case in my view are:
# "cartodbpositron", "Stamen watercolor", "Stamen toner". More options on the Folium documentation:
# https://python-visualization.github.io/folium/quickstart.html

pdbekb_map = folium.Map(location=[40, -5], zoom_start=2.5, tiles='cartodbpositron')

# Some utility styles, which can be as fancy as you like, really
# The colour codes used below are from http://colorbrewer2.org
# I copied them here purely for ease of access
# greens: ['#e5f5e0','#a1d99b','#31a354']
# blues: ['#deebf7','#9ecae1','#3182bd']

def style_grey(feature):
    """Grey fill and outline; the feature argument is ignored."""
    return {
        'fillColor': '#cccccc',
        'color': "#aaaaaa"
    }


def style_green(feature):
    """Green fill and outline; the feature argument is ignored."""
    return {
        'fillColor': '#e5f5e0',
        'color': "#a1d99b"
    }


def style_blue(feature):
    """Blue fill and outline; the feature argument is ignored."""
    return {
        'fillColor': '#deebf7',
        'color': "#9ecae1"
    }


# Countries that host at least one partner, collected once for fast lookups
partner_countries = set(partners["Country"])

# Go over the countries of the world and check if PDBe-KB has partners in them
# Add their outlines to the map, skip others
for feature in countries_geo['features']:
    if feature["properties"]["ADMIN"] in partner_countries:
        folium.GeoJson(
            feature,
            style_function=style_green,
            name='geojson'
        ).add_to(pdbekb_map)

# Create a cluster of markers for each country. This looks better than each marker separately at low zoom
# At high zoom, the clusters split into individual labels
marker_clusters = {
    country: plugins.MarkerCluster().add_to(pdbekb_map)
    for country in partner_countries
}

# Create markers and labels and add them to the cluster for the correct country
# They can also be added to the map directly, but then will not be aggregated
for lat, lng, resource, url, country in zip(partners.Latitude,
                                            partners.Longitude,
                                            partners.index,
                                            partners.URL,
                                            partners.Country
                                           ):
    # The resource name and URL are scraped from a web page: quote the
    # attribute and escape both values so they cannot break the popup markup
    label = (
        f'<a href="{html.escape(str(url), quote=True)}" target="_blank" rel="noopener">'
        f'{html.escape(str(resource))}</a>'
    )
    # due to a bug in Folium, an icon must be defined each time it is used
    icon = CustomIcon(
        "https://www.ebi.ac.uk/pdbe/pdbe-kb/static/icon/favicon-32x32.png",
        icon_size=(22, 22),
        icon_anchor=(11, 11),
        popup_anchor=(0, 0)
    )
    folium.Marker(
        location=[lat, lng],
        icon=icon,
        popup=label,
    ).add_to(marker_clusters[country])
# Display the map
pdbekb_map

In [12]:
# Variant of the partner map with a different centre/zoom and Folium's stock green markers.
# Base map. The tiles parameter defines what the map looks like. Good options for this use case in my view are:
# "cartodbpositron", "Stamen watercolor", "Stamen toner". More options on the Folium documentation:
# https://python-visualization.github.io/folium/quickstart.html

pdbekb_map = folium.Map(location=[30, 0], zoom_start=2.1, tiles='cartodbpositron')

# Some utility styles, which can be as fancy as you like, really
# The colour codes used below are from http://colorbrewer2.org
# I copied them here purely for ease of access
# greens: ['#e5f5e0','#a1d99b','#31a354']
# blues: ['#deebf7','#9ecae1','#3182bd']

def style_grey(feature):
    """Grey fill and outline; the feature argument is ignored."""
    return {
        'fillColor': '#cccccc',
        'color': "#aaaaaa"
    }


def style_green(feature):
    """Green fill and outline; the feature argument is ignored."""
    return {
        'fillColor': '#e5f5e0',
        'color': "#a1d99b"
    }


def style_blue(feature):
    """Blue fill and outline; the feature argument is ignored."""
    return {
        'fillColor': '#deebf7',
        'color': "#9ecae1"
    }


# Countries that host at least one partner, collected once for fast lookups
partner_countries = set(partners["Country"])

# Go over the countries of the world and check if PDBe-KB has partners in them
# Add their outlines to the map, skip others
for feature in countries_geo['features']:
    if feature["properties"]["ADMIN"] in partner_countries:
        folium.GeoJson(
            feature,
            style_function=style_green,
            name='geojson'
        ).add_to(pdbekb_map)

# Create a cluster of markers for each country. This looks better than each marker separately at low zoom
# At high zoom, the clusters split into individual labels
marker_clusters = {
    country: plugins.MarkerCluster().add_to(pdbekb_map)
    for country in partner_countries
}

# Create markers and labels and add them to the cluster for the correct country
# They can also be added to the map directly, but then will not be aggregated
for lat, lng, resource, url, country in zip(partners.Latitude,
                                            partners.Longitude,
                                            partners.index,
                                            partners.URL,
                                            partners.Country
                                           ):
    # The resource name and URL are scraped from a web page: quote the
    # attribute and escape both values so they cannot break the popup markup
    label = (
        f'<a href="{html.escape(str(url), quote=True)}" target="_blank" rel="noopener">'
        f'{html.escape(str(resource))}</a>'
    )
    folium.Marker(
        location=[lat, lng],
        icon=folium.Icon(
            color="green"
        ),
        popup=label,
    ).add_to(marker_clusters[country])
# Display the map
pdbekb_map