In [1]:
import folium
import json
import geopandas
import numpy as np
import pandas as pd
import psycopg2

from folium.plugins import MarkerCluster
from geopandas import tools
from shapely.geometry import Point

%matplotlib inline

In [2]:
# Taken from https://github.com/python-visualization/folium/issues/416
# with thanks and recognition to https://github.com/ruoyu0088
# Modified to add a popup
class MarkerClusterScript(MarkerCluster):
    """Marker cluster whose markers are created in the browser by a JS callback.

    Instead of emitting one Python-side marker per point, the records are
    embedded in the page as a JS array and the rendered script loops over them,
    calling ``callback(row, popup)`` for each row and adding the returned
    marker to a ``L.markerClusterGroup``.

    Parameters
    ----------
    data : str
        Text inserted verbatim as the JS ``data`` literal (the notebook passes
        the output of ``DataFrame.to_json(orient="records")``).
    callback : callable or str
        A Python callable is converted with ``flexx.pyscript.py2js`` and named
        ``callback``. Anything else is inserted as JS source after
        ``var callback = ``.
    popup : str, optional
        Name of the row field handed to the callback as its second argument.
        When omitted, ``'names'`` is used, which is the value the template
        previously hard-coded, so existing calls render the same output.
    """

    def __init__(self, data, callback, popup=None):
        from jinja2 import Template
        super(MarkerClusterScript, self).__init__([])
        self._name = 'Script'
        self._data = data
        self._popup = popup
        # JSON-encode the field name so it is emitted as a properly quoted and
        # escaped JS string literal inside the template.
        self._popup_js = json.dumps('names' if popup is None else popup)
        if callable(callback):
            # Imported lazily: flexx is only needed for Python callbacks.
            from flexx.pyscript import py2js
            self._callback = py2js(callback, new_name="callback")
        else:
            # Fixed: this previously formatted the undefined name `_callback`,
            # raising NameError whenever a JS source string was supplied.
            self._callback = "var callback = {};".format(callback)

        self._template = Template(u"""
            {% macro script(this, kwargs) %}
            (function(){
                var data = {{this._data}};
                var map = {{this._parent.get_name()}};
                var cluster = L.markerClusterGroup();
                {{this._callback}}

                for (var i = 0; i < data.length; i++) {
                    var row = data[i];
                    var marker = callback(row, {{this._popup_js}});
                    marker.addTo(cluster);
                }

                cluster.addTo(map);
            })();
            {% endmacro %}
                        """)

In [None]:
# NOTE: this function is not meant to be executed by Python. It is passed as
# the `callback` of MarkerClusterScript, which converts callables to JavaScript
# with py2js; `L` and `markerColor` are not defined anywhere in this notebook's
# Python namespace and are presumably resolved on the JS side (Leaflet) -
# TODO confirm.
def create_marker(row, popup=None):
    """Returns a L.marker object"""
    # Icon colour is read from the row's `color` field.
    icon = L.AwesomeMarkers.icon({markerColor: row.color})    
    # Marker position is taken from the row's `lat` / `lng` fields.
    marker = L.marker(L.LatLng(row.lat, row.lng))
    marker.setIcon(icon)
    # `popup` is the name of the row field whose value becomes the popup body;
    # no popup is bound when it is falsy.
    if popup:
        marker.bindPopup(row[popup])
    return marker

def is_unclassified(row):
    """Return True when the organisation carries neither classification flag.

    Parameters
    ----------
    row : object with ``company`` and ``other`` attributes (e.g. a DataFrame row)

    Returns
    -------
    bool
        True if neither ``company`` nor ``other`` is set, otherwise False.

    Missing flags (NaN/None) count as "not set". Rows that found no match in
    the left merge with the type table carry NaN in both columns, and because
    ``not NaN`` is False they were previously reported as classified.
    """
    def _is_set(flag):
        # pd.isnull covers both NaN and None.
        return False if pd.isnull(flag) else bool(flag)

    return not (_is_set(row.company) or _is_set(row.other))
    
def check_lat_lon(row):
    """Return True only when both ``row.lat`` and ``row.lng`` are non-null.

    The previous implementation combined the two checks with ``or``, so a row
    with just one coordinate present passed the check and went on to build a
    Point containing NaN.
    """
    return not (pd.isnull(row.lat) or pd.isnull(row.lng))

def get_type_counts(df, group='id_left'):
    """Count the rows of ``df`` per value of the ``group`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the ``group`` column.
    group : str, default 'id_left'
        Column to group by.

    Returns
    -------
    pandas.DataFrame
        A new frame with two columns: ``group`` and ``'count'``.

    Two defects are fixed here: ``reset_index(..., inplace=True)`` returns
    None, so the following ``rename`` raised AttributeError; and the ``group``
    argument was ignored in favour of a hard-coded ``'id_left'``.
    """
    # Series.reset_index(name=...) turns the group sizes into a two-column
    # frame and names the value column in a single step.
    return df.groupby(group).size().reset_index(name='count')
    

In [None]:
# Organisations with their geocoded position. The LEFT JOIN keeps
# organisations that have no row in orgs_latlng (lat/lng come back as NULL).
sql_str = """
    SELECT organisations.name, 
        organisations.id,
        organisations.addresses,
        orgs_latlng.lat,
        orgs_latlng.lng
    FROM gtr.organisations
    LEFT JOIN gtr.orgs_latlng
    ON gtr.organisations.id = gtr.orgs_latlng.id
"""

# Read in config file with DB params (credentials stay out of the notebook)
with open('../scripts/config.json') as f:
    conf = json.load(f)

# Define a connection string
conn_string = 'host={} dbname={} user={} password={}'.format(conf.get('host'),
                                                             conf.get('database'),
                                                             conf.get('user'),
                                                             conf.get('passw'))

# Create a connection object
conn = psycopg2.connect(conn_string)

# Dataframe from SQL data. The connection is only needed for this one query,
# so close it afterwards (even if the query fails) instead of leaving it open
# for the lifetime of the kernel.
try:
    df = pd.read_sql(sql_str, conn)
finally:
    conn.close()

In [None]:
# Preview the first rows of the frame read from the database.
df.head()

In [None]:
# nan values throw the JS script, so keep only rows where BOTH coordinates are
# present. A single shared mask is used for every column: filtering lat, lng
# and name with different masks (as before) misaligns the zipped values as
# soon as a row has only one of the two coordinates.
has_coords = df['lat'].notnull() & df['lng'].notnull()
lat = df.loc[has_coords, 'lat'].values
lng = df.loc[has_coords, 'lng'].values

# Popups with name strings
popups = df.loc[has_coords, 'name'].values

# Latitude and longitude dataframe with no nan values
locations = [list(a) for a in zip(lat, lng, popups)]
df_locations = pd.DataFrame(locations, columns=['lat', 'lng', 'names'])

In [None]:
# Clustered-marker map of every organisation that has coordinates.
fig = folium.element.Figure()
map_orgs = folium.Map(zoom_start=5, location=[56, -3])

# The records are serialised once and handed to the JS-side cluster layer.
records_json = df_locations.to_json(orient="records")
MarkerClusterScript(records_json, callback=create_marker).add_to(map_orgs)

# Last expression: attaches the map to the figure and displays the result.
map_orgs.add_to(fig)

In [None]:
# Display the organisations frame as this cell's output.
df

In [None]:
# Attach the organisation-type flags to the organisations frame.
# NOTE(review): unpickling can execute arbitrary code - only load a pickle
# file that was produced by a trusted source.
df_types = pd.read_pickle('df_organisation_type.p')
# No `on=` is given, so pandas joins on the columns the two frames share -
# presumably just 'id'; confirm. how='left' keeps every row of df.
df = pd.merge(df, df_types[['id', 'company', 'other']],
              how='left',
              sort=False)

# Row-wise flag computed by is_unclassified from the company/other columns
df['unclassified'] = df.apply(is_unclassified, axis=1)

# Create a column that shapely recognises as geographic coordinates
# (Point takes x=lng, y=lat; rows failing check_lat_lon get None)
df["geometry"] = df.apply(lambda row: Point(row["lng"], row["lat"]) if check_lat_lon(row) else None, axis=1)
# NOTE(review): in-place drop plus rebinding `df` below means this cell cannot
# be re-run without first re-running the cell that loads `df`.
df.drop(['lat', 'lng'], axis=1, inplace=True)

# Now we need a GeoDataFrame and to tell
# Geopandas what the coordinate system used is
df = geopandas.GeoDataFrame(df, geometry="geometry")
df.crs = {"init": "epsg:4326"}

# Now load the topojson to a separate GeoDataFrame object
lads = geopandas.GeoDataFrame.from_file('../json/topo_lad.json')
lads.crs = {"init": "epsg:4326"}

# Drop the 'name' column of lads before joining - presumably to avoid a clash
# with the organisations' own 'name' column; confirm.
lads.drop('name', axis=1, inplace=True)

# Perform the spatial join (lads on the left, organisations with a geometry
# on the right).
# NOTE(review): with how="left" an area without any matching organisation
# presumably still yields one row, which the size-based counts below would
# report as 1 rather than 0 - confirm.
df = tools.sjoin(lads, df[df.geometry.notnull()], how="left")

# Read in the ONS' Local Units data
# Data starts on row 6 and there are 22 lines of surplus info
# at the end of the file. Skipfooter requires the engine type
# be set to 'python' instead of 'c'. This can result in slower load times
local_units_counts = pd.read_csv('../csv/local_business_units.csv',
                                 header=6,
                                 skipfooter=22,
                                 engine='python')

# Type counts: all joined rows, then only company rows, then only other rows.
# get_type_counts is called with its default group column, 'id_left'.
organisation_counts_lad = get_type_counts(df)
company_counts_lad = get_type_counts(df[df.company == True])
other_counts_lad = get_type_counts(df[df.other == True])

# 6 year average: row-wise mean over the columns labelled '2010' to '2015'
# (the original comment was truncated, so its intended use is not recorded)
local_units_counts['6_yr_avg'] = local_units_counts.loc[:, '2010': '2015'].mean(axis=1)

# Remove the first part of the Area column (keep the segment that follows
# the first ':')
local_units_counts.Area = local_units_counts.Area.apply(lambda x: x.split(':')[1])

In [None]:
# Combine the three per-area tallies into one frame keyed on 'id_left'.
# Both merges are 'outer' (the SQL FULL OUTER JOIN equivalent) so an area
# present in only one of the inputs is still kept.
orgs_and_companies = organisation_counts_lad.merge(
    company_counts_lad,
    on='id_left',
    suffixes=('_orgs', '_comp'),
    how='outer',
)
counts_df = orgs_and_companies.merge(
    other_counts_lad,
    on='id_left',
    how='outer',
)

# Consistent suffixes
#counts_df.rename(columns={'count': 'count_other'}, inplace=True)

# append ONS local units data
# using outer to get all the LAD values
#counts_df = counts_df.merge(local_units_counts,
#                            left_on='id_left',
#                            right_on='mnemonic',
#                            how='outer')

In [None]:
# Inspect the merged per-area counts.
counts_df

In [None]:
# FIXME: unfinished cell. The original expression (kept below) was never
# completed and is a SyntaxError, which stops "Restart & Run All" at this
# point. It is disabled until the intended normalisation is written.
# counts_df[['counts_orgs_norm', counts_df.applymap

In [None]:
# Fresh map: clustered organisation markers plus a choropleth of counts.
fig = folium.element.Figure()
map_orgs = folium.Map(zoom_start=5, location=[56, -3])

records_json = df_locations.to_json(orient="records")
MarkerClusterScript(records_json, callback=create_marker).add_to(map_orgs)
map_orgs.add_to(fig)

# Choropleth settings gathered in one place; areas are matched to the counts
# through the LAD13CD feature property.
choropleth_options = dict(
    geo_path='../json/topo_lad.json',
    topojson='objects.lad',
    data=organisation_counts_lad,
    columns=['id_left', 'count'],
    key_on='feature.properties.LAD13CD',
    fill_color='YlGnBu',
    fill_opacity=0.7,
    line_opacity=0.9,
    legend_name='Count by Location',
    threshold_scale=[0, 250, 500, 750, 1000],
)
map_orgs.choropleth(**choropleth_options)

# Last expression: display the finished map.
map_orgs

Local Authority TopoJSON data courtesy of [Dr. Martin Chorley](https://github.com/martinjc/UK-GeoJson).

Contains Ordnance Survey, Office of National Statistics, National Records Scotland and LPS Intellectual Property data © Crown copyright and database right [2016]. Data licensed under the terms of the Open Government Licence (http://www.nationalarchives.gov.uk/doc/open-government-licence/version/3). Ordnance Survey data covered by OS OpenData Licence. Any further sub-licences must retain this attribution.

In [None]:
# Write the map to 'map.html' (path is relative to the working directory).
map_orgs.save('map.html')