## Import libraries, define constants, and set the URL paths
Set the root address (REST_API_ADDRESS) to the address and port exposed by your Docker container.

In [19]:
import json
import folium
from geopandas import GeoDataFrame
from pysal.viz.mapclassify import Natural_Breaks
import requests

# Names of the GeoJSON properties used as the choropleth key and value.
id_field = 'id'
value_field = 'score'

# Choropleth styling: number of classes, colour ramp, and fill opacity.
num_bins = 4
fill_color = 'YlOrRd'
fill_opacity = 0.9

# Root of the BRS REST API; every endpoint below is this address plus a path.
REST_API_ADDRESS = 'http://localhost:4646/'
Alive_URL = '{}alive'.format(REST_API_ADDRESS)
BRS_URL = '{}BRS'.format(REST_API_ADDRESS)
RemoveTables_URL = '{}removeTables'.format(REST_API_ADDRESS)
Flush_URL = '{}flushBuffer'.format(REST_API_ADDRESS)
ChangeProteus_URL = '{}changeProteus'.format(REST_API_ADDRESS)
ChangeAlgo_URL = '{}changeAlgo'.format(REST_API_ADDRESS)
ChangeMemorySize_URL = '{}changeMemorySize'.format(REST_API_ADDRESS)

## Check BRS is alive
Check the status of BRS

In [193]:
# Ping the BRS service and print the status text it returns.
# The timeout (seconds) stops the notebook from hanging indefinitely when the
# container is not reachable; requests waits forever by default.
response = requests.get(Alive_URL, timeout=10)
print(response.text)

I am alive
 algorithm 9 memorySize 10, and Spark cluster is free.


## Set Proteus credential
Set the Proteus credentials. BRS needs this information to fetch tables.

In [None]:
# Proteus connection details; fill these in before running the cell.
ProteusURL = ""
ProteusUsername = ""
ProteusPassword = ""

# NOTE(review): the credentials are passed as query parameters of a GET
# request, so they end up in the request URL.
data = {
    'url': ProteusURL,
    'username': ProteusUsername,
    'pass': ProteusPassword,
}
response = requests.get(ChangeProteus_URL, params=data)
print(response.text)

## Change algorithm
Set the algorithm to one of unif, single, multi, or hybrid. hybrid is the fastest, and unif is a uniform grid. The default is hybrid.

In [34]:
# Ask BRS to switch to the chosen algorithm and print its confirmation.
algo = "hybrid"
data = dict(algo=algo)
response = requests.get(ChangeAlgo_URL, params=data)
print(response.text)

Algorithm set to hybrid


## Remove previous results
BRS buffers previous results to avoid repeating the same query. Run this cell to remove the buffered results.

In [40]:
# Call the flushBuffer endpoint and print the service's reply.
response = requests.get(url=Flush_URL)
print(response.text)

buffer.tmp is flushed.


## Change memory size
Set the amount of RAM for the Spark instance. The default is 10G.

In [None]:
# Send the requested memory size to BRS and print its reply.
memorySize = 11  # presumably gigabytes, matching the documented 10G default - confirm
data = dict(memorySize=memorySize)
response = requests.get(ChangeMemorySize_URL, params=data)
print(response.text)

## Identify industrial districts
An example of a BRS query. The table must include the columns lat and lon, which hold the coordinates. The parameter f indicates the column name (revenue, numberOfEmployees, etc.) used by the scoring function. Keywords are used to filter the records. You can filter on two columns at the same time. Separate keywords with commas. For example, if you need to filter companies with an ATECO code of 10.10 or 10.11, set keywordsColumn to "ATECO" and keywords to "10.10,10.11". Moreover, in the same query, if you want to filter companies of a specific province (e.g., pisa), set keywordsColumn2 to "province" and set keywords2 to "pisa".

This query detects the top 10 regions sized 10km*10km that contain the largest number (f is null) of startup companies (the flags column is filtered with startup-registroimprese). The second filter column, keywordsColumn2, is left empty.

CAUTION: You can run this query with a pre-fetched table (BRSflags, which has been injected into the docker image) in order to check the REST API and results.

In [211]:
# Parameters of the BRS query.
table = "BRSflags"  # This table already exists in the docker image
topk = 10  # Number of top-ranked regions to return
# Side length of each square region. Presumably expressed in degrees of
# lat/lon, where 0.1 is roughly 10 km - confirm against the service.
eps = .1
f = "null"  # Set f to null if the scoring function is the number of elements
dist = True
keywordsColumn = "flags"
keywords = "startup-registroimprese"
keywordsColumn2 = ""  # second filter left empty
keywords2 = ""

# Query-string parameters, in the order the endpoint is called with.
data = dict(
    topk=topk,
    eps=eps,
    f=f,
    input=table,
    keywordsColumn=keywordsColumn,
    keywords=keywords,
    keywordsColumn2=keywordsColumn2,
    keywords2=keywords2,
    dist=dist,
)
response = requests.get(BRS_URL, params=data)
print(response.text)

[
{
"rank":1,
"center":[9.185410000000001,45.484415000000006],
"score":1699.0
}
,{
"rank":2,
"center":[12.488605000000002,41.900499999999994],
"score":660.0
}
,{
"rank":3,
"center":[7.661465,45.066535],
"score":306.0
}
,{
"rank":4,
"center":[14.238444999999999,40.86993],
"score":247.0
}
,{
"rank":5,
"center":[11.376294999999999,44.49163500000001],
"score":224.0
}
,{
"rank":6,
"center":[11.90895,45.402884795000006],
"score":167.0
}
,{
"rank":7,
"center":[13.343405,38.139575],
"score":136.0
}
,{
"rank":8,
"center":[8.920905,44.44473],
"score":135.0
}
,{
"rank":9,
"center":[9.64011,45.689725],
"score":118.0
}
,{
"rank":10,
"center":[11.235539535000001,43.788819805],
"score":107.0
}
]



## Initialize the map and visualize the output regions
This code helps you visualize the output of the previous cell.

In [213]:
# Draw the regions returned by the previous BRS query as a choropleth map.
res = json.loads(response.text)

# One GeoJSON point feature per region: rank becomes the id, score the value.
results_geojson = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "geometry": {"type": "Point", "coordinates": region['center']},
            "properties": {"id": region['rank'], "score": region['score']},
        }
        for region in res
    ],
}

m = folium.Map(
    location=[45.474989560000004, 9.205786594999998],
    tiles='Stamen Toner',
    zoom_start=11
)

gdf = GeoDataFrame.from_features(results_geojson['features'])
# Plain authority string instead of the deprecated {'init': ...} mapping,
# which makes pyproj emit a FutureWarning.
gdf.crs = 'EPSG:4326'
# buffer(r).envelope is a square with side 2*r, so r = eps / 2 draws each
# region with side length eps, the size that was actually queried.
gdf['geometry'] = gdf.buffer(data['eps'] / 2).envelope

# Natural-breaks class limits, with the minimum prepended as the lower edge.
threshold_scale = Natural_Breaks(gdf[value_field], k=num_bins).bins.tolist()
threshold_scale.insert(0, gdf[value_field].min())

choropleth = folium.Choropleth(gdf, data=gdf, columns=[id_field, value_field],
                               key_on='feature.properties.{}'.format(id_field),
                               fill_color=fill_color, fill_opacity=fill_opacity,
                               threshold_scale=threshold_scale).add_to(m)

# Show every attribute except the geometry in the hover tooltip.
fields = [column for column in gdf.columns.values if column != 'geometry']
tooltip = folium.features.GeoJsonTooltip(fields=fields)
choropleth.geojson.add_child(tooltip)
m

  return _prepare_from_string(" ".join(pjargs))


## ---------------------------------------------------------------------------------------------------------

## Identify industrial districts
Find the top 20 regions sized 50km*50km that contain the highest number of employees (f is numberOfEmployees) working in the production of pasta.

In [230]:
# Parameters of the BRS query: companies whose ATECO column matches 10.73,
# scored on the numberOfEmployees column.
table = "BRS"
topk = 20
eps = 0.5
f = "numberOfEmployees"
dist = True
keywordsColumn = "ATECO"
keywords = "10.73"
keywordsColumn2 = ""  # second filter left empty
keywords2 = ""

# Query-string parameters sent to the BRS endpoint.
data = {
    'topk': topk,
    'eps': eps,
    'f': f,
    'input': table,
    "keywordsColumn": keywordsColumn,
    "keywords": keywords,
    "keywordsColumn2": keywordsColumn2,
    "keywords2": keywords2,
    "dist": dist,
}
response = requests.get(BRS_URL, params=data)
print(response.text)

[
{
"rank":1,
"center":[14.153735145,42.292225],
"score":1426.0
}
,{
"rank":2,
"center":[14.43571968,40.867641045],
"score":1235.0
}
,{
"rank":3,
"center":[9.16166053,45.61729499999999],
"score":1034.0
}
,{
"rank":4,
"center":[12.064605,45.59483],
"score":918.0
}
,{
"rank":5,
"center":[12.589753030000002,41.89389267],
"score":863.0
}
,{
"rank":6,
"center":[7.827308894999999,44.888835],
"score":721.0
}
,{
"rank":7,
"center":[8.99588,44.54887],
"score":631.0
}
,{
"rank":8,
"center":[14.685430000000002,41.370707675000006],
"score":619.0
}
,{
"rank":9,
"center":[11.367768439999999,44.646479525],
"score":593.0
}
,{
"rank":10,
"center":[13.653664999999998,43.07360500000001],
"score":553.0
}
,{
"rank":11,
"center":[10.767778665000002,45.373664999999995],
"score":536.0
}
,{
"rank":12,
"center":[12.770374260000002,43.84423],
"score":449.0
}
,{
"rank":13,
"center":[10.564810000000001,44.629635],
"score":425.0
}
,{
"rank":14,
"center":[10.19115184,43.965001865],
"score":378.0
}
,{
"rank":15,
"cen

## Initialize the map and visualize the output regions

In [220]:
# Draw the regions returned by the previous BRS query as a choropleth map.
res = json.loads(response.text)

# One GeoJSON point feature per region: rank becomes the id, score the value.
results_geojson = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "geometry": {"type": "Point", "coordinates": region['center']},
            "properties": {"id": region['rank'], "score": region['score']},
        }
        for region in res
    ],
}

m = folium.Map(
    location=[44.629635, 10.563514999999999],
    tiles='Stamen Toner',
    zoom_start=11
)

gdf = GeoDataFrame.from_features(results_geojson['features'])
# Plain authority string instead of the deprecated {'init': ...} mapping,
# which makes pyproj emit a FutureWarning.
gdf.crs = 'EPSG:4326'
# buffer(r).envelope is a square with side 2*r; r = eps / 2 gives side eps.
gdf['geometry'] = gdf.buffer(data['eps'] / 2).envelope

# Natural-breaks class limits, with the minimum prepended as the lower edge.
threshold_scale = Natural_Breaks(gdf[value_field], k=num_bins).bins.tolist()
threshold_scale.insert(0, gdf[value_field].min())

# Pass the computed class limits (not a bare bin count) so the natural-breaks
# classification above is actually used, as in the other map cells.
choropleth = folium.Choropleth(gdf, data=gdf, columns=[id_field, value_field],
                               key_on='feature.properties.{}'.format(id_field),
                               fill_color=fill_color, fill_opacity=fill_opacity,
                               threshold_scale=threshold_scale).add_to(m)

# Show every attribute except the geometry in the hover tooltip.
fields = [column for column in gdf.columns.values if column != 'geometry']
tooltip = folium.features.GeoJsonTooltip(fields=fields)
choropleth.geojson.add_child(tooltip)
m

  return _prepare_from_string(" ".join(pjargs))


## ---------------------------------------------------------------------------------------------------------

## Identify areas with a high concentration of restaurants or hotels
This is an example of applying a filter on two columns at the same time: it identifies the top 10 hotspots in Pisa province (look at keywordsColumn) by number (f is null) of restaurants, ice-cream parlours, pastry shops, etc. (look at keywordsColumn2).

In [41]:
# Parameters of the BRS query; f = "null" scores regions by element count.
table = "BRS"
topk = 10
eps = 0.01
f = "null"
dist = True
keywordsColumn = "null"
keywords = "null"
keywordsColumn2 = ""
keywords2 = ""

# Pair each query-string key with its value, keeping the original key order.
data = dict([
    ('topk', topk),
    ('eps', eps),
    ('f', f),
    ('input', table),
    ("keywordsColumn", keywordsColumn),
    ("keywords", keywords),
    ("keywordsColumn2", keywordsColumn2),
    ("keywords2", keywords2),
    ("dist", dist),
])
response = requests.get(BRS_URL, params=data)
# Drops the last 22 characters of the body before printing; presumably a
# trailing non-JSON suffix - TODO confirm what the service appends.
print(response.text[:-22])

[
{
"rank":1,
"center":[10.40094993,43.71824257499999],
"score":24.0
}
,{
"rank":2,
"center":[10.294949595,43.62871611],
"score":13.0
}
,{
"rank":3,
"center":[10.3908863,43.72260633999999],
"score":9.0
}
,{
"rank":4,
"center":[10.378360520000001,43.72100330999999],
"score":7.0
}
,{
"rank":5,
"center":[10.547071110000001,43.67660426000001],
"score":6.0
}
,{
"rank":6,
"center":[10.638674175,43.66692147499999],
"score":6.0
}
,{
"rank":7,
"center":[10.861097015000002,43.402155565],
"score":6.0
}
,{
"rank":8,
"center":[10.618839295,43.667777785],
"score":6.0
}
,{
"rank":9,
"center":[10.40261665,43.706955435000005],
"score":6.0
}
,{
"rank":10,
"center":[10.78296901,43.708708949999995],
"score":6.0
}
]


## Initialize the map and visualize the output regions

In [42]:
# Draw the regions returned by the previous BRS query as a choropleth map.
# The last 22 characters of the body are dropped before parsing; presumably a
# trailing non-JSON suffix - TODO confirm what the service appends.
res = json.loads(response.text[:-22])

# One GeoJSON point feature per region: rank becomes the id, score the value.
results_geojson = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "geometry": {"type": "Point", "coordinates": region['center']},
            "properties": {"id": region['rank'], "score": region['score']},
        }
        for region in res
    ],
}

m = folium.Map(
    location=[43.71682982000001, 10.401120675000001],
    tiles='Stamen Toner',
    zoom_start=11
)

gdf = GeoDataFrame.from_features(results_geojson['features'])
# Plain authority string instead of the deprecated {'init': ...} mapping,
# which makes pyproj emit a FutureWarning.
gdf.crs = 'EPSG:4326'
# buffer(r).envelope is a square with side 2*r; r = eps / 2 gives side eps.
gdf['geometry'] = gdf.buffer(data['eps'] / 2).envelope

# Natural-breaks class limits, with the minimum prepended as the lower edge.
threshold_scale = Natural_Breaks(gdf[value_field], k=num_bins).bins.tolist()
threshold_scale.insert(0, gdf[value_field].min())

choropleth = folium.Choropleth(gdf, data=gdf, columns=[id_field, value_field],
                               key_on='feature.properties.{}'.format(id_field),
                               fill_color=fill_color, fill_opacity=fill_opacity,
                               threshold_scale=threshold_scale).add_to(m)

# Show every attribute except the geometry in the hover tooltip.
fields = [column for column in gdf.columns.values if column != 'geometry']
tooltip = folium.features.GeoJsonTooltip(fields=fields)
choropleth.geojson.add_child(tooltip)
m

  return _prepare_from_string(" ".join(pjargs))


## Remove Tables
Remove the intermediate tables that were downloaded from Proteus.

In [None]:
# Call the removeTables endpoint and print the service's reply.
response = requests.get(url=RemoveTables_URL)
print(response.text)