## Demo 7: Mixture Clusters
### Discover regions with high or low diversity in the types of POIs they contain

In [None]:
# Suppress all Python warnings so they do not clutter the notebook output
import warnings
warnings.filterwarnings('ignore')

# Add the parent directory to the module search path (presumably so that the
# local `loci` package imported below is found without being installed)
import sys
sys.path.append('..')

%load_ext autoreload
%autoreload 2

import math
from shapely.geometry import Polygon

import loci as lc
from loci import io
from loci import mbrs

### Read data (using LOCI I/O methods)

In [None]:
# Longitude / latitude extent of the area of interest (around Athens)
min_lon, max_lon = 23.55, 24
min_lat, max_lat = 37.8, 38.15

# Rectangle over that extent; the ring is closed explicitly by repeating
# the first corner as the last one.
bound = Polygon([
    (min_lon, min_lat),
    (min_lon, max_lat),
    (max_lon, max_lat),
    (max_lon, min_lat),
    (min_lon, min_lat),
])

In [None]:
# Remote zipped CSV with the POIs of Greece (see the file name in the URL)
remote_file = 'http://download.slipo.eu/results/osm-to-csv/europe/europe_greece-pois.osm.csv.zip'

# Import the POIs, passing the Athens polygon as `bound` and requesting
# EPSG:4326 as the target CRS.
pois = lc.io.import_osmwrangle(
    remote_file,
    bound=bound,
    target_crs='EPSG:4326',
)

# Preview the first rows of the imported data
pois.head()

In [None]:
# Derive `types` and `colors` from the loaded POIs: `len(types)` sizes the
# 'max_se' setting below, and `colors` is handed to the map functions.
types, colors = lc.mbrs.get_types(pois)

### Set parameters for execution

In [None]:
params = {
    # Tunable values; each one is wrapped in a dict under the key 'current'
    'variables': {
        'eps': {'current': 0.002},
        'max_size': {'current': 100},
        'size_weight': {'current': 0.1},
        'time_budget': {'current': 30},
    },
    # Detection method to run
    'methods': {'current': 'ExpHybrid'},
    # Either 'high' or 'low'
    'entropy_mode': {'current': 'high'},
    'settings': {
        'top_k': 10,
        # Natural logarithm of the number of POI types
        'max_se': math.log(len(types)),
        'seeds_ratio': 0.05,
        'overlap_threshold': 0.2,
        # When True, the following cells take their grid-based branch
        'use_grid': False,
    },
}
# Echo the full parameter set as the cell output
params

### Pre-process (run only ONCE; repeat only if parameter `eps` or setting `use_grid` changes)

In [None]:
# Build the graph and R-tree index that the detection step operates on.
eps = params['variables']['eps']['current']
if params['settings']['use_grid']:
    # Create a grid-based GeoDataFrame by aggregating the input points into square cells
    prtree, gdf_grid = lc.mbrs.partition_data_in_grid(pois, eps)
    # Create graph and R-tree index over this grid-based GeoDataFrame of cell centroids.
    # CAUTION: cells that touch only at a corner must also count as neighbors,
    # so search with eps*sqrt(2); the extra 1.001 factor adds a little slack
    # (presumably to absorb floating-point rounding).
    G, rtree = lc.mbrs.create_graph(gdf_grid, 1.001 * math.sqrt(2) * eps)
else:
    # Create graph and R-tree index over the original input points
    G, rtree = lc.mbrs.create_graph(pois, eps)

### Run the specified detection method

In [None]:
# Run the detection over the grid cells when 'use_grid' is set, otherwise over
# the original points; every other argument is the same in both modes.
# The conditional expression is lazy, so `gdf_grid` is only looked up in grid mode.
topk_regions, updates = lc.mbrs.run(
    gdf_grid if params['settings']['use_grid'] else pois,
    G, rtree, types, params, eps,
)

In [None]:
# Report rank, overall score and number of points for each of the top-k regions
for rank, region in enumerate(topk_regions, start=1):
    score = region[0]
    # region[2] holds two collections that are merged with .union()
    # (presumably sets of point identifiers), so shared members count once.
    num_points = len(region[2][0].union(region[2][1]))
    print(rank, score, num_points)

### Display map with the detected top-k regions

In [None]:
# Build the map with the function that matches the mode used for detection.
if params['settings']['use_grid']:
    # Grid mode: also needs the grid index and cells built during pre-processing
    m = lc.mbrs.show_map_topk_grid_regions(pois, prtree, colors, gdf_grid, 1.001 * eps, topk_regions)
else:
    m = lc.mbrs.show_map_topk_convex_regions(pois, colors, topk_regions)
# Last expression of the cell: renders the map in the notebook
m