# MAGE: Mixture-based Areas of Interest over Geolocated Entities

In [None]:
from __future__ import print_function
import ipywidgets as widgets
from IPython.display import display, clear_output

import warnings
warnings.filterwarnings('ignore')

import sys
sys.path.append('../code/')

%load_ext autoreload
%autoreload 2

import mbrs

import geopandas as gpd
import datetime
import folium
from folium import plugins
from shapely.geometry import Polygon
from statistics import mean, median
import networkx as nx
from random import sample
import time
import math
import panel as pn
import json
import pandas as pd
pd.set_option('display.max_rows', 100)
pn.extension()

In [None]:
### Global variables for spatial data

# Style dictionary passed to the widgets created in the cells below
description_style = {'description_width': 'initial'}

# Input GeoDataFrame and the result of mbrs.kwds_freq(); assigned by load_data()
gdf, kwds_freq = None, None

# Connectivity graph and R-tree index; assigned by mbrs_graph()
mbrs_G, mbrs_rtree = None, None

In [None]:
### WIDGETS FOR 'LOAD DATA' ###

def _load_text(description, placeholder):
    """Build an empty text box with the given caption and placeholder."""
    return widgets.Text(value='', description=description, style=description_style, placeholder=placeholder)


def _load_heading(text):
    """Build the label shown as the first entry of a panel."""
    return widgets.Label(value=text, style=description_style)


def _load_panel(*members):
    """Place the given widgets in a box that uses the shared column layout."""
    return widgets.Box(children=list(members), layout=vertical_layout)


# Bordered column layout shared by the three panels
vertical_layout = widgets.Layout(
    display='flex',
    flex_flow='column',
    border='2px solid #72bcd4',
    align_items='inherit',
    margin='12px',
)

# Input file and column mapping
w_file_label = _load_heading('Input file specifications:')
w_input_file = _load_text('File path:', 'osmpois-greece.csv')
w_col_sep = _load_text('Column separator:', ';')
w_col_id = _load_text('ID column:', 'id')
w_col_name = _load_text('Name column:', 'name')
w_col_lon = _load_text('Longitude column:', 'lon')
w_col_lat = _load_text('Latitude column:', 'lat')
w_col_kwds = _load_text('Keywords column:', 'keywords')
w_kwds_sep = _load_text('Keywords separator:', ',')

# Optional reprojection
w_transform_label = _load_heading('[Optional] Transform to another reference system:')
w_source_crs = _load_text('Source CRS:', 'EPSG:4326')
w_target_crs = _load_text('Target CRS:', 'EPSG:4326')

# Optional bounding-box filter
w_crop_label = _load_heading('[Optional] Filter entities within a bounding box:')
w_min_lon = _load_text('Min lon:', '23.48')
w_max_lon = _load_text('Max lon:', '23.98')
w_min_lat = _load_text('Min lat:', '37.83')
w_max_lat = _load_text('Max lat:', '38.08')

# One panel per group of settings
w_file_panel = _load_panel(
    w_file_label, w_input_file, w_col_sep, w_col_id, w_col_name,
    w_col_lon, w_col_lat, w_col_kwds, w_kwds_sep,
)
w_transform_panel = _load_panel(w_transform_label, w_source_crs, w_target_crs)
w_crop_panel = _load_panel(w_crop_label, w_min_lon, w_max_lon, w_min_lat, w_max_lat)

w_button_load_data = widgets.Button(description='Load', style=description_style)
w_out_load_data = widgets.Output()

# File panel on the left, the two optional panels stacked on the right,
# then the button and the output area underneath
spatial_ui_load_data = widgets.VBox([
    widgets.HBox([w_file_panel, widgets.VBox([w_transform_panel, w_crop_panel])]),
    w_button_load_data,
    w_out_load_data,
])


In [None]:
### FUNCTIONS FOR 'LOAD DATA' ###

def load_data(btn):
    """Load the input CSV described in the 'Load' tab into the global ``gdf``.

    The widget values supply the file path, column names, separators and CRS
    codes passed to ``mbrs.read_csv``. When all four bounding-box fields are
    filled in, the data is additionally cropped with ``mbrs.crop``. The result
    is displayed in ``w_out_load_data`` and the output of ``mbrs.kwds_freq`` is
    stored in the global ``kwds_freq``.

    A graph built for a previously loaded dataset is discarded, so that regions
    cannot be computed on a graph that no longer matches ``gdf``.

    Args:
        btn: Button instance passed by ``on_click``; not used.
    """
    global gdf
    global kwds_freq
    global mbrs_G
    global mbrs_rtree

    # Do all the work inside the output widget: messages and exceptions raised
    # by a button callback are then shown in the widget instead of being lost
    with w_out_load_data:
        w_out_load_data.clear_output()

        # No reprojection specified: use EPSG:4326 for both reference systems
        if w_source_crs.value == '' and w_target_crs.value == '':
            w_source_crs.value = 'EPSG:4326'
            w_target_crs.value = 'EPSG:4326'

        # Read input CSV file into a geodataframe
        loaded = mbrs.read_csv(input_file=w_input_file.value,
                               sep=w_col_sep.value,
                               col_id=w_col_id.value,
                               col_name=w_col_name.value,
                               col_lon=w_col_lon.value,
                               col_lat=w_col_lat.value,
                               col_kwds=w_col_kwds.value,
                               kwds_sep=w_kwds_sep.value,
                               source_crs=w_source_crs.value,
                               target_crs=w_target_crs.value)

        # Crop input data only when the bounding box is fully specified
        bbox = (w_min_lon.value, w_min_lat.value, w_max_lon.value, w_max_lat.value)
        if all(bound != '' for bound in bbox):
            min_lon, min_lat, max_lon, max_lat = (float(bound) for bound in bbox)
            loaded = mbrs.crop(loaded, min_lon, min_lat, max_lon, max_lat)
            loaded.reset_index(drop=True, inplace=True)

        # Publish the dataset only once reading and cropping have succeeded
        gdf = loaded

        # The previous graph and index were built on the previous dataset
        mbrs_G = None
        mbrs_rtree = None

        display(gdf)

        # Calculate keyword frequency
        kwds_freq = mbrs.kwds_freq(gdf)

# Run load_data() whenever the 'Load' button is clicked
w_button_load_data.on_click(load_data)

In [None]:
### WIDGETS FOR 'MAP: MIXTURE CLUSTERS' ###

# Preprocess
w_mbrs_eps = widgets.BoundedFloatText(
    value=0.001,
    min=0.001,
    step=0.001,
    description='Radius: ',
    style=description_style,
)
w_mbrs_use_grid = widgets.Checkbox(
    value=False,
    description='Apply grid partitioning (cell side = radius)',
    style=description_style,
)
w_mbrs_use_lda = widgets.Checkbox(
    value=False,
    description='Detect topics from keywords (LDA)',
    style=description_style,
)
w_mbrs_num_topics = widgets.BoundedIntText(
    value=10,
    min=1,
    max=20,
    step=1,
    description='Number of topics: ',
    style=description_style,
)

# The LDA checkbox starts unticked, so the topic count starts out hidden
w_mbrs_num_topics.layout.visibility = 'hidden'

def hide_topics(widg):
    """Show the topic-count box only while the LDA checkbox is ticked.

    Args:
        widg: Change notification passed by ``observe``; not used.
    """
    lda_enabled = w_mbrs_use_lda.value
    w_mbrs_num_topics.layout.visibility = 'visible' if lda_enabled else 'hidden'

# Re-evaluate the visibility of the topic count whenever the LDA checkbox changes
w_mbrs_use_lda.observe(hide_topics, names=['value'])

# Button and statistics area of the 'Preprocess' tab
w_button_mbrs_graph = widgets.Button(description='Create Graph')
w_stats_mbrs = widgets.Output(layout={'width': '50%'})
ui_graph_mbrs = widgets.VBox([
    w_mbrs_eps,
    w_mbrs_use_grid,
    w_mbrs_use_lda,
    w_mbrs_num_topics,
    w_button_mbrs_graph,
    w_stats_mbrs,
])

# Discover regions
w_mbrs_max_size = widgets.BoundedIntText(
    value=100, min=2, max=500, step=1,
    description='Max size: ', style=description_style,
)
w_mbrs_size_weight = widgets.BoundedFloatText(
    value=0.1, min=0, step=0.01,
    description='Size weight: ', style=description_style,
)
w_mbrs_time_budget = widgets.BoundedIntText(
    value=30, min=1, step=1,
    description='Time budget (sec): ', style=description_style,
)
w_mbrs_entropy_mode = widgets.Dropdown(
    options=['high', 'low'],
    description='Entropy mode: ', style=description_style,
)
w_mbrs_method = widgets.Dropdown(
    options=['CircularScan', 'ExpandAll', 'ExpandBest', 'AdaptiveHybrid', 'AdaptiveGrid'],
    description='Method:', style=description_style,
)
w_mbrs_seeds_ratio = widgets.BoundedIntText(
    value=1, min=1, max=100, step=1,
    description='Initial seeds (%): ', style=description_style,
)
w_mbrs_overlap = widgets.BoundedIntText(
    value=20, min=0, max=100, step=1,
    description='Max overlap (%): ', style=description_style,
)
w_mbrs_topk = widgets.BoundedIntText(
    value=10, min=1, max=20, step=1,
    description='Top k: ', style=description_style,
)

# Button and map area of the 'Discover' tab
w_button_show_mbrs = widgets.Button(description='Compute Regions')
w_map_mbrs = widgets.Output(layout={'width': '60%'})
ui_map_mbrs = widgets.VBox([
    w_mbrs_max_size,
    w_mbrs_size_weight,
    w_mbrs_time_budget,
    w_mbrs_entropy_mode,
    w_mbrs_method,
    w_mbrs_seeds_ratio,
    w_mbrs_overlap,
    w_mbrs_topk,
    w_button_show_mbrs,
    w_map_mbrs,
])

In [None]:
### FUNCTIONS FOR 'MAP: MIXTURE CLUSTERS' ###

def mbrs_graph(btn):
    """Build the spatial connectivity graph and R-tree index for the loaded data.

    Depending on the 'Preprocess' settings, the graph is created either over
    the input points or over grid cells produced by ``mbrs.partition_data_in_grid``
    (or its LDA variant). When LDA is enabled, ``gdf`` is first replaced by the
    result of ``mbrs.topic_modeling``. The graph and index are stored in the
    globals ``mbrs_G`` and ``mbrs_rtree``; in grid mode the grid and its index
    are stored in ``gdf_grid`` and ``mbrs_prtree``. Degree and connected-component
    statistics are printed in ``w_stats_mbrs``.

    Args:
        btn: Button instance passed by ``on_click``; not used.
    """
    global gdf
    global mbrs_G
    global mbrs_rtree
    global gdf_grid
    global mbrs_prtree

    # Do all the work inside the output widget: messages and exceptions raised
    # by a button callback are then shown in the widget instead of being lost
    with w_stats_mbrs:
        w_stats_mbrs.clear_output()

        if gdf is None:
            print('No dataset is loaded.')
            return

        print('Creating spatial connectivity graph...')

        eps = w_mbrs_eps.value
        use_lda = w_mbrs_use_lda.value

        # Apply LDA on the input points
        if use_lda:
            gdf = mbrs.topic_modeling(gdf, label_col='id', kwds_col='kwds', num_of_topics=int(w_mbrs_num_topics.value), kwds_per_topic=10)

        # Create graph on input points or grid cells, depending on user specs
        if w_mbrs_use_grid.value:
            # Create a grid-based GeoDataFrame by aggregating the input points into square cells
            if use_lda:
                mbrs_prtree, gdf_grid = mbrs.partition_data_in_grid_lda(gdf, float(eps))
            else:
                mbrs_prtree, gdf_grid = mbrs.partition_data_in_grid(gdf, float(eps))
            # Create graph and R-tree index over this grid-based GeoDataFrame of cell centroids
            # CAUTION: Adjacent cells at the corners of each cell must also be considered neighbors -> search with eps*sqrt(2)
            mbrs_G, mbrs_rtree = mbrs.create_graph(gdf_grid, 1.001*math.sqrt(2)*eps, use_lda)
        else:
            # A grid left over from an earlier grid-based run does not belong to this graph
            mbrs_prtree, gdf_grid = None, None
            # Create graph and R-tree index over the original input points
            mbrs_G, mbrs_rtree = mbrs.create_graph(gdf, eps, use_lda)

        degrees = [degree for _, degree in mbrs_G.degree()]

        print('Graph created successfully.')

        # max() and mean() below are undefined for a graph without nodes
        if not degrees:
            print('The graph has no nodes.')
            return

        # check max node degree
        # NOTE(review): the reported maximum is the largest degree plus one,
        # presumably to count the node itself -- confirm this is intended
        max_degree = max(degrees) + 1
        mean_degree = mean(degrees)
        median_degree = median(degrees)
        print('Max degree: ' + str(max_degree) + ' Mean degree: ' + str(mean_degree) + ' Median degree: ' + str(median_degree))

        # check connected components
        largest_component = max(len(component) for component in nx.connected_components(mbrs_G))
        print('Max connected component: ' + str(largest_component))

            
def mbrs_regions(btn):
    """Detect the top-k regions and display them on a map in ``w_map_mbrs``.

    Reads the 'Discover' settings from the widgets, assembles the ``params``
    dictionary, calls ``mbrs.run`` on the grid cells or on the input points
    (depending on the grid checkbox) and renders the result with the matching
    ``mbrs.show_map_topk_*`` function. Requires that a dataset has been loaded
    and that a graph has been created.

    Args:
        btn: Button instance passed by ``on_click``; not used.
    """
    # Do all the work inside the output widget: messages and exceptions raised
    # by a button callback are then shown in the widget instead of being lost
    with w_map_mbrs:
        w_map_mbrs.clear_output()

        if gdf is None or mbrs_G is None:
            print('No dataset or graph is loaded.')
            return

        use_grid = w_mbrs_use_grid.value
        use_lda = w_mbrs_use_lda.value

        # The grid and its index are only assigned when a graph is created with
        # grid partitioning, so they may not exist yet when the box is ticked here
        grid = globals().get('gdf_grid')
        grid_index = globals().get('mbrs_prtree')
        if use_grid and (grid is None or grid_index is None):
            print('No grid is available. Re-create the graph with grid partitioning enabled.')
            return

        print('Detecting regions...')

        seeds_ratio = int(w_mbrs_seeds_ratio.value) / 100
        overlap_threshold = int(w_mbrs_overlap.value) / 100
        types, colors = mbrs.get_types(gdf)

        # Keep the topic count in a local variable instead of writing 0 into
        # w_mbrs_num_topics: that widget is bounded to min=1, and the write
        # would also discard the number of topics chosen by the user
        if use_lda:
            lda_topics = int(w_mbrs_num_topics.value)
            max_se = math.log(lda_topics)
        else:
            lda_topics = 0
            max_se = math.log(len(types))

        params = {
            'variables': {
                'max_size': {'current': w_mbrs_max_size.value},
                'size_weight': {'current': w_mbrs_size_weight.value},
                'time_budget': {'current': w_mbrs_time_budget.value},
                'eps': {'current': w_mbrs_eps.value}
            },
            'methods': {'current': w_mbrs_method.value},
            'entropy_mode': {'current': w_mbrs_entropy_mode.value},
            'settings': {'top_k': int(w_mbrs_topk.value),
                         'max_se': max_se,
                         'seeds_ratio': float(seeds_ratio),
                         'overlap_threshold': float(overlap_threshold),
                         'use_lda': use_lda,
                         'lda_topics': lda_topics}
        }

        # Detect regions over the grid cells or over the original points
        dataset = grid if use_grid else gdf
        topk_regions, _updates = mbrs.run(dataset, mbrs_G, mbrs_rtree, types, params, float(w_mbrs_eps.value))

        # Display regions on map
        if use_grid:
            m = mbrs.show_map_topk_grid_regions(gdf, grid_index, colors, grid, 1.001*float(w_mbrs_eps.value), topk_regions, use_lda)
        else:
            m = mbrs.show_map_topk_convex_regions(gdf, colors, topk_regions, use_lda)

        # Replace the progress message with the map
        w_map_mbrs.clear_output()
        display(m)

            
# Run mbrs_graph() whenever the 'Create Graph' button is clicked
w_button_mbrs_graph.on_click(mbrs_graph)

# Run mbrs_regions() whenever the 'Compute Regions' button is clicked
w_button_show_mbrs.on_click(mbrs_regions)

In [None]:
### TABS ###
tab_mbrs = widgets.Tab()
tab_mbrs.children = [spatial_ui_load_data, ui_graph_mbrs, ui_map_mbrs]

# Titles are listed in the same order as the children above
for tab_index, tab_title in enumerate(['Load', 'Preprocess', 'Discover']):
    tab_mbrs.set_title(tab_index, tab_title)

display(tab_mbrs)