In [None]:
# Defaults
# Notebook-level parameters; the cells below read these names directly.
# Region name; used to filter the output listing and as a prefix of input/output file names.
REGION = 'fortportal'
# Column used for the choropleth and as the variable of the Moran analysis.
POP = 'grid_population'
# EPSG code the grids are reprojected to right after loading.
UTM = 32636
# Default cluster count for K-Means; the agglomerative clustering defaults to twice this value.
CLUSTER_COUNT = 10

In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

from sklearn import cluster
from pathlib import Path
from pysal.lib import weights
from pysal.explore import esda
from splot.esda import moran_scatterplot, lisa_cluster, plot_local_autocorrelation


# Suppress every warning for the rest of the session.
# NOTE(review): this also hides pandas/geopandas warnings that may point at real problems — consider a narrower filter.
warnings.filterwarnings('ignore')
# Apply the 'fivethirtyeight' matplotlib style to all figures in the notebook.
plt.style.use('fivethirtyeight')
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [None]:
# Directory layout, relative to the notebook's working directory.
CWD = Path('.')
DATA = Path('data')

# Sub-folders of DATA, in order: inter, input, output.
INTER, INPUT, OUTPUT = (DATA / folder for folder in ('inter', 'input', 'output'))

In [None]:
# Shell out to list the entries of the output folder whose names contain REGION.
!ls {OUTPUT} | grep {REGION}

In [None]:
# Read the region's grid output (file name tagged 4326) and reproject it to EPSG:UTM.
grids_gdf = (
    gpd.read_file(f'{OUTPUT}/{REGION}_grids_output_{4326}.geojson')
    .to_crs(epsg=UTM)
)

In [None]:
# Choropleth of the POP column, classed by percentiles, with the axes frame hidden.
ax = grids_gdf.plot(
    column=POP,
    scheme='percentiles',
    cmap='RdYlGn_r',
    edgecolor='black',
    alpha=1,
    legend=True,
    figsize=(10,10),
)
ax.set_axis_off()

In [None]:
# Preview the first rows of the reprojected grid.
grids_gdf.head()

In [None]:
def make_copy(gdf):
    """Return ``gdf.copy()`` so that later pipeline steps can modify the result freely."""
    return gdf.copy()

In [None]:
def moran_cluster(gdf):
    """Label grid cells with their local Moran quadrant and dissolve them per quadrant.

    Cells without any Queen-contiguity neighbour (islands) are dropped, the
    spatial lag of the ``POP`` column is stored as ``w_<POP>``, and a local
    Moran (LISA) statistic is computed on that lagged column.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Grid cells. Must provide ``idx``, ``geometry``, the ``POP`` column,
        ``grid_population``, ``grid_building_count`` and ``grid_building_area``.

    Returns
    -------
    tuple
        ``(gdf, LISA, moran_dissolved_gdf)``:

        * ``gdf`` - the island-free cells with the added columns ``w_<POP>``,
          ``significance``, ``_quad`` and ``quad``;
        * ``LISA`` - the ``esda.Moran_Local`` result;
        * ``moran_dissolved_gdf`` - cells dissolved by ``quad`` with the mean
          of the listed attribute columns.
    """
    # A first weights pass is only needed to find the islands.
    islands = weights.Queen.from_dataframe(gdf, idVariable='idx').islands
    # Explicit copy: the columns added below are written to an independent
    # frame, never to a slice of the caller's data.
    gdf = gdf[~gdf.idx.isin(islands)].copy()

    w = weights.Queen.from_dataframe(gdf, idVariable='idx')
    # Row-standardise so the spatial lag is the neighbours' average. The
    # attribute must be spelled `transform`; a misspelled name is silently
    # accepted as a new attribute and leaves the weights untransformed.
    w.transform = 'R'

    lag_col = f'w_{POP}'
    gdf[lag_col] = weights.lag_spatial(w, gdf[POP])
    # NOTE(review): the LISA is computed on the lagged column, not on POP itself — confirm this is intended.
    LISA = esda.Moran_Local(gdf[lag_col], w)
    gdf['significance'] = LISA.p_sim < 0.05
    gdf['_quad'] = LISA.q
    # Translate the numeric quadrant codes into their usual two-letter labels.
    gdf['quad'] = gdf['_quad'].replace({
        1: 'HH',
        2: 'LH',
        3: 'LL',
        4: 'HL'
    })

    moran_dissolved_gdf = (gdf[['geometry', 'grid_population', 'grid_building_count', 'grid_building_area', 'quad']]
            .dissolve(by='quad', aggfunc='mean')
            .reset_index())

    return gdf, LISA, moran_dissolved_gdf

# Cluster regions based on Local Moran's I (LISA)

In [None]:
# Run the Moran clustering on a copy of the grid so `grids_gdf` stays untouched.
moran_gdf, LISA, moran_dissolved_gdf = (
    grids_gdf
    .pipe(make_copy)
    .pipe(moran_cluster)
)

In [None]:
# Preview the cells together with the columns added by moran_cluster.
moran_gdf.head()

In [None]:
# Number of cells per quadrant label.
moran_gdf['quad'].value_counts()

In [None]:
# LISA cluster map drawn on a single 10x10 axes.
fig, ax = plt.subplots(figsize=(10, 10))
lisa_cluster(LISA, moran_gdf, ax=ax)

In [None]:
# splot's combined local-autocorrelation figure for the lagged population column.
plot_local_autocorrelation(LISA, moran_gdf, f'w_{POP}', figsize=(20, 10))

In [None]:
# Left: quadrant label per cell. Right: cells dissolved by quadrant.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 20))

for frame, axis in ((moran_gdf, ax1), (moran_dissolved_gdf, ax2)):
    frame.plot(
        ax=axis,
        column='quad',
        categorical=True,
        cmap='tab20b',
        edgecolor='black',
        figsize=(20, 20),
        legend=True,
    )
    axis.set_axis_off()

# Cluster regions based on K-Means

In [None]:
def kmeans_cluster(gdf, k=CLUSTER_COUNT, random_state=0):
    """Cluster grid cells on the ``POP`` column with K-Means and dissolve them per cluster.

    The ``k_score`` column is added to ``gdf`` in place; pass a copy if the
    original frame must stay untouched.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Grid cells. Must provide ``geometry``, the ``POP`` column,
        ``grid_population``, ``grid_building_count`` and ``grid_building_area``.
    k : int
        Number of clusters.
    random_state : int or None
        Seed forwarded to ``KMeans`` so repeated runs produce the same labels.
        Pass ``None`` for scikit-learn's non-deterministic initialisation.

    Returns
    -------
    tuple
        ``(gdf, kmeans_dissolved_gdf)``: the labelled cells and the cells
        dissolved by ``k_score`` with the mean of the listed attribute columns.
    """
    # Seeded model: K-Means initialisation is random, and the labels end up in saved outputs.
    model = cluster.KMeans(n_clusters=k, random_state=random_state)
    # Cluster on the configured population column, the same variable the Moran analysis uses.
    gdf['k_score'] = model.fit(gdf[[POP]]).labels_
    kmeans_dissolved_gdf = (gdf[['geometry', 'grid_population', 'grid_building_count', 'grid_building_area', 'k_score']]
                    .dissolve(by='k_score', aggfunc='mean')
                    .reset_index())

    return gdf, kmeans_dissolved_gdf

In [None]:
# Run K-Means on a copy of the grid so `grids_gdf` stays untouched.
kmeans_gdf, kmeans_dissolved_gdf = (
    grids_gdf
    .pipe(make_copy)
    .pipe(kmeans_cluster)
)

In [None]:
# Preview the cells together with their `k_score` label.
kmeans_gdf.head()

In [None]:
# Show the frame dissolved by `k_score`.
kmeans_dissolved_gdf

In [None]:
# Left: K-Means label per cell. Right: cells dissolved by label.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 20))

for frame, axis in ((kmeans_gdf, ax1), (kmeans_dissolved_gdf, ax2)):
    frame.plot(
        ax=axis,
        column='k_score',
        categorical=True,
        cmap='tab20b',
        edgecolor='black',
        figsize=(20, 20),
        legend=True,
    )
    axis.set_axis_off()

# Cluster regions based on spatial similarity

In [None]:
def agglomerative_cluster(gdf, k=CLUSTER_COUNT*2):
    """Cluster grid cells on the ``POP`` column with spatially constrained agglomerative clustering.

    Queen-contiguity weights built from ``gdf`` are passed as the connectivity
    matrix, so merges are restricted to neighbouring cells. The ``a_score``
    column is added to ``gdf`` in place; pass a copy if the original frame
    must stay untouched.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Grid cells. Must provide ``geometry``, the ``POP`` column,
        ``grid_population``, ``grid_building_count`` and ``grid_building_area``.
    k : int
        Number of clusters.

    Returns
    -------
    tuple
        ``(gdf, agglomerative_dissolved_gdf)``: the labelled cells and the cells
        dissolved by ``a_score`` with the mean of the listed attribute columns.
    """
    w = weights.Queen.from_dataframe(gdf)
    model = cluster.AgglomerativeClustering(n_clusters=k, connectivity=w.sparse)
    # Cluster on the configured population column, the same variable the Moran analysis uses.
    gdf['a_score'] = model.fit(gdf[[POP]]).labels_
    agglomerative_dissolved_gdf = (gdf[['geometry', 'grid_population', 'grid_building_count', 'grid_building_area', 'a_score']]
                                       .dissolve(by='a_score', aggfunc='mean')
                                       .reset_index())

    return gdf, agglomerative_dissolved_gdf

In [None]:
# Cluster a copy of the grid: agglomerative_cluster adds `a_score` to the frame it
# receives, and the shared `grids_gdf` must not pick up that column.
agglomerative_gdf, agglomerative_dissolved_gdf = (
    grids_gdf
    .pipe(make_copy)
    .pipe(agglomerative_cluster)
)

In [None]:
# Preview the cells together with their `a_score` label.
agglomerative_gdf.head()

In [None]:
# Show the frame dissolved by `a_score`.
agglomerative_dissolved_gdf

In [None]:
# Left: agglomerative label per cell. Right: cells dissolved by label.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 20))

for frame, axis in ((agglomerative_gdf, ax1), (agglomerative_dissolved_gdf, ax2)):
    frame.plot(
        ax=axis,
        column='a_score',
        categorical=True,
        cmap='tab20b',
        edgecolor='black',
        figsize=(20, 20),
        legend=True,
    )
    axis.set_axis_off()

### Save the analysed results

In [None]:
# Write each dissolved result back in EPSG:4326 as `<OUTPUT>/<REGION>_<method>.geojson`.
for method, dissolved in (
    ('moran', moran_dissolved_gdf),
    ('kmeans', kmeans_dissolved_gdf),
    ('agglomerative', agglomerative_dissolved_gdf),
):
    dissolved.to_crs(epsg=4326).to_file(f'{OUTPUT/REGION}_{method}.geojson', driver='GeoJSON')