# ___

# [ Machine Learning in Geosciences ]

**Department of Applied Geoinformatics and Cartography, Charles University** 

*Lukas Brodsky lukas.brodsky@natur.cuni.cz*


## Spatial Cross-Validation 

using K-Means clustering and random or continuous spatial block resampling (spatial-kfold Python package). 

    1. Spatial clustering with KMeans
    2. Spatial Blocks
        2.1 Spatial resampled random blocks     
        2.2 Continuous spatial resampled blocks
    3. Plotting function 

In [None]:
# spatial k-fold package 
# !pip install spatial-kfold

In [None]:
import os
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.colors as colors
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from mpl_toolkits.axes_grid1.inset_locator import inset_axes

from spatialkfold.blocks import spatial_blocks 
from spatialkfold.datasets import load_ames
from spatialkfold.clusters import spatial_kfold_clusters 
from spatialkfold.plotting import spatial_kfold_plot
from spatialkfold.stats import spatial_kfold_stats

In [None]:
# List the entries of the current working directory.
os.listdir()

In [None]:
# Load ST_LUCAS data
# The point layer is read from a GeoPackage located in the local ./data folder.
path = './data'
lucas_fn = os.path.join(path, 'cz_lucas_points_2018.gpkg')
# `lucas` is the table used as input for all fold-generation steps below.
lucas = gpd.read_file(lucas_fn)

In [None]:
# Preview the first rows of the loaded LUCAS table.
lucas.head()

In [None]:
# Quick-look map of all LUCAS points; a small marker keeps the dense points legible.
lucas.plot(markersize=1)

In [None]:
# Show the coordinate reference system of the layer.
# NOTE(review): the block sizes used later (25000) are presumably in CRS units — confirm the CRS is metric.
lucas.crs

## 1. Spatial clustering with KMeans

In [None]:
# 1. Spatial cluster resampling
# Assign the LUCAS points to 10 folds with the KMeans option of
# spatial_kfold_clusters; the fixed random_state keeps the result repeatable.
lucas_clusters = spatial_kfold_clusters(
    gdf=lucas,
    name='point_id',
    nfolds=10,
    algorithm='kmeans',
    random_state=42,
)


In [None]:
# Preview the first rows of the clustering result.
lucas_clusters.head()

In [None]:
# Get the 'tab20' colormap resampled to 10 entries (one colour per fold).
# `matplotlib.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9;
# `plt.get_cmap` takes the same (name, lut) arguments and is still available.
cols_tab = plt.get_cmap('tab20', 10)
# Generate a list of colors from the colormap (integer index -> one entry)
cols = [cols_tab(i) for i in range(10)]
# Create a discrete color ramp that the plotting cells pass as `cmap`
color_ramp = ListedColormap(cols)

In [None]:
# Map of the clustered points, coloured by the 'folds' column
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 4))
lucas_clusters.plot(
    ax=ax,
    column='folds',
    cmap=color_ramp,
    markersize=2,
    legend=True,
)
ax.set_title('Spatially Clustered Folds')
plt.show()

In [None]:
# Per-fold train/test counts of the dependent variable.
# 'lc1' is the LUCAS Land Cover classification attribute.
lucas_clusters_stats = spatial_kfold_stats(
    X=lucas_clusters,
    y=lucas_clusters['lc1'],
    groups=lucas_clusters['folds'],
)

In [None]:
# Display the per-fold statistics computed for the clustered folds.
lucas_clusters_stats

## 2. Spatial Blocks

### 2.1 Spatial resampled random blocks

In [None]:
# Build 25000 x 25000 blocks over the LUCAS points and
# assign them to 10 folds with the 'random' method (seeded for repeatability).
lucas_rnd_blocks = spatial_blocks(
    gdf=lucas,
    width=25000,
    height=25000,
    method='random',
    nfolds=10,
    random_state=42,
)

In [None]:
# Preview the first rows of the random-block table.
lucas_rnd_blocks.head()

In [None]:
# Random blocks coloured by fold, with the LUCAS points drawn on top in red.
fig, ax = plt.subplots(1, 1, figsize=(9, 4))

lucas_rnd_blocks.plot(column='folds', cmap=color_ramp, ax=ax, lw=0.7, legend=True)
lucas.plot(ax=ax, markersize=1, color='r')
ax.set_title('Random Blocks Folds')
# Render explicitly, like the plotting cells that already end with plt.show();
# this also stops the notebook from echoing the Text object returned by set_title.
plt.show()

In [None]:
# Resample the LUCAS points with the prepared random blocks 

In [None]:
# Intersect the LUCAS points with the random blocks so every retained point
# carries the attributes (including 'folds') of the block it intersects.
lucas_res_rnd_blk = gpd.overlay(
    df1=lucas,
    df2=lucas_rnd_blocks,
    how='intersection',
)

In [None]:
# Preview the points after the overlay with the random blocks.
lucas_res_rnd_blk.head()

In [None]:
# Plot the resampled points: block outlines in grey, points coloured by fold.
fig, ax = plt.subplots(1, 1, figsize=(9, 4))

lucas_rnd_blocks.plot(facecolor="none", edgecolor='grey', ax=ax, lw=0.7)
lucas_res_rnd_blk.plot(column='folds', cmap=color_ramp, legend=True, ax=ax, markersize=3)
ax.set_title('Spatially Resampled\nRandom Blocks')
# Render explicitly, like the plotting cells that already end with plt.show();
# this also stops the notebook from echoing the Text object returned by set_title.
plt.show()

In [None]:

# Per-fold statistics of the land-cover attribute 'lc1' for the random-block folds
lucas_res_rnd_blk_stats = spatial_kfold_stats(
    X=lucas_res_rnd_blk,
    y=lucas_res_rnd_blk['lc1'],
    groups=lucas_res_rnd_blk['folds'],
)


In [None]:
# Display the per-fold statistics for the random-block resampling.
lucas_res_rnd_blk_stats

### 2.2 Continuous spatial resampled blocks
Two options are available for the orientation:

'tb-lr' : top-bottom, left-right

'bt-rl' : bottom-top, right-left

In [None]:
# Build 25000 x 25000 blocks and assign folds with the 'continuous' method,
# using the 'tb-lr' (top-bottom, left-right) orientation.
lucas_cont_blocks = spatial_blocks(
    gdf=lucas,
    width=25000,
    height=25000,
    method='continuous',
    orientation='tb-lr',
    nfolds=10,
    random_state=42,
)

In [None]:
# Continuous blocks coloured by fold, with the LUCAS points drawn on top in red.
fig, ax = plt.subplots(1, 1, figsize=(9, 4))

lucas_cont_blocks.plot(column='folds', cmap=color_ramp, ax=ax, lw=0.7, legend=True)
lucas.plot(ax=ax, markersize=1, color='r')
ax.set_title('Continuous Blocks Folds\norientation:"tb-lr"')
# Render explicitly, like the plotting cells that already end with plt.show();
# this also stops the notebook from echoing the Text object returned by set_title.
plt.show()

In [None]:
# Intersect the LUCAS points with the continuous blocks so every retained
# point carries the attributes (including 'folds') of the block it intersects.
lucas_res_cont_blk = gpd.overlay(
    df1=lucas,
    df2=lucas_cont_blocks,
    how='intersection',
)

In [None]:
# Resampled points coloured by fold over the grey outlines of the continuous blocks
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 4))

lucas_cont_blocks.plot(
    ax=ax,
    facecolor="none",
    edgecolor='grey',
    lw=0.7,
)
lucas_res_cont_blk.plot(
    ax=ax,
    column='folds',
    cmap=color_ramp,
    legend=True,
    markersize=2,
)
ax.set_title('Spatially Resampled\nContinuous Blocks Folds. "tb-lr"')
plt.show()

In [None]:
# Per-fold statistics of the land-cover attribute 'lc1' for the continuous-block folds
lucas_res_cont_blk_stats = spatial_kfold_stats(
    X=lucas_res_cont_blk,
    y=lucas_res_cont_blk['lc1'],
    groups=lucas_res_cont_blk['folds'],
)

In [None]:
# Display the per-fold statistics for the continuous-block resampling.
lucas_res_cont_blk_stats

## 3. Plotting function
Plot the partitioning of the data at each fold

In [None]:
# List the distinct fold labels present in the clustered data
np.unique(lucas_clusters['folds'].values)

In [None]:
# Draw one figure per fold showing how the data are partitioned at that fold.
# The fold count is taken from the data rather than hard-coded to 10, so the
# loop stays correct if `nfolds` is changed in the clustering step.
n_folds = lucas_clusters.folds.nunique()
for i in range(1, n_folds + 1):  # fold numbers are 1-based, as in the original 1..10 loop
    fig, ax = plt.subplots(1, 1, figsize=(9, 4))

    spatial_kfold_plot(X=lucas_clusters,
                       geometry=lucas_clusters.geometry,
                       groups=lucas_clusters.folds,
                       fold_num=i, cmap='viridis', ax=ax)
    # Render each figure as soon as it is complete, like the other plotting cells.
    plt.show()