# Spatial Interpolation: Kriging

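- Kriging is a non-deterministic (geostatistical) interpolation method
- Besides the estimates, it provides a measure of their uncertainty (the kriging variance)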

In [None]:
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (16, 9)


In [None]:
import rasterio

In [None]:
from rasterio.mask import mask
import geopandas as gpd
import fiona
import pandas as pd
import pykrige

In [None]:
precip = gpd.read_file("precip_sd.geojson")


In [None]:
precip.plot(column='inches', legend=True);

In [None]:
county = gpd.read_file("sdcounty.geojson")

In [None]:
m = county.explore()
precip.explore(column='inches', m=m)


### Interpolate to Grids


In [None]:
import tobler

In [None]:
# Project both layers to one shared UTM CRS. Estimating a CRS separately for
# each layer could select different zones when their extents differ, which
# would make the distance-based methods below compare mismatched coordinates.
utm_crs = county.estimate_utm_crs()
county_utm = county.to_crs(utm_crs)
precip_utm = precip.to_crs(utm_crs)

In [None]:
from tobler.util import h3fy


In [None]:
county_h3 = h3fy(county_utm)

In [None]:
county_h3.plot()

In [None]:
m = county_h3.plot(color='grey')
precip_utm.plot(column='inches', ax=m, legend=True);

In [None]:
county_h3.shape

In [None]:
import pykrige.kriging_tools as kt
from pykrige.ok import OrdinaryKriging
import numpy

# Toy sample points: each row is (x, y, value); the columns are passed to
# OrdinaryKriging in that order in the next cell.
data = numpy.array(
    [
        [0.3, 1.2, 0.47],
        [1.9, 0.6, 0.56],
        [1.1, 3.2, 0.74],
        [3.3, 4.4, 1.47],
        [4.7, 3.8, 1.74],
    ]
)

# Regular grid axes from 0.0 to 5.0 in steps of 0.5 (arange excludes the stop value).
gridx = numpy.arange(0.0, 5.5, 0.5)
gridy = numpy.arange(0.0, 5.5, 0.5)

In [None]:
# Ordinary kriging on the toy points (x, y, value columns of `data`) with a
# linear variogram model; console output and built-in plotting are switched off.
OK = OrdinaryKriging(
    data[:, 0],
    data[:, 1],
    data[:, 2],
    variogram_model="linear",
    verbose=False,
    enable_plotting=False,
)


In [None]:
z, ss = OK.execute("grid", gridx, gridy)


In [None]:
# Write the interpolated grid to "output.asc" (a relative path, so it lands in
# the working directory), then show the grid as an image.
kt.write_asc_grid(gridx, gridy, z, filename="output.asc")
plt.imshow(z)
plt.show()

In [None]:
OK.display_variogram_model()

In [None]:
# on our data

In [None]:
# need data as an array x, y, z
xy = precip_utm.get_coordinates().values
xy

In [None]:
z = precip_utm.inches.values
z

In [None]:
z = z.reshape(-1, 1)


In [None]:
# Stack coordinates and observed values column-wise into one array
# (columns x, y, inches -- assuming 2-D point coordinates; this rebinds
# `data`, which held the toy points above).
data = numpy.hstack([xy,z])

In [None]:
# Ordinary kriging on the station data (first three columns of `data`) with a
# Gaussian variogram model; this rebinds `OK`, replacing the toy model above.
OK = OrdinaryKriging(
    data[:, 0],
    data[:, 1],
    data[:, 2],
    variogram_model='gaussian',
    verbose=False,
    enable_plotting=False
)

In [None]:
OK.display_variogram_model()

In [None]:
data.shape

In [None]:
OK.get_variogram_points()

In [None]:
OK.print_statistics()

In [None]:
targetxy = county_h3.centroid.get_coordinates().values
targetxy

In [None]:
# Krige at the hexagon centroids: `z` receives the estimates and `ss` the
# kriging variance at each target point. Note that this rebinds `z`, which
# held the observed station values above.
z, ss = OK.execute('points', targetxy[:,0], targetxy[:,1])

In [None]:
z

In [None]:
z.data.shape

In [None]:
targetxy.shape

In [None]:
county_h3['ok_est'] = z.data

In [None]:
county_h3.plot(column='ok_est')

### Nearest neighbor

In [None]:
hcents = county_h3.centroid


In [None]:
m = hcents.plot(color='r')
precip_utm.plot(column='inches', ax=m, legend=True);

In [None]:
gpd.sjoin_nearest(county_h3, precip_utm, distance_col="distances",
    lsuffix="left", rsuffix="right", exclusive=True)

In [None]:
county_h3.shape

In [None]:
# Nearest-station estimate for every hexagon.
# Only the hexagon geometry and the stations' `inches` column are joined, so
# the result always exposes a plain `inches` column -- even when this cell is
# re-run after `county_h3` has gained its own `inches` column further down
# (the name clash would otherwise be suffixed and break the column lookup).
nearest = gpd.sjoin_nearest(
    county_h3[[county_h3.geometry.name]],
    precip_utm[["inches", precip_utm.geometry.name]],
    distance_col="distances",
    lsuffix="left",
    rsuffix="right",
    exclusive=True,
)
# Equidistant stations yield several rows for one hexagon; keep the first so
# the index stays unique and aligns with `county_h3` on assignment.
county_h3['nn1_est'] = nearest.loc[~nearest.index.duplicated(keep="first"), "inches"]

In [None]:
county_h3.plot(column='nn1_est', legend=True);

### Knn5

In [None]:
from sklearn.neighbors import KNeighborsRegressor

# Toy 1-D example: with two neighbours, the prediction at 1.5 averages the
# targets of the samples at 1 and 2.
X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
neigh = KNeighborsRegressor(n_neighbors=2).fit(X, y)
print(neigh.predict([[1.5]]))



In [None]:
# Number of nearest stations averaged for each prediction
neighbors = 5

# Initialize KNN regressor
knn_regressor = KNeighborsRegressor(n_neighbors = neighbors, weights = "uniform") # uniform weights: every neighbor counts equally (no distance decay)

# Fit to the observed station locations (coordinates -> inches)
knn_regressor.fit(precip_utm.get_coordinates(), precip_utm.inches)

In [None]:
knn_regressor.predict(hcents.get_coordinates())


In [None]:
county_h3['nn5_est'] = knn_regressor.predict(hcents.get_coordinates())


In [None]:
county_h3.plot(column='nn5_est', legend=True);

In [None]:
import matplotlib.pyplot as plt
# Side-by-side maps of the 1-neighbour and 5-neighbour estimates.
f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
panels = ((ax1, 'nn1_est', "knn=1"), (ax2, 'nn5_est', "knn=5"))
for panel_ax, est_col, panel_title in panels:
    county_h3.plot(column=est_col, ax=panel_ax)
    panel_ax.set_title(panel_title)



In [None]:
# Number of nearest stations combined for each prediction
neighbors = 5

# Initialize KNN regressor (this rebinds `knn_regressor`, replacing the
# uniform-weight model fitted above)
knn_regressor = KNeighborsRegressor(n_neighbors = neighbors, weights = "distance") # inverse distance weighting
# Fit to the observed station locations (coordinates -> inches)
knn_regressor.fit(precip_utm.get_coordinates(), precip_utm.inches)

In [None]:
county_h3['nn5id_est'] = knn_regressor.predict(hcents.get_coordinates())


In [None]:
# Side-by-side maps of the unweighted and inverse-distance-weighted estimates.
f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
panels = (
    (ax1, 'nn5_est', "knn=5 (Unweighted)"),
    (ax2, 'nn5id_est', "knn=5 (Inverse Distance Weights)"),
)
for panel_ax, est_col, panel_title in panels:
    county_h3.plot(column=est_col, ax=panel_ax)
    panel_ax.set_title(panel_title)


## Fit

In [None]:
h3_cents = county_h3.centroid
import rioxarray
import rasterio

In [None]:
# get observed values for all grid cells


clipped = rasterio.open("clipped_example.tif")

In [None]:
clipped.meta

In [None]:
# Reproject the hexagon centroids into the raster's CRS and collect them as
# (x, y) tuples for sampling.
h3_cents_4326 = h3_cents.to_crs(clipped.meta['crs'])
cp = h3_cents_4326
coord_list = list(zip(cp.x, cp.y))


In [None]:
# `sample` yields one array of band values per coordinate; keep the first band.
# NOTE(review): nodata cells are not masked here -- confirm none fall under
# the hexagon centroids, otherwise they would distort the fit statistics.
observations = [x[0] for x in clipped.sample(coord_list)]
county_h3['inches'] = observations

In [None]:
county_h3.plot(column='inches', legend=True)

In [None]:
# calculate fit for each approach (MAPE)
def mape(est, obs):
    """Mean absolute percentage error of estimates against observations.

    Parameters
    ----------
    est, obs : numpy.ndarray or pandas.Series
        Estimated and observed values, aligned element by element.

    Returns
    -------
    float
        Mean of ``|est - obs| / |obs| * 100``. Observations equal to zero
        use a denominator of 1 so the ratio stays finite.
    """
    abs_err = numpy.abs(est - obs)
    # Divide by the magnitude of the observation: a signed denominator would
    # make the "absolute" percentage error negative for negative observations.
    abs_obs = numpy.abs(obs)
    den = abs_obs + (abs_obs == 0)
    return (abs_err / den * 100).mean()

In [None]:
mape(county_h3.nn1_est, county_h3.inches)

In [None]:
mape(county_h3.nn5_est, county_h3.inches)

In [None]:
mape(county_h3.nn5id_est, county_h3.inches)

In [None]:
mape(county_h3.ok_est, county_h3.inches)

In [None]:
# plot fit


In [None]:
# map errors for different models

In [None]:
# Signed error of the nearest-neighbour estimate (positive = over-estimate).
en1 = county_h3.nn1_est - county_h3.inches

county_h3['nn1_error'] = en1

# Centre the diverging colormap on zero so that zero error maps to the neutral
# midpoint and over-/under-estimates of equal size get equally strong colours.
nn1_lim = en1.abs().max()
county_h3.plot(column='nn1_error', legend=True, cmap='coolwarm', vmin=-nn1_lim, vmax=nn1_lim)

In [None]:
# Signed error of the inverse-distance-weighted estimate (positive = over-estimate).
en5id = county_h3.nn5id_est - county_h3.inches

county_h3['nn5id_error'] = en5id

# Centre the diverging colormap on zero so that zero error maps to the neutral
# midpoint and over-/under-estimates of equal size get equally strong colours.
nn5id_lim = en5id.abs().max()
county_h3.plot(column='nn5id_error', legend=True, cmap='coolwarm', vmin=-nn5id_lim, vmax=nn5id_lim)

In [None]:
import seaborn as sns

In [None]:
sns.scatterplot(x=county_h3.inches, y=county_h3.nn5id_error);

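**WIP below here.** The remaining cells are unfinished: they use `tracts` and `gdf`, which are not defined in the cells above, so they will not run on a fresh kernel.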

### Surface to Area Interpolation

#### Spatial Join on Centroid

In [None]:
cents = tracts.centroid

In [None]:
cents.plot()

In [None]:
type(cents)

In [None]:
# Sample the raster at each tract centroid, keeping the first band.
# NOTE(review): unlike the hexagon centroids above, these points are not
# reprojected to the raster CRS first -- confirm `tracts` already uses it.
coord_list = [(x, y) for x, y in zip(cents.x, cents.y)]
tracts['centest'] = [x[0] for x in clipped.sample(coord_list)]
tracts.head()

In [None]:
tracts['centroid'] = tracts.centroid
tracts.set_geometry('centroid', inplace=True)

In [None]:
tracts.plot(column='centest', legend=True);

In [None]:
tracts.set_geometry('geometry', inplace=True)
tracts.plot(column='centest', legend=True);

#### Zonal Methods of Surface to Area Interpolation

In [None]:
import rasterstats

In [None]:
gdf.head()

In [None]:
tracts.plot()

In [None]:
from rasterstats import zonal_stats
# Per-tract summary statistics (count, min, mean, max, median) of the raster.
# NOTE(review): presumably `tracts` must share the raster's CRS (the
# commented-out variant below reprojects first) -- confirm.
tstats = zonal_stats(tracts, "clipped_example.tif",
            stats="count min mean max median")

#elevations2 = zonal_stats(
#    sd_tracts.to_crs(dem.rio.crs),  # Geotable with zones
#    "../data/nasadem/nasadem_sd.tif",  # Path to surface file
#)
#elevations2 = pandas.DataFrame(elevations2)

In [None]:
tstats[:5]

In [None]:
tstats = pd.DataFrame(tstats)

In [None]:
tstats.head()

In [None]:
tstats.shape

In [None]:
tracts.shape

In [None]:
tracts['mean'] = tstats['mean'].values
tracts.plot(column='mean', legend=True);

In [None]:
tracts['median'] = tstats['median'].values
tracts.plot(column='median', legend=True);

In [None]:
tracts['range'] = tstats['max'].values - tstats['min'].values
tracts.plot(column='range', legend=True);

In [None]:
import matplotlib.pyplot as plt

In [None]:
import seaborn as sns

In [None]:
# Centroid-sampled value vs. zonal mean for each tract.
sns.scatterplot(data=tracts, x='centest', y='mean')
# 1:1 reference line; the 10-40 range is hard-coded.
plt.plot([10, 40], [10, 40]);

In [None]:
# Zonal median vs. zonal mean for each tract.
sns.scatterplot(data=tracts, x='median', y='mean')
# 1:1 reference line; the 10-40 range is hard-coded.
plt.plot([10, 40], [10, 40]);