# Example 01: Regression Kriging 

Demonstrates an ordinary kriging method without any regression features. Additionally, it demonstrates a basic regression kriging approach with a small number of coarse-resolution features. 

## 1. Call Imports

In [None]:
import importlib
import sys
import glob
import matplotlib.pyplot as plt

# Put the directory below at the front of the module search path so the
# project package can be found.
# NOTE(review): this path is user-specific; presumably it must point at the
# directory that contains your own `2026_nvhackathon` checkout.
sys.path.insert(0, "/home/lucasnorth/")

# The package name starts with a digit, so it cannot be written in a plain
# `import` statement; importlib.import_module takes the dotted name as a string.
process_points = importlib.import_module("2026_nvhackathon.kriging.process_points")
calc_met_gof = process_points.calc_met_gof
filter_sufficient_data = process_points.filter_sufficient_data

# Prediction-grid class used in step 2
process_rasters = importlib.import_module("2026_nvhackathon.kriging.process_rasters")
ProjWUS = process_rasters.ProjWUS

# Kriging driver class used in step 4
run_kriging = importlib.import_module("2026_nvhackathon.kriging.run_kriging")
KrigHackathon = run_kriging.KrigHackathon

## 2. Define Prediction Grid

Larger and higher resolution grids will require more memory.

In [None]:
# Prediction grid: lon/lat bounding box from (-111, 37) to (-102, 45),
# requested at a 3000 m cell size in both x and y.
pred_grid_3000 = ProjWUS(
    lon_min=-111,
    lat_min=37,
    lon_max=-102,
    lat_max=45,
    res_x_m=3000,
    res_y_m=3000,
)

# Print the grid object as a quick sanity check
print(pred_grid_3000)

## 3. Process point data

This step only needs to be run once. **CHANGE THESE DIRECTORIES**

In [None]:
# # Commented out after running once

# IFS_PATHS = sorted(glob.glob("/project/cowy-nvhackathon/cowy-wildfire/data/nwp/ifs_yearly/*"))
# OBSERVATIONS = '/project/cowy-nvhackathon/cowy-wildfire/data/observations/cowy_madis_metar_mesonet_2024.nc'

# points_gof = calc_met_gof(
#     path_ifs = IFS_PATHS[0],
#     path_obs = OBSERVATIONS,
#     path_save = "/project/cowy-nvhackathon/lucasnorth/df_ifs_points_gof_test.nc",
#     option_save = True
# )

# points_filtered = filter_sufficient_data(
#     path_gof = "/project/cowy-nvhackathon/lucasnorth/df_ifs_points_gof_test.nc",
#     path_save = "/project/cowy-nvhackathon/lucasnorth/df_ifs_points_gof_filtered_test.nc",
#     option_save = True
# )


## 4.1. Ordinary Kriging

### 4.1.1 Initialize Kriging Model

Instantiate the `KrigHackathon` class by providing it with the processed point data and the prediction grid.
Then, retrieve the kriging arrays and print some diagnostics.

In [None]:
# Build the kriging helper from the filtered point file (step 3) and the
# prediction grid (step 2). Point coordinates are supplied as lon/lat
# (EPSG:4326).
my_krig = KrigHackathon(
    path_points="/project/cowy-nvhackathon/lucasnorth/df_ifs_points_gof_filtered_test.nc",
    varname_points="ws_error",
    varname_x="longitude",
    varname_y="latitude",
    varname_time="time",
    src_crs="EPSG:4326",
    ProjWUS=pred_grid_3000,
)

# NOTE: get_kriging_arrays() has a HARDCODED bad-data threshold for ws_error:
# it filters out stations with mean biases greater than 10 m/s.
# TODO: move this to a data processing step.
kx, ky, kz = my_krig.get_kriging_arrays()

# Print some diagnostics on the point vectors: the three lengths should agree
print("Length of kriging x vector: ", len(kx))
print("Length of kriging y vector: ", len(ky))
print("Length of kriging z vector: ", len(kz))

print("Mean of z vector: ", kz.mean().item())
print("Max of z vector: ", kz.max().item())
print("Min of z vector: ", kz.min().item())

# Coordinate vectors of the prediction grid
xgrid = pred_grid_3000.x_vect
ygrid = pred_grid_3000.y_vect

print("Range of x grid: ", xgrid.min(), xgrid.max())
print("Range of y grid: ", ygrid.min(), ygrid.max())

# Each vector length is printed next to the matching grid dimension so a
# mismatch is easy to spot. The second label used to repeat "x width" even
# though it reports the y vector against the grid height.
print("Check x width: ", len(xgrid), pred_grid_3000.width)
print("Check y height: ", len(ygrid), pred_grid_3000.height)

### 4.1.2. Run Ordinary Kriging

Pass the kriging vectors derived in step 4.1.1.
This may take a few minutes for a CO-WY size grid at 3 km.
Then, plot a preliminary visualization. The grid vectors are based on cell centers, so they will not match the point data exactly.

In [None]:
# Interpolate basic kriging on a grid

# Ordinary kriging of the point values (kz at kx, ky); the call returns two
# objects, kept here as z_grid and z_var.
# NOTE(review): z_var is presumably the kriging variance — confirm in run_kriging.
z_grid, z_var = my_krig.interpolate_grid(kx, ky, kz)

# Quick-look greyscale image of the kriged surface with a colour scale
plt.imshow(z_grid, cmap='gray')
plt.colorbar()

## 4.2 Regression Kriging

### 4.2.1 Reproject Feature Rasters

Now that we have tried basic ordinary kriging, let's process the rasters for regression kriging.
This code reprojects the .nc raster files onto the prediction grid defined in step 2.
It also retrieves the feature values at the points to build the regression. 

In [None]:
# Reproject each terrain raster and sample it at the points.
# Every call returns a pair: the feature on the grid and the feature at the points.

# Elevation (variable "HGT")
elev_grid, elev_points = my_krig.reproj_rasters(
    ds_path="/project/cowy-nvhackathon/cowy-wildfire/data/terrain_data/terrain_990m/conus_elev_reprojected_wgs84_cowy_990m.nc",
    varname="HGT",
    src_crs="EPSG:4326",
)

# Aspect
aspect_grid, aspect_points = my_krig.reproj_rasters(
    ds_path="/project/cowy-nvhackathon/cowy-wildfire/data/terrain_data/terrain_990m/aspect_reprojected_wgs84_cowy_990m.nc",
    varname="aspect",
    src_crs="EPSG:4326",
)

# Slope
slope_grid, slope_points = my_krig.reproj_rasters(
    ds_path="/project/cowy-nvhackathon/cowy-wildfire/data/terrain_data/terrain_990m/slope_reprojected_wgs84_cowy_990m.nc",
    varname="slope",
    src_crs="EPSG:4326",
)


### 4.2.2 Stack the Raster and Points, Run Regression

This stacks each feature grid and point vector column-wise, suitable for a multiple linear regression.
The regression is then run, predicting the "z" variable at the points and on the grid.
This is considered the "deterministic" component of regression kriging. 

In [None]:
# Combine the three terrain features into one predictor set for the grid and
# one for the points; both lists use the same order (elevation, aspect, slope).
x_grid, x_points = my_krig.stack_raster_data(
    grid_out=[elev_grid, aspect_grid, slope_grid],
    points_out=[elev_points, aspect_points, slope_points],
)

# Fit the regression against the predictand from step 4.1.1 (kz) and get its
# predictions on the grid and at the points.
z_hat_grid, z_hat_points = my_krig.run_regression(
    predictor_grid=x_grid,
    predictor_points=x_points,
    z=kz,
)

### 4.2.3 Run Regression Kriging

Regression kriging is defined by the deterministic and stochastic components.
The regression model is considered the deterministic component, where your predictand (ws_error in this case) varies as a function of the topographic components. 
The stochastic component is what the regression model cannot explain, such as larger scale spatial patterns/covariance. A kriging model is run on the residuals of the regression.
Lastly, the two components are added together. 

See the Wikipedia article:
https://en.wikipedia.org/wiki/Regression-kriging

In [None]:
# Krig the residuals of the regression

# Residuals at the points: observed predictand minus the regression prediction
epsilon = kz - z_hat_points

# Interpolate the residuals onto the prediction grid (stochastic component)
epsilon_grid, epsilon_var = my_krig.interpolate_grid(kx, ky, epsilon)

# Regression kriging estimate = deterministic part + kriged residuals
z_rk = z_hat_grid + epsilon_grid

# Heatmap of the combined estimate on a diverging colour map
fig, ax = plt.subplots()
cax = ax.imshow(z_rk, cmap="RdBu_r")
cbar = fig.colorbar(cax)
cbar.set_label(
    "Ann. Avg. Windspeed Error (m/s)",
    rotation=90,
    labelpad=15,
)

In [None]:
# Compare to topography alone

# elev_grid is reshaped to (height, width) of the prediction grid so it can be
# drawn as an image.
plt.imshow(
    elev_grid.reshape(pred_grid_3000.height, pred_grid_3000.width),
    cmap="cubehelix",
)
plt.colorbar()

In [None]:
# Inspect regression with elevation

import matplotlib.pyplot as plt
import numpy as np

# Degree-1 least-squares fit of the predictand against elevation at the points.
# np.polyfit returns coefficients from the highest power down: slope, intercept.
slope, intercept = np.polyfit(elev_points, kz, 1)

y_pred = slope * elev_points + intercept

plt.scatter(x=elev_points, y=kz)
plt.plot(
    elev_points,
    y_pred,
    color="red",
    # The slope uses significant figures because a small per-metre value would
    # print as 0.00 with two fixed decimals. The intercept carries its own sign
    # so a negative value does not render as "+-".
    label=f"Regression line: y={slope:.3g}x{intercept:+.2f}",
)
plt.ylabel("Avg. Residual (m/s)")
plt.xlabel("Elev. (m)")
# Without a legend the label passed to plt.plot is never drawn
plt.legend()