In [None]:
%reload_ext autoreload
%autoreload 2

import numpy as np
from scipy.interpolate import griddata
import geopandas as gpd
import rasterio as rio
from rasterio.transform import from_origin
from rasterio.plot import show, show_hist
from rasterio.mask import mask
import json
import pandas as pd
import hvplot.pandas  # noqa
import holoviews as hv
hv.extension('bokeh')
import altair as alt
alt.data_transformers.disable_max_rows()
from matplotlib import pyplot as plt
from pathlib import Path

# If running on the phy-server, local modules will not be found unless their
# directory is added to the module search path, and relative data paths only
# resolve from inside the analysis directory. On any other machine the
# directory does not exist and this setup is skipped.
import os
import sys

SERVER_ANALYSIS_DIR = "/silod7/lenz/MPSchleiSediments/analysis/"

if os.path.isdir(SERVER_ANALYSIS_DIR):
    # Guard against duplicate entries when the cell is executed more than once.
    if SERVER_ANALYSIS_DIR not in sys.path:
        sys.path.append(SERVER_ANALYSIS_DIR)
    try:
        os.chdir(SERVER_ANALYSIS_DIR)
    except OSError as err:
        # Stay best-effort (do not abort the notebook), but report the problem
        # instead of hiding it.
        print(f"Could not change into {SERVER_ANALYSIS_DIR}: {err}")

from settings import Config
from prepare_data import patsy_transform
from geo import make_grid, grid_interp, grid_clip, grid_load, sample_array_raster
from cv import performance

## Geospatial interpolation

In [None]:
# Read the Schlei coastline GeoJSON into a GeoDataFrame; `poly` is the clipping
# and area reference for everything below.
coastline_file = '../data/SchleiCoastline_from_OSM.geojson'
poly = gpd.read_file(coastline_file)
# poly.plot()

In [None]:
## Read predicted data from model run
savestamp = '20230403_233901'
# savestamp = '20230501_172522'

# Find the prediction export(s) of this model run. glob() yields files in
# arbitrary order, so sort to make the selection reproducible, and fail with a
# clear message instead of a bare IndexError when nothing matches.
prediction_dir = Path('../data/exports/models/predictions')
prediction_files = sorted(prediction_dir.glob(f'{savestamp}*.csv'))
if not prediction_files:
    raise FileNotFoundError(
        f'No prediction CSV starting with "{savestamp}" found in {prediction_dir.resolve()}'
    )
if len(prediction_files) > 1:
    print(f'{len(prediction_files)} files match savestamp {savestamp}; using {prediction_files[0].name}')
f = prediction_files[0]

# The target variable name is the second-to-last "_"-separated token of the file name.
target = f.name.split('_')[-2]

# Build point geometries from the LON / LAT columns (WGS84) ...
station_table = pd.read_csv(f)
station_data = gpd.GeoDataFrame(
    station_table,
    geometry=gpd.points_from_xy(station_table.LON, station_table.LAT),
    crs='EPSG:4326',
)

# ... and bring stations and coastline into the same CRS (Config.baw_epsg).
# Assignment instead of inplace=True keeps the reprojection explicit.
station_data = station_data.to_crs(Config.baw_epsg)
poly = poly.to_crs(Config.baw_epsg)

In [None]:
# Set up the interpolation grid over the bounds of the Schlei polygon.
res = Config.interpolation_resolution
xgrid, ygrid, xmin, ymin, xmax, ymax = make_grid(poly, res, round_grid_coords=True)
print(f'grid bounds: xmin={xmin}, ymin={ymin}, xmax={xmax}, ymax={ymax}, grid shape: width: {xgrid.shape[1]}, height: {xgrid.shape[0]}')


def _interpolate(column):
    """Interpolate the station_data column `column` onto the (xgrid, ygrid) grid."""
    return grid_interp(station_data, xgrid, ygrid, name=column)


def _clip_to_schlei(values):
    """Clip a grid to the Schlei polygon extent (cells outside the Schlei become NaN)."""
    return grid_clip(values, poly, xgrid, ygrid)


# MassConcentration is only handled in addition when the model target is 'Concentration'.
with_mass_conc = target == 'Concentration'

### Activate one of the following blocks!

### Block 1: in-notebook interpolation
# interpolate here in the notebook, using the target variable
target_values = _interpolate(target)
if with_mass_conc:
    MassConc_values = _interpolate('MassConcentration')

### Block 2: external interpolation
# target_values = grid_load('../data/exports/models/predictions/interpolated/interpolated_sibson_clipped.tif')  # load a saved interpolation from a tiff file
# MassConc_values = grid_load('../data/exports/models/predictions/interpolated/<<MASSCONC>>.tif')  # load a saved interpolation from a tiff file


# Sediment dry bulk density, interpolated over the whole grid
sedDBD_values = _interpolate('SedDryBulkDensity')

# Restrict every grid to the Schlei polygon extent
target_clipped = _clip_to_schlei(target_values)
if with_mass_conc:
    MassConc_clipped = _clip_to_schlei(MassConc_values)
sDBD_clipped = _clip_to_schlei(sedDBD_values)
print('grid dimensions:', 'width:', target_clipped.shape[1], ', height:', target_clipped.shape[0])

In [None]:
# Turn the interpolated per-mass values into absolute amounts per grid cell and
# sum them over the Schlei. NaN cells (outside the clipped polygon) are
# converted to 0 by nan_to_num and therefore contribute nothing.

# grid cell area in m² (cell width * cell height, both in m)
cell_area = res ** 2

# volume of the considered sediment layer below one cell, in m³
cell_sedVol = cell_area * Config.sediment_layer_depth

# sediment mass per cell in kg:
# volume per cell (m³) * interpolated sediment dry bulk density (kg m⁻³)
sedMass_grid = cell_sedVol * np.nan_to_num(sDBD_clipped)

# amount of the target per cell
# (MP particles if target == 'Concentration'; MP mass if target == 'MassConcentration')
abundance_grid = sedMass_grid * np.nan_to_num(target_clipped)

total = abundance_grid.sum()

## Print results

In [None]:
# Summary figures derived from the abundance grid, the clipped target grid and the station table.

# Grand total, scaled by 1e12 and rounded to one decimal; layer depth is shown in cm.
# NOTE(review): the "Trillion" label reads as a particle count; when target is
# 'MassConcentration' `total` is a mass - confirm the wording for that case.
print(f'Total MP in upper {Config.sediment_layer_depth*100} cm of Schlei sediments: {np.round(total / 1e12, 1)} Trillion')
# Total normalised by the area of the polygon feature with index label 0 and by the layer depth in cm.
print(f'MP per m² and cm sediment depth: {round(total / poly.area[0] / (Config.sediment_layer_depth * 100))}')
# Sediment-mass-weighted value: summed abundance divided by summed sediment mass.
print(f'Schlei-wide MP as ratio total MP / total sed mass: {total / sedMass_grid.sum()}')
# Unweighted mean over all non-NaN cells of the clipped target grid.
print(f'Schlei-wide MP as mean of grid data (i.e. assuming equal sediment mass in all cells): {np.nanmean(target_clipped)}')
# Station-based means of the target column: all rows, then only rows with Type == "observed".
print(f'Schlei-wide MP as mean of ALL stations (observed + predicted): {station_data[target].mean()}')
print(f'Schlei-wide MP as mean of OBSERVED stations only: {station_data.loc[station_data.Type=="observed", target].mean()}')

In [None]:
def _insert_or_replace(frame, position, column, values):
    """Put `values` into `frame[column]` without failing when the cell is re-run.

    DataFrame.insert raises ValueError if the column already exists, which made
    this cell fail on a second execution. On the first run the column is
    inserted at `position`; on later runs the existing column is overwritten in
    place and keeps its position.
    """
    if column in frame.columns:
        frame[column] = values
    else:
        frame.insert(position, column, values)


# Sample the clipped raster(s) at the stations and store the sampled values
# next to the existing station columns.
_insert_or_replace(station_data, 4, f'{target}_interpolated',
                   sample_array_raster(target_clipped, xmin, ymax, res, station_data))
if target == 'Concentration':
    # MassConc_clipped only exists when the model target is 'Concentration'.
    _insert_or_replace(station_data, 8, 'MassConcentration_interpolated',
                       sample_array_raster(MassConc_clipped, xmin, ymax, res, station_data))

In [None]:
# Bar chart comparing the observed, predicted and interpolated concentration
# columns for every station of Type 'observed': one facet column per Sample,
# each facet with its own y scale.
observed_stations = station_data.loc[station_data.Type=='observed']
compared_columns = ['Concentration_observed', 'Concentration_predicted', 'Concentration_interpolated']

comparison_chart = (
    alt.Chart(observed_stations)
    .mark_bar()
    .encode(
        x=alt.X('key:N', sort=None),
        y='value:Q',
        color=alt.Color('key:N', sort=None),
        column='Sample',
    )
    .transform_fold(compared_columns)  # wide -> long: yields the 'key' / 'value' fields used above
    .resolve_scale(y='independent')
)
comparison_chart

In [None]:
## Calculate the performance of the interpolated values against the observed
## (training) data, using only samples that have an observation for the target
samples = station_data.set_index('Sample')
has_observation = samples[f'{target}_observed'].notna()
performance(samples.loc[has_observation, f'{target}_observed'],
            samples.loc[has_observation, f'{target}_interpolated'])

In [None]:
## Run performance test for "MassConcentration", regressed from "Concentration",
## again restricted to samples that have an observed value
mass_samples = station_data.set_index('Sample')
has_mass_observation = mass_samples[f'MassConcentration_observed'].notna()
performance(mass_samples.loc[has_mass_observation, f'MassConcentration_observed'],
            mass_samples.loc[has_mass_observation, f'MassConcentration_interpolated'])

## Display maps

In [None]:
# bounds=(xmin, ymin, xmax, ymax)   # Coordinate system: (left, bottom, right, top)
# hv.Image(target_clipped, bounds=bounds).opts(cmap='RdYlBu_r', cnorm='log', clim=(40,40000), width=int(target_values.shape[1]/5), height=int(target_values.shape[0]/5))#, colorbar=True)

In [None]:
# bounds=(xmin, ymin, xmax, ymax)   # Coordinate system: (left, bottom, right, top)
# hv.Image(MassConc_clipped, bounds=bounds).opts(cmap='RdYlBu_r', cnorm='log', clim=(40,40000), width=int(target_values.shape[1]/5), height=int(target_values.shape[0]/5))#, colorbar=True)

In [None]:
# bounds=(xmin, ymin, xmax, ymax)   # Coordinate system: (left, bottom, right, top)
# hv.Image(sDBD_clipped, bounds=bounds).opts(cmap='RdYlBu_r', cnorm='log', clim=(40,40000), width=int(target_values.shape[1]/5), height=int(target_values.shape[0]/5))#, colorbar=True)

In [None]:
# Quick static preview of the clipped target grid (no resampling between cells).
fig, ax = plt.subplots()
ax.imshow(target_clipped, cmap='terrain', interpolation='nearest')
plt.show()

In [None]:
## Saving raster to tiff file and reading it back in as rasterio dataset
f = f'../data/exports/models/predictions/{savestamp}_raster.tif'

# Affine transform anchored at the upper-left grid corner (xmin, ymax) with
# square cells of the configured interpolation resolution.
transform = from_origin(xmin, ymax, Config.interpolation_resolution, Config.interpolation_resolution)

# Write inside a context manager so the file handle is closed (and the data
# flushed) even if write() raises. Shape and dtype are taken from the array
# that is actually written (target_clipped), not from the unclipped grid.
with rio.open(
    f, 'w', driver='GTiff',
    height=target_clipped.shape[0], width=target_clipped.shape[1],
    count=1, dtype=str(target_clipped.dtype),
    crs=Config.baw_epsg,
    transform=transform,
) as new_dataset:
    new_dataset.write(target_clipped, 1)

# Read the file back, mask/crop it with the parts of the first coastline
# geometry and display the result as a check of the export.
with rio.open(f) as rasta:
    out_img, out_transform = mask(rasta, poly.geometry[0].geoms, crop=True)
    show(out_img, cmap='terrain')