In [None]:
%reload_ext autoreload
%autoreload 2

import numpy as np
from scipy.interpolate import griddata
import geopandas as gpd
import rasterio as rio
from rasterio.transform import from_origin
from rasterio.plot import show, show_hist
from rasterio.mask import mask
import json
import pandas as pd
import hvplot.pandas  # noqa
import holoviews as hv
hv.extension('bokeh')
import altair as alt
alt.data_transformers.disable_max_rows()
from matplotlib import pyplot as plt
from pathlib import Path

# On phy-server the local project modules are not importable unless their directory is on
# sys.path, and the relative data paths used below expect that directory as the working dir.
# Everywhere else the directory does not exist and this bootstrap is skipped.
import os
import sys

_phy_server_analysis_dir = "/silod7/lenz/MPSchleiSediments/analysis/"
if os.path.isdir(_phy_server_analysis_dir):
    try:
        sys.path.append(_phy_server_analysis_dir)
        os.chdir(_phy_server_analysis_dir)
    except OSError as err:
        # Stay best-effort (do not abort the notebook), but report the failure instead of hiding it.
        print(f"Could not switch to {_phy_server_analysis_dir}: {err}")

from settings import Config
from prepare_data import patsy_transform
from cv import performance

In [None]:
# Load the Schlei coastline (GeoJSON file named as derived from OSM) into a GeoDataFrame.
# It is used further down as the clipping geometry for the interpolated grids.
poly = gpd.read_file('../data/SchleiCoastline_from_OSM.geojson')
# poly.plot()

In [None]:
## Read predicted data from model run
# savestamp = '20230403_233901'
savestamp = '20230501_172522'

# Sort the matches so the chosen file does not depend on filesystem listing order, and fail
# with a descriptive error (instead of a bare IndexError) when no export matches the stamp.
prediction_files = sorted(Path('../data/exports/models/predictions').glob(f'{savestamp}*.csv'))
if not prediction_files:
    raise FileNotFoundError(
        f"No prediction CSV starting with '{savestamp}' found in ../data/exports/models/predictions"
    )
f = prediction_files[0]
target = f.name.split('_')[-2]  # the target name is the second-to-last '_'-separated token of the file name
station_data = pd.read_csv(f)

if target == 'Concentration':
    # Derive the additional columns defined in Config.massConc_from_numConc (column name -> formula).
    for k, v in Config.massConc_from_numConc.items():
        station_data[k] = patsy_transform(v, station_data)  # OBS: MassConc is in µg/kg. Divide by 1e9 to get MassConc in kg MP per kg dry sediment!

# Attach point geometries built from the LON/LAT columns (WGS84).
station_data = gpd.GeoDataFrame(station_data, geometry=gpd.points_from_xy(station_data.LON, station_data.LAT), crs='EPSG:4326')
# station_data.head(30)

## old method
# station_data = gpd.GeoDataFrame(sdd_iow, geometry=gpd.points_from_xy(sdd_iow['LON'], sdd_iow['LAT'], crs='EPSG:4326')).to_crs("EPSG:3857")

In [None]:
## Run this cell to exclude outlier samples S05 and S32

# station_data.loc[station_data.Sample=='S05', 'Concentration_observed'] = np.nan
# station_data.loc[station_data.Sample=='S32', 'Concentration_observed'] = np.nan

# station_data.loc[station_data.Sample=='S05', 'MassConcentration_observed'] = np.nan
# station_data.loc[station_data.Sample=='S32', 'MassConcentration_observed'] = np.nan

In [None]:
## Calculate the performance of the prediction against the seen training data
# Only stations that actually have an observed value take part in the comparison.
by_sample = station_data.set_index('Sample')
has_observation = by_sample[f'{target}_observed'].notna()
performance(by_sample.loc[has_observation, f'{target}_observed'],
            by_sample.loc[has_observation, f'{target}_predicted'])

In [None]:
## Run performance test for "MassConcentration", regressed from "Concentration"
# Only stations that actually have an observed mass concentration take part in the comparison.
mass_by_sample = station_data.set_index('Sample')
has_mass_observation = mass_by_sample['MassConcentration_observed'].notna()
performance(mass_by_sample.loc[has_mass_observation, 'MassConcentration_observed'],
            mass_by_sample.loc[has_mass_observation, 'MassConcentration'])

In [None]:
# Reproject stations and coastline in place to the CRS given by Config.baw_epsg.
for layer in (station_data, poly):
    layer.to_crs(Config.baw_epsg, inplace=True)

# Geometry of the first coastline feature as a GeoJSON-style dict, wrapped in a list
# (despite its name, poly_as_str holds a list with one dict, not a string).
coastline_geojson = json.loads(poly.to_json())
poly_as_str = [coastline_geojson['features'][0]['geometry']]

In [None]:
def grid_interp(data, name, xgrid, ygrid, method=None):
    '''
    Interpolates point data from a geopandas GeoDataFrame onto a 2D numpy array of grid points.

    Parameters
    ----------
    data : object exposing point coordinates as ``data.geometry.x`` / ``data.geometry.y``
        and the sample values as ``data[name]`` (e.g. a GeoDataFrame of stations).
    name : key of the value column to interpolate.
    xgrid, ygrid : coordinate arrays of the target grid (e.g. from ``np.meshgrid``).
    method : interpolation method passed to ``scipy.interpolate.griddata``.
        Defaults to ``Config.interpolation_method`` when omitted.

    Returns
    -------
    numpy.ndarray with the broadcast shape of ``xgrid`` / ``ygrid``.

    Raises
    ------
    ValueError if no station has a finite value and finite coordinates.
    '''
    if method is None:
        method = Config.interpolation_method

    points = np.column_stack((data.geometry.x, data.geometry.y))
    observed = np.asarray(data[name], dtype=float)

    # Stations without a usable value would otherwise spread NaN into every grid node they
    # influence (and into the nearest-neighbour fallback below), so drop them up front.
    usable = np.isfinite(observed) & np.isfinite(points).all(axis=1)
    if not usable.any():
        raise ValueError(f"No finite '{name}' values available for interpolation")
    points, observed = points[usable], observed[usable]

    values = griddata(
        points, observed,
        (xgrid, ygrid),
        method=method,  # 'linear' and 'cubic' will result in nan outside of the convex hull of data points
    )
    nan_mask = np.isnan(values)  # if there are any nan points re-interpolate them using method 'nearest'

    if np.any(nan_mask):
        values2 = griddata(
            points, observed,
            (xgrid, ygrid), method='nearest',
        )
        values[nan_mask] = values2[nan_mask]
    return values

In [None]:
def grid_clip(values, poly, xgrid, ygrid):
    '''
    Clips a raster layer (ndarray) to a polygon layer.

    The grid is flattened into a point GeoDataFrame (one point per grid node, CRS taken from
    Config.baw_epsg) and clipped with `poly`. Every node that the clip does not retain is set
    to NaN, and the values are returned reshaped to the shape of `values`.
    '''
    grid_points = gpd.GeoDataFrame(
        {'vals': values.ravel()},
        geometry=gpd.points_from_xy(xgrid.ravel(), ygrid.ravel()),
        crs=Config.baw_epsg,
    )

    ## old method:
    # clipper = gpd.overlay(grid_gdf, poly, how='intersection')  # takes about 15 min
    # clipper = clipper.loc[grid_gdf.intersects(poly.geometry[0])]  # takes about 11 min
    retained_index = grid_points.clip(poly).index

    # Blank out every node whose row label did not survive the clip.
    outside = ~grid_points.index.isin(retained_index)
    grid_points.loc[outside, 'vals'] = np.nan
    return grid_points['vals'].values.reshape(values.shape)

In [None]:
# Regular grid spanning the coastline bounding box, with node spacing Config.interpolation_resolution.
xres = yres = Config.interpolation_resolution
xmin, ymin, xmax, ymax = poly.total_bounds
x_nodes = np.arange(xmin, xmax + xres, xres)
y_nodes = np.arange(ymin, ymax + yres, yres)  # ascending: grid row 0 is the southernmost row
xgrid, ygrid = np.meshgrid(x_nodes, y_nodes)

# Interpolate station values onto the grid.
# target_values = grid_interp(station_data, target, xgrid, ygrid)
target_values = grid_interp(station_data, 'MassConcentration', xgrid, ygrid)
sedDBD_values = grid_interp(station_data, 'SedDryBulkDensity', xgrid, ygrid)

# Set everything outside the coastline polygon to NaN.
target_clipped, sDBD_clipped = (
    grid_clip(layer, poly, xgrid, ygrid) for layer in (target_values, sedDBD_values)
)

In [None]:
# # src_filename = '/home/rob/ownCloud/microSCHLEI/Sediment_K/predictions/whitebox_IDWinterp_MinNum2_weight1_radius200.tif'
# # src_filename = '/home/rob/ownCloud/microSCHLEI/Sediment_K/predictions/final_interpolations/MP_conc_woproxy_idwwb_interpolated_clipped.tif'
# src_filename = '/home/rob/ownCloud/microSCHLEI/Sediment_K/predictions/final_interpolations/interpolated_sibson_clipped.tif'
# with rio.open(src_filename, "r") as src:
#        target_clipped = src.read(1)
# target_clipped[target_clipped==src.nodata] = np.nan
# src.meta

In [None]:
# clipped.plot(column=target, cmap='OrRd', edgecolor="none", antialiased=False)
# alt.Chart(clipped.assign(X = clipped.geometry.x, Y = clipped.geometry.y)).mark_square(size=100).encode(
#     x='X',
#     y='Y',
#     color=target
# ).interactive()

In [None]:
# grid cell area in m² from cell width * cell height in m
cell_area = xres * yres
# volume of sediment layer considered in m³
cell_sedVol = Config.sediment_layer_depth * cell_area
# mass of sediment in each cell, calculated from (interpolated) sediment dry bulk density (kg m⁻³)
# * volume of sediment per cell (m³); NaN cells (outside the clip) count as 0
sedMass_grid = cell_sedVol * np.nan_to_num(sDBD_clipped)
# grid of target amounts (MP particles if target==Concentration; MP mass if target==MassConcentration)
abundance_grid = np.multiply(np.nan_to_num(target_clipped), sedMass_grid)
total = abundance_grid.sum()

In [None]:
# Report the grand total and the total normalised by the area of the first coastline polygon
# and by the sediment layer depth expressed in cm.
layer_depth_cm = Config.sediment_layer_depth * 100
total_in_trillions = np.round(total / 1e12, 1)
per_area_and_cm = round(total / poly.area[0] / layer_depth_cm)
print(f'Total MP in upper {layer_depth_cm} cm of Schlei sediments: {total_in_trillions} Trillion')
print(f'MP per m² and cm sediment depth: {per_area_and_cm}')

In [None]:
# Sediment-mass-weighted mean of the target: summed abundance divided by summed sediment mass.
total / sedMass_grid.sum()

In [None]:
# Unweighted mean over all cells of the clipped target grid.
# NOTE(review): nan_to_num turns the NaN cells outside the clip polygon into 0, so they are
# included in this mean and pull it down — confirm this is intended (np.nanmean would average
# over the in-polygon cells only).
np.nan_to_num(target_clipped).mean()

In [None]:
# hv.help(hv.Image)

In [None]:
# Coordinate system: (left, bottom, right, top)
bounds = (xmin, ymin, xmax, ymax)
n_rows, n_cols = target_values.shape

# Log-scaled image of the clipped target grid, drawn at one fifth of the grid size in pixels.
hv.Image(target_clipped, bounds=bounds).opts(
    cmap='RdYlBu_r',
    cnorm='log',
    clim=(40, 40000),
    width=int(n_cols / 5),
    height=int(n_rows / 5),
    invert_yaxis=True,
    # colorbar=True,
)

In [None]:
# Quick look at the clipped sediment dry bulk density grid; origin='lower' puts array row 0
# at the bottom of the plot.
fig, ax = plt.subplots()
ax.imshow(sDBD_clipped, origin='lower', interpolation='nearest')
plt.show()

In [None]:
f = f'../data/exports/models/predictions/{savestamp}_raster.tif'

# The interpolation grid was built with y ascending (row 0 = southernmost nodes), whereas a
# from_origin() transform is north-up (row 0 = northernmost pixels). Flip the rows so the
# written raster is not mirrored vertically.
raster_values = np.ascontiguousarray(np.flipud(target_values))

# Grid nodes are sample points, i.e. pixel centres: anchor the upper-left pixel corner half a
# cell outside the first column / top row of nodes. The top row of nodes is ygrid[-1, 0],
# which can lie above ymax because the grid extends up to one cell beyond the bounds.
transform = from_origin(xgrid[0, 0] - xres / 2, ygrid[-1, 0] + yres / 2, xres, yres)

# The context manager closes the dataset even if the write fails.
with rio.open(f, 'w', driver='GTiff',
              height=raster_values.shape[0], width=raster_values.shape[1],
              count=1, dtype=str(raster_values.dtype),
              crs=Config.baw_epsg,
              transform=transform) as new_dataset:
    new_dataset.write(raster_values, 1)

# Re-open for reading; this handle is used by the masking cell below.
rasta = rio.open(f)

In [None]:
# Mask the exported raster with the coastline geometry; crop=True also crops the result to the
# extent of the geometry. Returns the masked array and the affine transform of the cropped window.
out_img, out_transform = mask(rasta, poly_as_str, crop=True)

In [None]:
# out_img is a numpy array, not a dataset: the tuple form of show() expects (dataset, band index)
# and would call .read() on the array. Pass the array directly and supply the transform of the
# cropped window so the axes show map coordinates.
show(out_img, transform=out_transform, cmap='terrain')