## Plot squared Pearson R$^2$ and Nash-Sutcliffe efficiency NSE


Both measures (R2) and NSE are indications of how well the observed data fits the simulated one. 

$$NSE= 1-  \frac{\sum \limits_{i=1}^{n} (Y^{obs}_i -Y^{sim}_i )^2}{\sum \limits_{i=1}^{n}(Y^{obs}_i -Y^{mean}_i )^2}  $$

In [2]:
#Import 
%matplotlib notebook
import pandas as pd
import matplotlib.colors as colors
from matplotlib_scalebar.scalebar import ScaleBar
import contextily as cx
from mpl_toolkits.axes_grid1 import make_axes_locatable
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import geopandas as gpd
from functions import mapplot
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from shapely.geometry import Point, MultiPolygon, Polygon, GeometryCollection,mapping
from shapely import ops
from joypy import joyplot
import rasterio
from rasterio.plot import show
import warnings
warnings.simplefilter(action='ignore', category=Warning)
from scipy import stats
import rasterio.mask
import pcraster
from matplotlib.colors import LinearSegmentedColormap
pathfig="D:/Erasmus/Thesis/Figures/"

In [27]:
pathshp="D:/Erasmus/Thesis/data/SHP/SHP2/"
pathpick="D:/Erasmus/Thesis/data/Pickle/"
pathg='D:/Erasmus/Thesis/Code/08072022/GW_forecasting_ML/'
pathfig="D:/Erasmus/Thesis/Figures/"
pathdat="D:/Erasmus/Thesis/data/"

In [3]:

#Load error files
folder = Path(pathg+"Results_seq12/")
df = pd.concat([
    pd.read_csv(fname)
    for fname in folder.glob("summary_CNN_*.txt")
])

In [4]:
#Load error files
folder2 = Path(pathg+"wihtoutRH/")
df_norh = pd.concat([
    pd.read_csv(fname)
    for fname in folder2.glob("summary_CNN_*.txt")
])

In [5]:
len(df), len(df_norh)

(505, 505)

In [6]:
df.loc[df.NSE<0, 'NSE'] = 0
df_norh.loc[df_norh.NSE<0, 'NSE'] = 0

In [7]:
df.r2.describe()
#df.NSE.describe()

count    505.000000
mean       0.681291
std        0.153057
min        0.001982
25%        0.608454
50%        0.711304
75%        0.791740
max        0.910183
Name: r2, dtype: float64

In [8]:
df['NSE_RH']=df['NSE']
df['r2_RH']=df['r2']
dfcomb=pd.concat([df.reset_index().NSE_RH,df.reset_index().r2_RH, df_norh.reset_index().NSE, 
                  df_norh.reset_index().r2], axis=1)

In [9]:
fig, (ax1, ax2) = plt.subplots(1, 2,figsize=(8,4), sharey=True)
sns.set_theme(style="dark", palette="deep")
#sns.histplot(data=df.NSE[:len(df_norh)],kde=True)
sp= sns.histplot(data=dfcomb[['NSE','NSE_RH']][:],kde=True,binwidth=0.1, ax=ax1)

ax1.set_xlabel('NSE')
ax1.set_ylabel(r'Number of wells')
legendax1 = ax1.get_legend()
handlesax1 = legendax1.legendHandles
legendax1.remove()
ax1.legend(handlesax1, ['P, T', 'P, T, RH'],fontsize=3,prop={'size': 8}, loc=2)

sp2=sns.histplot(data=dfcomb[['r2','r2_RH']][:],kde=True,binwidth=0.1, ax=ax2)
ax2.set_xlabel('r$^2$')
ax2.set_ylabel(r'Number of wells')
legendax2 = ax2.get_legend( )
handlesax2 = legendax2.legendHandles
legendax2.remove()
ax2.legend(handlesax2,['P, T', 'P, T, RH'],prop={'size': 8}, loc=2)
plt.tight_layout() 
plt.savefig(pathfig+"histcomp.pdf")

<IPython.core.display.Javascript object>

In [10]:
sns.set_theme(style="darkgrid")
fig, ax = plt.subplots(figsize=(8,3))
colors = ['#EDC1A8', '#EDC1A8', '#A5B8D7','#A5B8D7']
ax = sns.violinplot(data=dfcomb[:], palette=colors, inner=None, linewidth=0.5)
sns.boxplot(data=dfcomb[:], saturation=0.5, width=0.03,
            color='#A8A8A8', boxprops={'zorder': 2}, linewidth=0.5, ax=ax)
for violin, alpha in zip(ax.collections[::], [0.6,1,0.6,1]):
    violin.set_alpha(alpha)
ax.set_xticklabels(["NSE (+RH)","r$^2$ (+RH)","NSE","r$^2$"])
plt.tight_layout()
plt.savefig(pathfig+"violin.pdf")

<IPython.core.display.Javascript object>

## Geospatial analyses

This section is to check the spatial relations between the model performance results and the spatial features such as landcover,  and distance to rivers

In [12]:
#this lines takes a while for running due to the size of the landuse shapefile

#Load shapefiles
gw_sel=gpd.read_file(pathshp+"GWF2.shp")

#LS GIS information OSM
waterbodies=gpd.read_file(pathshp+"waterbodiesND.shp")
waterways=gpd.read_file(pathshp+"waterwaysND.shp")

#Administrative boundaries
germany_states = gpd.read_file(pathshp+"DEU_adm1.shp")
ND=germany_states[germany_states.NAME_1== "Niedersachsen"]

In [13]:
#BGR hydrogeology map 1:250k clipped for Lower Saxony
hydrogeologischeND=gpd.read_file(pathshp+"hydrogeologischeND.shp")

#LS GIS information OSM
citiesND=gpd.read_file(pathshp+"citiesND.shp")
landuse=gpd.read_file(pathshp+"LS/gis_osm_landuse_a_free_1.shp")
places=gpd.read_file(pathshp+"LS/gis_osm_places_a_free_1.shp")

First, the model performance should be associated with the shapefile information

In [14]:
#Create a column to store the accuracy of the models- this is created in the shapefile that has the wells ID
gw_sel[['NSE','r2','NSE_2','r2_2']]=np.nan
for i in range(len(df)):
    index=gw_sel.loc[gw_sel.MEST_ID==df.wellid.values[i]].index[0]
    gw_sel.at[index, 'NSE'] = df.NSE.values[i]
    gw_sel.at[index, 'r2'] = df.r2.values[i]
    if i < len(df_norh):
        gw_sel.at[index, 'NSE_2'] = df_norh.NSE.values[i]
        gw_sel.at[index, 'r2_2'] = df_norh.r2.values[i]
        
    

In [15]:
c=gw_sel.NSE.copy()
c.dropna(inplace=True)
c.describe()

count    505.000000
mean       0.503161
std        0.225006
min        0.000000
25%        0.373459
50%        0.556643
75%        0.678329
max        0.887068
Name: NSE, dtype: float64

In [16]:
gw_sel['diffr2']=gw_sel['r2']-gw_sel['r2_2']
gw_sel['diffr2']=gw_sel['diffr2'].abs()
gw_sel['diffNSE']=gw_sel['NSE']-gw_sel['NSE_2']
gw_sel['diffNSE']=gw_sel['diffNSE'].abs()

### Plot NSE and R2

In [17]:
#Project shapefiles for mapping
proj_coor=4647
gw_sel=gw_sel.to_crs(epsg=proj_coor)
waterbodies=waterbodies.to_crs(epsg=proj_coor)
waterways=waterways.to_crs(epsg=proj_coor)

hydrogeo=hydrogeologischeND.to_crs(epsg=proj_coor)
cities=citiesND.to_crs(epsg=proj_coor)
land_use=landuse.to_crs(epsg=proj_coor)
germany_states=germany_states.to_crs(epsg=proj_coor)
ND=ND.to_crs(epsg=proj_coor)


In [18]:
def mapplots(column,cmap,bound,axs,gws=gw_sel):
    gw=gw_sel.plot(ax=axs,figsize=(10, 10),column=column, markersize=15,
               marker="v", facecolor=None,cmap=cmap, zorder=3, edgecolor = '#d4d4d4',
            linewidth = 0.2, alpha=1)
    wb=waterbodies.plot( ax=axs, alpha=0.8, color='b', linewidth=0.8, zorder=1)
    ww=waterways.plot( ax=axs, alpha=0.3, color='b', linewidth=.5,zorder=2)
    #gdff=gdf.plot( ax=gw, alpha=0.5, color='r',markersize=12,zorder=2)
    #cit=cities.plot( ax=gw, alpha=0.5, color='r',markersize=40,zorder=2)
    NS=ND.boundary.plot( ax=axs, alpha=0.3, edgecolor='k', linewidth=1, zorder=1)

    #Colorbar 
    #sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=gw_sel[column].min(), 
    #                                                                      vmax=gw_sel[column].describe()[6]))
    #sm = plt.cm.ScalarMappable(cmap=cmap, norm=colors.TwoSlopeNorm(vmin=0, vcenter=0.6, vmax=1))
    sm = plt.cm.ScalarMappable(cmap=cmap)
    #sm = plt.cm.ScalarMappable(cmap="coolwarm_r", norm=plt.Normalize(vmin=gw_sel[column].min(), 
    #                                                                       vmax=gw_sel[column].max()))

    fig=gw.get_figure()
    divider = make_axes_locatable(axs)
    cax = divider.append_axes('bottom', size='4%', pad=0.5)
    cbar = fig.colorbar(sm,orientation="horizontal",fraction=0.001,cax=cax)
    if column =='r2' or column == 'r2_2':
        cbar.ax.set_xlabel('r$^2$')
    elif column == 'NSE_2':
        cbar.ax.set_xlabel('NSE')
    elif column == 'diffr2':
        cbar.ax.set_xlabel('$\Delta$r$^2$')
    elif column == 'diffNSE':
        cbar.ax.set_xlabel('$\Delta$NSE')
    else:
        cbar.ax.set_xlabel(column)


    #for x, y, label in zip(gw_sel.geometry.x[:len(df)], gw_sel.geometry.y[:len(df)], gw_sel.MEST_ID[:len(df)]):
    #    gw.annotate(label, xy=(x, y), xytext=(3, 3), textcoords="offset points", fontsize=5)



    #scalebar
    scalebar = ScaleBar(0.5, "m", dimension="si-length", length_fraction=0.10, location="lower left")
    gw.add_artist(scalebar)
    gw.tick_params(axis='y', which='major', labelsize=8, rotation=90)
    gw.tick_params(axis='x', which='major', labelsize=8, rotation=0)
    startx, endx = gw.get_xlim()
    starty, endy = gw.get_ylim()

    #North arrow
    arrx=endx- endx*0.002
    arry=endy-endy*0.0040
    gw.text(x=arrx-arrx*0.0001, y=arry, s='N', fontsize=16,alpha=0.8)
    gw.arrow(arrx, arry-arry*0.002, 0, 10000, length_includes_head=True,
              head_width=8000, head_length=20000, overhang=.2, ec="k",facecolor='k', alpha=0.4)

    #Basemap
    #cx.providers.OpenStreetMap.Mapnik
    cx.add_basemap(ax=NS,  crs=ND.crs.to_string(), source=cx.providers.OpenStreetMap.Mapnik,alpha=0.5,zoom=10,attribution=False,zorder=0)

    return axs

In [22]:
sns.set_theme(style="ticks")
bound=germany_states.to_crs(gw_sel.crs.to_string()) 
#cmap="coolwarm_r"
#cmap='plasma'

cmap = LinearSegmentedColormap.from_list( 'mycmap', [(0, '#d35555'),(0.6, '#d8dadc'), (1, '#009ad8')])
cmap2 = LinearSegmentedColormap.from_list( 'mycmap', [(0, '#d35555'),(0.5, '#d8dadc'), (1, '#009ad8')])
#cmap='viridis_r'
#cmap2='viridis_r'

fig, (gw, gw2) = plt.subplots(ncols=2, figsize=(13, 8))
gw=mapplots(column='r2_2',cmap=cmap,bound=bound,axs=gw, gws=gw_sel)
gw2=mapplots(column='NSE_2',cmap=cmap2,bound=bound,axs=gw2, gws=gw_sel)
plt.tight_layout()    
#plt.savefig(pathfig+"diff2.pdf")
plt.savefig(pathfig+"PT.jpg",bbox_inches="tight",dpi=200)

<IPython.core.display.Javascript object>

## Geospatial relations

### Distribution of wells according to the hydrogeology

Information in the hydrogeological map

HE_B Hydrogeologic unit, abbreviation 01K (1-4)A \
HE_B_BEZ Hydrogeologic unit, designation Quaternary (unclassified) \
GA Rock type, abbreviation S \
__GA_bez__ Rock type, designation Sedimentary \
VF consolidation, abbreviation L \
__VF_bez__ consolidation, designation unconsolidated rock -- Lockergestein (loose rock), Festgestein (hard rock) \
HA Cavity type, abbreviation P \
__HA_bez__ Cavity type, designation pores \
GC Geochemical rock type, abbreviation s \
__GC_bez__ Geochemical rock type, designation siliceous \
KF Permeability (m/s), abbreviation 11 \
__KF_bez__ Permeability (m/s), designation highly variable \

LChar Conductor character, abbreviation GWG/GWL \
__LChar_bez__ Conductor character, designation groundwater conductor/groundwater conductor \
__Litho__ Lithology Gravel, sand, silt \
Strat1 Stratigraphy (General) Quaternary \
Start2 Stratigraphy (Detail) Quaternary 

In [23]:
#Intersect map with the well locations
hgint=gpd.overlay(gw_sel, hydrogeo, how='intersection')

All wells are located in sedimentary rock-type

In [24]:
dic_VF={'Lockergestein':'Loose rock',
'Festgestein': 'Hard rock' }
hgint['VF_bez_dic']=hgint['VF_bez'].map(dic_VF)

dic_HA={'Poren':'Porous',
'Kluft': 'Fracture','Kluft/Karst':'mixed','Kluft/Poren':'mixed'}
hgint['HA_bez_dic']=hgint['HA_bez'].map(dic_HA)

#Permeability (m/S)
dict_kf={3:'Medium (>10$^{-4}$ - 10$^{-3}$)',
9: 'Medium to moderate  (>10$^{-5}$ - 10$^{-3}$)' , 11:'Highly variable', 5:'other', 4:'other', 10:'other'}
hgint['kf_dic']=hgint['kf'].map(dict_kf)

dic_litho={'Sand, Kies':'Sand, gravel',
'Sand, Feinsand, Schluff, untergeordnet Kies': 'Sand, fine sand, silt', 'Kalkstein, Mergelstein':'Other',
           'Tonstein, Sandstein, Dolomitstein, Kohle':'Other'}
hgint['Litho_dic']=hgint['Litho'].map(dic_litho)


In [24]:
column=['VF_bez_dic','HA_bez_dic','kf_dic','LChar_bez','Litho_dic']
#column=['HA_bez_dic']
for cl in column:
    hgintcl=pd.DataFrame(hgint[cl].value_counts())
    order=hgintcl.index
    
    #my_circle = plt.Circle( (0,0), 0.3, color='white')
    palette = sns.color_palette(None, len(hgintcl))
    plt.figure(figsize=(5,3))
    explode=np.zeros(len(hgintcl))
    if cl== 'HA_bez_dic':
        explode = (0, 0.2, 0.2)
    
    
    piep=plt.pie(hgintcl[cl].values,explode=explode, labels=hgintcl[cl].index, colors=palette,wedgeprops={'alpha':0.6},textprops={'size': 10})
    #p = plt.gcf()
    #p.gca().add_artist(my_circle)
    plt.tight_layout()   

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [25]:
column=['HA_bez_dic','Litho_dic']
#column=['HA_bez_dic']

fig, axs = plt.subplots(1, 2, figsize=(8,3))
c=0
for cl in column:
    hgintcl=pd.DataFrame(hgint[cl].value_counts())
    order=hgintcl.index
    
    palette = sns.color_palette("tab10")
    explode=np.zeros(len(hgintcl))
    if cl== 'HA_bez_dic':
        explode = (0, 0.2, 0.2)
    else:
        explode = (0, 0, 0.2)
    
    piep=axs[c].pie(hgintcl[cl].values,explode=explode, 
                    labels=hgintcl[cl].index, colors=palette,
                    wedgeprops={'alpha':.4},textprops={'size': 10})

    c+=1
axs[0].annotate("(a)", xy=(0.5, - 0.05), xycoords="axes fraction")
axs[1].annotate("(b)", xy=(0.5, -0.05), xycoords="axes fraction")
plt.tight_layout()  
#plt.savefig(pathfig+"hydrogeoprop.png",dpi=300)
plt.savefig(pathfig+"hydrogeoprop.pdf",bbox_inches="tight")

<IPython.core.display.Javascript object>

In [36]:
column=['VF_bez','HA_bez','kf','LChar_bez','Litho']
for cl in column:
    hgintcl=pd.DataFrame(hgint[cl].value_counts())
    order=hgintcl.index
    fig, ax = plt.subplots(figsize=(8,5))
    cp=sns.swarmplot(ax=ax,x=cl, y="NSE",order=order, data=hgint)
    cp.set_xticklabels(order, rotation=55)
    ax.set_xlabel(cl)
    ax.set_ylabel('NSE')
    plt.tight_layout() 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## 1. Depth to groundwater 

In [28]:
#Load additional information of GWL observations
root=pathdat+'/Grundwasserstandsdaten/'
basisdaten=pd.read_csv(root+'PROJEKT_BASISDATEN.txt', sep=';')
#monatswert=pd.read_csv(root+'PROJEKT_MONATSWERTE.txt', sep=';')

basisdatenc=basisdaten.set_index(basisdaten.MEST_ID)
gw_selc= gw_sel.set_index(gw_sel.MEST_ID)
#Join gw_sel with the additional information contained in basisdaten file
gw_seladd=gw_selc.join(basisdatenc, lsuffix='', rsuffix='_2') 

FileNotFoundError: [Errno 2] No such file or directory: 'D:/Erasmus/Thesis/data//Grundwasserstandsdaten/PROJEKT_BASISDATEN.txt'

In [38]:
gw_seladd['FUK2']=gw_seladd['FUK'].apply(lambda x: x.replace(',','.')).astype(float)
gw_seladd['FOK2']=gw_seladd['FOK'].apply(lambda x: x.replace(',','.')).astype(float)
gw_seladd['GOK_NN2']=gw_seladd['GOK_NN'].apply(lambda x: x.replace(',','.')).astype(float)
gw_seladd['MBP_NN2']=gw_seladd['MBP_NN'].apply(lambda x: x.replace(',','.')).astype(float)
gw_seladd['MBP-GOK']=gw_seladd['MBP_NN2'] - gw_seladd['GOK_NN2']

In [54]:
plt.figure()
sns.set_theme()
#plt.histplot(np.arange(len(gw_seladd)),gw_seladd['FOK2'].sort_values(), '.-')
sns.histplot(data=gw_seladd['FUK2'].sort_values(), kde=True)
#plt.hist(gw_seladd['FUK2'].sort_values(), color='#3097b6', lw=1)
plt.ylabel('Number of wells')
plt.xlabel('Filter depth (m below ground level)')
plt.grid(True, alpha=0.3)
plt.savefig(pathfig+"filterdepth.png",dpi=300)

<IPython.core.display.Javascript object>

In [62]:
plt.figure()
sns.set_theme()
#plt.histplot(np.arange(len(gw_seladd)),gw_seladd['FOK2'].sort_values(), '.-')
sns.histplot(data=gw_seladd['GOK_NN2'].sort_values(), kde=True, color='#268e99')
#plt.hist(gw_seladd['FUK2'].sort_values(), color='#3097b6', lw=1)
plt.ylabel('Number of wells')
plt.xlabel('m above sea level')
plt.grid(True, alpha=0.3)
plt.savefig(pathfig+"groundlevel.png",dpi=300)

<IPython.core.display.Javascript object>

In [217]:
sns.set_theme(style="white")
fig, ax = plt.subplots(figsize=(8,5))
dfaux=gw_seladd.sort_values(by="FOK2")[:]
slope, intercept, r_value, p_value, std_err = stats.linregress(dfaux['FOK2'],
                                                               dfaux["r2"])
print(r_value)
cp=sns.regplot(ax=ax,x="FOK2", y="r2", data=dfaux[:], fit_reg=True, order=1,
 color='darkblue',scatter_kws={"s": 10,"color": "c"})
#cp=sns.regplot(ax=ax,x="GOK_NN2", y="r2", data=gw_seladd.sort_values(by="GOK_NN2"), fit_reg=True, order=2,
#               ci=False, color='darkblue',scatter_kws={"s": 10,"color": "c"})
ax.grid(alpha=0.5)

#plt.savefig('D:/Data/students/mariana/Documents/Figures/Spatial_relations/FOK_r2_2.png',bbox_inches='tight')

<IPython.core.display.Javascript object>

-0.013301313827987332


In [63]:
# Add time series to the main dataframe
gw_sel['GOK_NN']=gw_seladd['GOK_NN2'].values
gw_sel['FOK']=gw_seladd['FOK2'].values
gw_sel['FUK']=gw_seladd['FUK2'].values

## 2. Intersection with the OSM data 

The OpenStreetMap data for the lower Saxony can be obtained in the following link: https://download.geofabrik.de/europe/germany/niedersachsen.html

In [64]:
r=gpd.overlay(gw_sel, land_use, how='intersection')

In [65]:
cl=pd.DataFrame(r.fclass.value_counts())
cl['names']=cl.index
cl.names.replace('nature_reserve','Reserve', inplace=True)
order=cl.names[:]

In [67]:
#fig, ax = plt.subplots(figsize=(8,5))
#cp=sns.swarmplot(ax=ax,x="fclass", y="r2",order=order, data=rdrop)
#cp.set_xticklabels(order, rotation=55)
#ax.set_xlabel('Landuse')
#ax.set_ylabel('r$^2$')
#plt.tight_layout() 

In [70]:
rdropsel=r.loc[r['fclass'].isin(['forest','farmland','residential','meadow','grass','farmyard'])]

In [71]:
cl='fclass'
rdropselcl=pd.DataFrame(rdropsel[cl].value_counts())
order=rdropselcl.index

In [72]:

my_circle = plt.Circle( (0,0), 0.3, color='white')
palette = sns.color_palette(None, len(rdropselcl))
plt.figure(figsize=(5,4))
explode=np.zeros(len(rdropselcl))
piep=plt.pie(rdropselcl[cl].values,explode=explode, labels=rdropselcl[cl].index, colors=palette,wedgeprops={'alpha':0.6},textprops={'size': 10})
p = plt.gcf()
p.gca().add_artist(my_circle)
plt.tight_layout()  

<IPython.core.display.Javascript object>

In [60]:
rdropsel2=rdropsel.rename(columns={"r2": "r$^2$"})
joyplot(
    data=rdropsel2[['NSE',"r$^2$", 'fclass']], 
    by='fclass',
    color=[ '#eb4d4b','#686de0'],
    lw=0.1,
    alpha=0.5,
    legend=True,
    figsize=(8, 6),
)

<IPython.core.display.Javascript object>

(<Figure size 800x600 with 7 Axes>,
 [<AxesSubplot:>,
  <AxesSubplot:>,
  <AxesSubplot:>,
  <AxesSubplot:>,
  <AxesSubplot:>,
  <AxesSubplot:>,
  <AxesSubplot:>])

## 2.1. Waterworks OSM

In [73]:
#Load the corine land cover 2008 
waterworks_points=gpd.read_file("D:/Data/students/mariana/data/SHP/waterworks/wasserwerk_point.shp")
waterworks_lines=gpd.read_file("D:/Data/students/mariana/data/SHP/waterworks/wasserwerk_line.shp")
waterworks_polys=gpd.read_file("D:/Data/students/mariana/data/SHP/waterworks/wasserwerk_pol.shp")
waterworks_point=waterworks_points.to_crs(epsg=proj_coor)
waterworks_line=waterworks_lines.to_crs(epsg=proj_coor)
waterworks_poly=waterworks_polys.to_crs(epsg=proj_coor)

In [74]:
#points=Point(coastlines.geometry[0].coords[0])
l=[]
for ww in waterworks_point.geometry:
    for i in ww :
        l.append(Point(i))
        
for wwl in waterworks_lines.geometry:
    for i in wwl.coords :
        l.append(Point(i))
        
for wwp in waterworks_polys.geometry:
    map_pol=mapping(wwp)['coordinates']
    for polcoor in map_pol :
        [l.append(Point(coords[1],coords[0])) for coords in polcoor]

        
gdfww=gpd.GeoDataFrame(index=np.arange(len(l)), crs='epsg:'+str(proj_coor), geometry=l)

In [75]:
gw_selc=gw_sel.copy()
gdfs = gpd.GeoDataFrame(geometry=gw_selc.geometry).to_crs("EPSG:"+str(proj_coor))
dmtca=gdfs.geometry.apply(lambda g: gdfww.distance(g))
vdist=[]
for i in range(len(dmtca)):
    vdist.append(min(dmtca[dmtca.columns][i:i+1].values[0])/1000)
gw_sel['dist_pww']=vdist  


In [84]:
sns.set_theme(style="white")
fig, ax = plt.subplots(figsize=(8,5))
data=gw_sel.sort_values(by="dist_pww")[:70]
cp=sns.regplot(ax=ax,x="dist_pww", y="r2", data=data, fit_reg=True)
ax.grid(alpha=0.5)

slope, intercept, r_value, p_value, std_err = stats.linregress(data["dist_pww"],
                                                                   data["r2_2"])
#print(stats.spearmanr(data["dist_pww"],data["r2_2"]))
print(r_value)

<IPython.core.display.Javascript object>

0.3886766640919675


## 3. Intersection with Corine-land cover data

Check the relation witht the CORINE Land Cover (CLC) inventory for 2019. 
Downloaded from https://land.copernicus.eu/pan-european/corine-land-cover

It consists of 44 classes described here https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html

'112'--> __Discontinuous urban fabric (Artificial Surfaces )__\
'121'--> Industrial or commercial units (Artificial Surfaces )\
'124'--> Airports (Artificial Surfaces )\
'142'--> Sport and leisure facilities (Artificial Surfaces )\
'211'--> __Non-irrigated arable land (Agricultural areas )__\
'231'--> __Pastures (Agricultural areas )__\
'311'--> Broad-leaved forest (Forest and seminatural areas )\
'312'--> __Coniferous forest  (Forest and seminatural areas )__\
'313'--> Mixed forest (Forest and seminatural areas )\
'322'--> Moors and heathland  (Forest and seminatural areas )\
'412'--> Peatbogs (Wetlands)\
'512'--> Water bodies 

note: the bold categories are the most relevant 

In [100]:
#Load the corine land cover 2008 
clc=gpd.read_file("D:/Data/students/mariana/data/SHP/clc2018/clc2018_LSr.shp")
clcs=clc.to_crs(epsg=proj_coor)

In [101]:
#Intersect the layer and the wells ID
clcsint=gpd.overlay(gw_sel, clcs, how='intersection')

In [102]:
dic_cat={'112':'Discontinuous urban fabric',
'121': 'Industrial or commercial units ',
'124' : 'Airports',
'142': 'Sport and leisure facilities',
'211': 'Non-irrigated arable land' ,
'231': 'Pastures',
'311' : 'Broad-leaved forest', 
'312': 'Coniferous forest' , 
'313': 'Mixed forest' ,
'322': 'Moors and heathland' , 
'412': 'Peatbogs' ,
'512':' Water bodies' }

#Map dictionary into a new column to recognize which code corresponds to which definition
clcsint['name']=clcsint['Code_18'].map(dic_cat)
clcs['name']=clcs['Code_18'].map(dic_cat)

In [103]:
cl='name'
clcsintcl=pd.DataFrame(clcsint[cl].value_counts())
order=clcsintcl.index

In [104]:
Lcat=clcsintcl.loc[clcsintcl.name<8].index
ind=clcsint.loc[clcsint['name'].isin(Lcat)].name.index
clcsint.loc[ind, 'name'] = "others"

In [105]:
gw_sel.columns

Index(['MEST_ID', 'MS_LBEZ', 'MS_MSA_ID', 'UTM_X', 'UTM_Y', 'MS_EIG', 'MS_BTR',
       'MS_GOKNN', 'MS_FOK', 'MS_FUK', 'BGR_NSE_R2', 'BGR_REFERE',
       'KLIGL_GRUP', 'AUSWAHL_KL', 'AUSWERTUNG', 'CD_ID_1', 'CD_ID_2',
       'geometry', 'NSE', 'r2', 'NSE_2', 'r2_2', 'diffr2', 'diffNSE', 'GOK_NN',
       'FOK', 'FUK', 'dist_pww'],
      dtype='object')

In [137]:
cl='name'
clcsintcl=pd.DataFrame(clcsint[cl].value_counts())
order=clcsintcl.index
#palette = sns.color_palette(None, len(rdropselcl))
plt.figure(figsize=(7,4))
#my_circle = plt.Circle( (0,0), 0.3, color='white')
colors = ['chocolate','#66b3ff','#99ff99','#ffcc99','c','olive']
plt.pie(clcsintcl[cl].values, labels=clcsintcl[cl].index, colors=colors,wedgeprops={'alpha':0.6},textprops={'size': 10})
#p = plt.gcf()
#p.gca().add_artist(my_circle)
plt.tight_layout()
plt.savefig(pathfig+"clc.png",dpi=300)

<IPython.core.display.Javascript object>

In [232]:
col='Code_18'
clcclass=pd.DataFrame(clcsint[col].value_counts())
order=clcclass.index
fig, ax = plt.subplots(figsize=(8,5))
cp=sns.swarmplot(ax=ax,x=col, y="NSE",order=order, data=clcsint)
cp.set_xticklabels(order, rotation=55)
ax.set_xlabel(col)
ax.set_ylabel('NSE')
plt.tight_layout() 

<IPython.core.display.Javascript object>

In [233]:
col='name'
clcclass=pd.DataFrame(clcsint[col].value_counts())
order=clcclass.index
fig, ax = plt.subplots(figsize=(8,5))
cp=sns.swarmplot(ax=ax,x=col, y="NSE",order=order, data=clcsint)
cp.set_xticklabels(order, rotation=55)
ax.set_xlabel(col)
ax.set_ylabel('NSE')
plt.tight_layout() 

<IPython.core.display.Javascript object>

In [234]:
dic=clcclass.name.to_dict()
clcsint['name_sor']=clcsint.name.map(dic)
clcsint_sort=clcsint.sort_values(by='name_sor',ascending=False)
clcsint_sort1=clcsint_sort.loc[clcsint_sort.name!="others"]
clcsint_sort2=clcsint_sort1.rename(columns={"r2": "r$^2$"})

In [237]:
col='name'
joyplot(
    data=clcsint_sort2[['NSE',"r$^2$", col]], 
    by=col,
    color=[ '#eb4d4b','#686de0'],
    lw=0.1,
    alpha=0.5,
    legend=True,
    figsize=(8, 6)
)

<IPython.core.display.Javascript object>

(<Figure size 800x600 with 6 Axes>,
 [<AxesSubplot:>,
  <AxesSubplot:>,
  <AxesSubplot:>,
  <AxesSubplot:>,
  <AxesSubplot:>,
  <AxesSubplot:>])

### 3.1. Intersection with 4 km buffer
Inntersect buffer with the shape of corine land covers.

In [139]:
#Set the influence area to 1km 
clcsint['buffer']=clcsint.buffer(1000)

gw_sel['buff_1km']=gw_sel.buffer(1000)
gw_sel['buff_2km']=gw_sel.buffer(3000)
gw_sel['buff_3km']=gw_sel.buffer(3000)
gw_sel['buff_4km']=gw_sel.buffer(4000)
gw_sel['buff_5km']=gw_sel.buffer(3000)
#b=clcsint.buffer(1000)
#b.to_file('D:/Data/students/mariana/data/test.shp')  

In [140]:
#clcs is the shapefile of corine land cover
rectangles=clcs.geometry[:]
names_dic=clcs.name
wellid=clcsint.MEST_ID # To make sure the well id corresoinds with the buffer
names=clcs.Code_18
dfs, wids=[] , []

count=0
for c in clcsint['buffer'].geometry:
   
    vnames, vnamesdic, geotypes, geoms, parea=[] , [], [] ,[] ,[]

    for r, nm, nmdic in zip(rectangles,names,names_dic):
        # create intersection 
        inters=c.intersection(r)
        #Check geometry type
        geoms.append(inters)
        #Store the names of the land covers to associate with every geometry
        vnames.append(nm)
        vnamesdic.append(nmdic)
        geotypes.append(inters.geom_type)

    # create shapely GeometryCollection
    geom_collection = GeometryCollection(geoms)

    pols, vnams, vnamsdic=[], [], []
    for i,vn, vndic, gt in zip(geom_collection, vnames,vnamesdic, geotypes):
        if gt == 'Polygon':
            poly=Polygon(i)
        else:
            poly=MultiPolygon(i) 

        if not poly.is_empty:
            pols.append(poly)
            vnams.append(vn)
            vnamsdic.append(vndic)
            parea.append(poly.area/10E6) # add area in km2
    
    
    
    prop=parea/np.sum(parea)
    d = {'col1': vnams, 'names':vnamsdic ,'geometry': pols, 'area':parea, 'prop':prop}
    df = gpd.GeoDataFrame(d, crs="EPSG:4647") #(assign epsg)
    dfs.append(df)
    wids.append(wellid[count])
    count+=1
    #df.to_file('D:/Data/students/mariana/data/test3.shp')

In [141]:

#List of the code for the main categories of clc in the study area
lcod=['211','231','312','112','311']
catname=[ 'Non-irrigated arable land' , 'Pastures','Coniferous forest' , 
         'Discontinuous urban fabric', 'Broad-leaved forest']

for categ, name in zip(lcod,catname):
    li=[]
    for datf in dfs:
        pr=datf[datf.col1==categ].prop
        if not pr.empty:
            li.append(pr.values[0]*100)

        else:
            li.append(0.0)


    #gw_sel['prop_'+categ]=np.zeros(len(gw_sel))
    gw_selaux=gw_sel.loc[gw_sel.MEST_ID.isin(wids)]
    gw_selaux['prop_'+categ]=li
    gw_selaux1=gw_selaux.loc[gw_selaux['r2']>0]
    gw_selaux2=gw_selaux1.loc[gw_selaux1['prop_'+categ]>0]

    selcol=['MEST_ID']+list(gw_selaux.columns[-1:])
    gw_sel=gw_sel.merge(gw_selaux[selcol],how='left',left_on='MEST_ID', right_on='MEST_ID')
    
    
    sns.set_theme(style="white")
    fig, ax = plt.subplots(figsize=(8,5))
    cp=sns.regplot(ax=ax,x='prop_'+categ, y="r2", data=gw_selaux2.sort_values(by='prop_'+categ), fit_reg=True, order=1,
     color='darkblue',scatter_kws={"s": 10,"color": "c"})
    ax.set_xlabel('Coverage (%) '+name)
    #cp=sns.regplot(ax=ax,x="GOK_NN2", y="r2", data=gw_seladd.sort_values(by="GOK_NN2"), fit_reg=True, order=2,
    #               ci=False, color='darkblue',scatter_kws={"s": 10,"color": "c"})
    ax.grid(alpha=0.5)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## 4. Multi-Order Hydrologic Position

Max project-
J:\NUTZER\Noelscher.M\Studierende\Daten\multiorder_hydrologic_position\germany\time_invariant\raster\v013_1_1\data

There are three main products:\
SD: Distance to the stream \
LP: relative position between the stream and the catchment boundary \
DSD: distance to the stream plus distance to the catchment boundary

#### SD - Distance to stream

In [142]:
ni=2
ln=np.arange(2,5)

for nm in ['sd','dsd','lp']:
    fig, ax = plt.subplots(len(ln),1, figsize=(10,5),sharex=True)
    for i in ln:
        ho=rasterio.open("D:/Data/students/mariana/data/Raster/multiorder_hydroposition/mohp_custom_germany_"+nm+"_hydrologicorder"+str(i)+"_30m_4647.tif")

        cl="ho"+str(i)+"_30m_"+nm
        measure='r2'
        coord_list = [(x,y) for x,y in zip(gw_sel['geometry'].x , gw_sel['geometry'].y)]
        gw_sel[cl] = [x[0]/1000 for x in ho.sample(coord_list)]

        cp=sns.regplot(ax=ax[i-ni],x=cl, y=measure, data=gw_sel.sort_values(by=cl), fit_reg=False, label=cl[:-4],
                      scatter_kws={'s':8})
        cp.set(xlabel=None)
        if measure== 'r2':
            ax[i-2].set_ylabel("r$^2$")
        #ax[i-2].set_title(cl)
        ax[i-ni].legend(loc=1)
        ax[i-ni].grid(alpha=0.5)
        fig.supxlabel("Distance km",fontsize=12)
        fig.suptitle(nm, fontsize=16)
        plt.subplots_adjust(left=0.1,
                        bottom=0.1, 
                        right=0.9, 
                        top=0.9, 
                        wspace=0.4, 
                        hspace=0.2)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### Anlyses per catchment

In [242]:
#Catchments
catchmens=gpd.read_file("D:/Data/students/mariana/data/SHP/catchment.shp")
catchment=catchmens.to_crs(epsg=proj_coor)
gw_selcat=gpd.overlay(gw_sel, catchment, how='intersection')
clname='NAME_2500'
gw_selcatu=pd.DataFrame(gw_selcat[clname].value_counts())
print(gw_selcatu)
gw_selname=gw_selcat[gw_selcat[clname]=='Hase']

       NAME_2500
Ems          133
Hase         104
Elbe          66
Weser         63
Hunte         41
Aller         37
Issel         33
Leine         16
Bode           1


In [243]:
ni=2
ln=np.arange(2,5)

for nm in ['sd','dsd','lp']:
    fig, ax = plt.subplots(len(ln),1, figsize=(10,5),sharex=True)
    for i in ln:
        ho=rasterio.open("D:/Data/students/mariana/data/Raster/multiorder_hydroposition/mohp_custom_germany_"+nm+"_hydrologicorder"+str(i)+"_30m_4647.tif")

        cl="ho"+str(i)+"_30m"
        measure='r2'
        coord_list = [(x,y) for x,y in zip(gw_selname['geometry'].x , gw_selname['geometry'].y)]
        gw_selname[cl] = [x[0]/1000 for x in ho.sample(coord_list)] #values in km

        cp=sns.regplot(ax=ax[i-ni],x=cl, y=measure, data=gw_selname.sort_values(by=cl), fit_reg=False, label=cl[:-4],
                      scatter_kws={'s':8})
        cp.set(xlabel=None)
        if measure== 'r2':
            ax[i-2].set_ylabel("r$^2$")
        ax[i-ni].legend(loc=1)
        ax[i-ni].grid(alpha=0.5)
        fig.supxlabel("Distance km",fontsize=12)
        fig.suptitle(nm, fontsize=16)
        plt.subplots_adjust(left=0.1,bottom=0.1, right=0.9, top=0.9, 
                        wspace=0.4, hspace=0.2)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [332]:
fig, ax = plt.subplots(figsize=(8,5))
cp=sns.swarmplot(ax=ax,x="NAME_2500", y="r2", data=gw_selcat, order=gw_selcatu.index)
ax.set_ylabel('r$^2$')
ax.set_xlabel('Catchment name 1:2500')

<IPython.core.display.Javascript object>

Text(0.5, 0, 'Catchment name 1:2500')

## 5. Distance to the sea

In [143]:
#Load the coastline of Lower Saxony
coastline=gpd.read_file("D:/Data/students/mariana/data/SHP/coastline.shp")
coastlines=coastline.to_crs(epsg=proj_coor)

In [144]:
#points=Point(coastlines.geometry[0].coords[0])
l=[]
for i in waterways.geometry[0].coords:
    l.append(Point(i))
gdf=gpd.GeoDataFrame(index=np.arange(len(l)), crs='epsg:'+str(proj_coor), geometry=l)

In [145]:
gw_selc=gw_sel.copy()

In [146]:
gdfs = gpd.GeoDataFrame(geometry=gw_selc.geometry).to_crs("EPSG:"+str(proj_coor))
dmtca=gdfs.geometry.apply(lambda g: gdf.distance(g))
vdist=[]
for i in range(len(dmtca)):
    vdist.append(min(dmtca[dmtca.columns][i:i+1].values[0])/1000)
gw_sel['dist_coastkm']=vdist  

In [147]:
fig, ax = plt.subplots(figsize=(8,5))
cp=sns.regplot(ax=ax,x="dist_coastkm", y="r2", data=gw_sel.sort_values(by="dist_coastkm")[:50], fit_reg=True)
ax.grid(alpha=0.5)

<IPython.core.display.Javascript object>

## 6. Waterways

In [249]:
#points=Point(coastlines.geometry[0].coords[0])
l2=[]
for j in range(len(waterways.geometry)):
    if waterways.type[j] =='MultiLineString':
        lenmline=len(waterways.geometry[j])
        for ln in range(lenmline):
            l=len(waterways.geometry[j][ln].coords)
            for v in waterways.geometry[j][ln].coords:
                l2.append(Point(v))
    else:
        
        for i in waterways.geometry[j].coords:
                l2.append(Point(i))


#gdf2=gpd.GeoDataFrame(index=np.arange(len(l)), crs='epsg:'+str(proj_coor), geometry=l2)
#gdf2.plot()


In [250]:
gdf=gpd.GeoDataFrame(index=np.arange(len(l2)), crs='epsg:'+str(proj_coor), geometry=l2)
gdf.plot(markersize=0.03)
gw_selc=gw_sel.copy()

<IPython.core.display.Javascript object>

In [251]:
gdfs = gpd.GeoDataFrame(geometry=gw_selc.geometry).to_crs("EPSG:"+str(proj_coor))
dmtca=gdfs.geometry.apply(lambda g: gdf.distance(g))
vdist=[]
for i in range(len(dmtca)):
    vdist.append(min(dmtca[dmtca.columns][i:i+1].values[0])/1000)
gw_sel['dist_water']=vdist  

In [252]:
fig, ax = plt.subplots(figsize=(8,5))
cp=sns.regplot(ax=ax,x="dist_water", y="r2", data=gw_sel.sort_values(by="dist_water")[:], fit_reg=True)
ax.grid(alpha=0.5)

<IPython.core.display.Javascript object>

## 7.  Impervious layer 

Check how much the model performance is affected by the imperviousness density (%)

https://land.copernicus.eu/pan-european/high-resolution-layers/imperviousness/status-maps/2006?tab=mapview


In [148]:
sns.set_theme(style="ticks")
impervious=rasterio.open("D:/Data/students/mariana/data/Raster/Impervious/IMD_2006_100m_eu_03035_d03_E40N30_4647.tif")
#plt.figure(figsize=(3,3))
#show(impervious)

In [149]:
cl="impervious"
coord_list = [(x,y) for x,y in zip(gw_sel['geometry'].x , gw_sel['geometry'].y)]
gw_sel[cl] = [x[0] for x in impervious.sample(coord_list)]


In [150]:
gw_selcop=gw_sel.loc[gw_sel[cl]<5]

In [256]:
gw_selimp=pd.DataFrame(gw_selcop[cl].value_counts())
order=gw_selimp.index
plt.figure(figsize=(5,5))
my_circle = plt.Circle( (0,0), 0.5, color='white')
colors = ['chocolate','#66b3ff','#99ff99','#ffcc99','c','olive']
plt.pie(gw_selimp[cl].values, labels=gw_selimp[cl].index, colors=colors,wedgeprops={'alpha':0.5})
p = plt.gcf()
p.gca().add_artist(my_circle)

<IPython.core.display.Javascript object>

<matplotlib.patches.Circle at 0x2c0dc3dbeb0>

In [257]:
lsel=gw_selimp.impervious[0:5].index.astype(int).sort_values()

In [258]:
gw_sel[cl]=gw_sel[cl].astype(str)
gw_selaux=gw_sel[['NSE','r2', cl]].loc[gw_sel[cl].isin(lsel.astype(str))]
joyplot(
    data=gw_selaux[['NSE','r2', cl]], 
    by=cl,
    color=[ '#eb4d4b','#686de0'],
    lw=0.1,
    alpha=0.5,
    legend=True,
    figsize=(8, 6)
)

<IPython.core.display.Javascript object>

(<Figure size 800x600 with 6 Axes>,
 [<AxesSubplot:>,
  <AxesSubplot:>,
  <AxesSubplot:>,
  <AxesSubplot:>,
  <AxesSubplot:>,
  <AxesSubplot:>])

## 7.1 Impervious layer with '''km buffer

In [151]:
gw_sel['imperv_buff']=np.zeros(len(gw_sel))
for i in range(len(gw_sel)):
    out_mask, out_transform = rasterio.mask.mask(impervious, gw_sel['buff_1km'][i:i+1], crop=True)
    meanval=np.mean(out_mask[out_mask!=impervious.meta['nodata']])
    alllist=out_mask[out_mask!=impervious.meta['nodata']]
    paveprop=len(alllist[alllist>0])/len(alllist)
    gw_sel['imperv_buff'][i]=paveprop

In [153]:
fig, ax = plt.subplots(figsize=(10,5))
data=gw_sel[gw_sel['imperv_buff']>0].sort_values(by=['imperv_buff'])
cp=sns.regplot(ax=ax,x='imperv_buff', y="r2_2", data=data, fit_reg=True)
plt.grid(alpha=0.5)

slope, intercept, r_value, p_value, std_err = stats.linregress(data["imperv_buff"],
                                                                   data["r2_2"])
print(r_value)

<IPython.core.display.Javascript object>

-0.023747218063516998


## 8. Geomorphology

Geomorphography from the Bundesanstalt für Geowissenschaften und Rohstoffe (BGR)

In [26]:
sns.set_theme(style="ticks")
geomorphology=rasterio.open("D:/Data/students/mariana/data/Raster/geomorphography/gmk1000_250_4647.tif")
geomorphologyc=rasterio.open("D:/Data/students/mariana/data/Raster/geomorphography/gmk1000_250_4647c.tif")
#geomorphology.plot()
#show(geomorphology)

RasterioIOError: D:/Data/students/mariana/data/Raster/geomorphography/gmk1000_250_4647.tif: No such file or directory

1011: very low inclined relief units in very low height above depth contour and high soil moisture index\
1012: low inclined relief units at low altitude  above depth contour and medium soil moisture index\
1021: moderately inclined relief units at medium height  above depth contour and low soil moisture index\
111: Sink areas at very low elevation above  depth contour and with very high soil moisture index\
112: Sink areas at very low elevation above  depth contour and with high soil moisture index\
113: Sink areas at low elevation above depth contour and with high soil moisture index\
114: Sink areas at medium height above depth contour and with medium soil moisture index\
115: Sink areas at medium height above depth contour and with low soil moisture index

In [156]:
dic_geom={1011:'Low Relief/ High-mod SMI',
113: 'Sink areas/ High SMI',
114 : 'Sink areas/ Medium SMI',
112: 'Sink areas/ High SMI',
115: 'Sink areas/ Low SMI' ,
1012: 'Low Relief/ High-mod SMI',
111 : 'Sink areas/ High SMI', 
1021: 'Moderate Relief/ low SMI' , 
3290: 'Others',
3213: 'Others' , 
116: 'Others' ,
3212:'Others',
3211:'Others',
1022:'Others' }

In [157]:
gw_sel["geomorphology_dic"]=gw_sel[cl].map(dic_geom)

In [185]:
cl='geomorphology_dic'
gw_selgeo=pd.DataFrame(gw_sel[cl].value_counts())
order=gw_selgeo.index
plt.figure(figsize=(6,3))
colors = ['chocolate','#66b3ff','#99ff99','#ffcc99','c','olive']
#palette = sns.color_palette(None, len(hgintcl))
plt.pie(gw_selgeo[cl].values, 
        labels=gw_selgeo[cl].index, 
        colors=colors,wedgeprops={'alpha':0.6},textprops={'size': 8})
plt.tight_layout() 
plt.savefig(pathfig+"geoprop.png",dpi=300)

<IPython.core.display.Javascript object>

In [266]:
gw_selaux=gw_sel[['NSE','r2', 'geomorphology_dic']].loc[~gw_sel['geomorphology_dic'].isin(['Others'])]

joyplot(
    data=gw_selaux[['NSE','r2', cl]], 
    by=cl,
    color=[ '#eb4d4b','#686de0'],
    lw=0.1,
    alpha=0.5,
    legend=True,
    figsize=(8, 6)
)

<IPython.core.display.Javascript object>

(<Figure size 800x600 with 6 Axes>,
 [<AxesSubplot:>,
  <AxesSubplot:>,
  <AxesSubplot:>,
  <AxesSubplot:>,
  <AxesSubplot:>,
  <AxesSubplot:>])

### 8.1 Geomorphology categorical fraction around 1 km buffer

1011:'Low Relief/ High-mod SMI',\
113: 'Sink areas/ High SMI',\
112: 'Sink areas/ High SMI',\
111 : 'Sink areas/ High SMI',\
114 : 'Sink areas/ Medium SMI',\
115: 'Sink areas/ Low SMI' ,\
1012: 'Low Relief/ High-mod SMI', \
1021: 'Moderate Relief/ low SMI' ,

In [162]:
#Most relevant geomorphological categories for the study area
categories=[1011,114,111,112,113,115,1012,1021]

for c in categories:
    gw_sel['geo_'+str(c)]=np.zeros(len(gw_sel))
    for i in range(len(gw_sel)):
        out_mask, out_transform = rasterio.mask.mask(geomorphologyc, gw_sel['buff_1km'][i:i+1], crop=True)
        out_meta = geomorphologyc.meta

        arr=np.array(out_mask[0])
        count=np.count_nonzero(arr == c)
        nnan=np.count_nonzero(arr != int(out_meta['nodata']))
        prop_cat=count/nnan   #Proportion of the specific category
        
        if c== 112 or c== 113:  ## put same categories in the same pandas series
            c=111
            
        gw_sel['geo_'+str(c)][i]=prop_cat 
        
gw_sel.drop(['geo_112','geo_113'],axis=1, inplace=True)    
    

In [268]:
col='geo_114'
fig, ax = plt.subplots(figsize=(8,5))
cp=sns.regplot(ax=ax,x=col, y="r2", data=gw_sel[gw_sel[col]>0].sort_values(by=col), fit_reg=True)
plt.grid(alpha=0.5)

<IPython.core.display.Javascript object>

## 9. Intersection with long-term recharge

In [163]:
sns.set_theme(style="ticks")
recharge=rasterio.open("D:/Data/students/mariana/data/Raster/gw_long_term_recharge/bgr/data/GWN1000__3034_v1_raster1_4647.tif")
dem=rasterio.open("D:/Data/students/mariana/data/Raster/dem/lsdem1km_4647.tif")                   


In [164]:
cl="recharge"
coord_list = [(x,y) for x,y in zip(gw_sel['geometry'].x , gw_sel['geometry'].y)]
gw_sel[cl] = [x[0] for x in recharge.sample(coord_list)]

In [353]:
fig, ax = plt.subplots(figsize=(8,5))
cp=sns.regplot(ax=ax,x=cl, y="r2", data=gw_sel[gw_sel[cl]<900].sort_values(by=cl), fit_reg=False)
plt.grid(alpha=0.5)

<IPython.core.display.Javascript object>

### 9.1 Long term recharge with Buffer

In [165]:
i=2
out_mask_dem, out_transform_dem = rasterio.mask.mask(dem, gw_sel['buff_1km'][i:i+1], crop=True)

In [330]:
m=np.matrix(out_mask_dem[0])

In [353]:
pos=np.where(m>np.percentile(out_mask_dem,75))

In [166]:
gw_sel['recharge_buff']=np.zeros(len(gw_sel))
for i in range(len(gw_sel)):
    out_mask, out_transform = rasterio.mask.mask(recharge, gw_sel['buff_1km'][i:i+1], crop=True)
    #out_mask_dem, out_transform_dem = rasterio.mask.mask(dem, gw_sel['buff_5km'][i:i+1], crop=True)
    #matrix=np.matrix(out_mask_dem[0])
    #pos=np.where(matrix>np.percentile(out_mask_dem,75))
    #nodata=
    meanval=np.mean(out_mask[out_mask!=recharge.meta['nodata']])
    gw_sel['recharge_buff'][i]=meanval
    

In [390]:
fig, ax = plt.subplots(figsize=(8,5))
cp=sns.regplot(ax=ax,x='recharge_buff', y="r2_2", data=gw_sel.sort_values(by=['recharge_buff']), fit_reg=True)
plt.grid(alpha=0.5)

<IPython.core.display.Javascript object>

## LAI : Leaf Area Index

The information is downloaded from  https://data.jrc.ec.europa.eu/dataset/jrc-mappe-europe-setup-d-18-lai#details .

Corrrespond to average monthly maps of LAI for 2018. 
Original epsg: 3035 -- reprojected here

Pistocchi, Alberto (2015): Leaf Area Index (MAPPE model). European Commission, Joint Research Centre (JRC) [Dataset] PID: http://data.europa.eu/89h/jrc-mappe-europe-setup-d-18-lai

In [167]:
lai01=rasterio.open("D:/Data/students/mariana/data/Raster/D_18_lai/Export01/lai01_4647_v2.tif")

In [170]:
gw_sel['lai01_buff']=np.zeros(len(gw_sel))
for i in range(len(gw_sel)):
    out_mask, out_transform = rasterio.mask.mask(lai01, gw_sel['buff_1km'][i:i+1], crop=True)
    meanval=np.mean(out_mask[out_mask!=lai01.meta['nodata']])
    gw_sel['lai01_buff'][i]=meanval

In [171]:
fig, ax = plt.subplots(figsize=(8,5))
cp=sns.regplot(ax=ax,x='lai01_buff', y="r2", data=gw_sel.sort_values(by=['lai01_buff']), fit_reg=True)
plt.grid(alpha=0.5)

<IPython.core.display.Javascript object>

In [172]:
lai06=rasterio.open("D:/Data/students/mariana/data/Raster/D_18_lai/Export01/lai06_4647_v2.tif")

In [173]:
gw_sel['lai06_buff']=np.zeros(len(gw_sel))
for i in range(len(gw_sel)):
    out_mask, out_transform = rasterio.mask.mask(lai06, gw_sel['buff_1km'][i:i+1], crop=True)
    meanval=np.mean(out_mask[out_mask!=lai06.meta['nodata']])
    gw_sel['lai06_buff'][i]=meanval

In [174]:
fig, ax = plt.subplots(figsize=(8,5))
cp=sns.regplot(ax=ax,x='lai06_buff', y="r2", data=gw_sel.sort_values(by=['lai06_buff']), fit_reg=True)
plt.grid(alpha=0.5)

<IPython.core.display.Javascript object>

In [175]:
gw_sel['lai0106_buff']=gw_sel['lai06_buff']-gw_sel['lai01_buff']

In [176]:
fig, ax = plt.subplots(figsize=(8,5))
cp=sns.regplot(ax=ax,x='lai0106_buff', y="r2", data=gw_sel.sort_values(by=['lai0106_buff']), fit_reg=True)
plt.grid(alpha=0.5)

<IPython.core.display.Javascript object>

In [177]:
dat=gw_sel.sort_values(by=['lai01_buff'])
slope, intercept, r_value, p_value, std_err = stats.linregress(dat["lai01_buff"],
                                                                   dat["r2"])
print(r_value,p_value)

-0.11967086734592605 0.007096419282189292


## Save the data 

In [178]:
#Remove uncompatible columns
l=list(gw_sel.columns)
for i in range(1,6):
    l.remove('buff_'+str(i)+'km')

In [179]:
#Save the information excluding the buffer columns
gw_sel[l].to_file('D:/Data/students/mariana/data/SHP/gw_sel_int.shp')  