# Imports and setting up viz

NB : conda env1 on Mac, lam1env on spirit (Python3.12)

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

# import personal tools
import sys
sys.path.append('../../python_tools/')
from tools import *
from tools_mapping import *
from tools_routing import *

In [None]:
# Natural Earth river/lake centerlines at 10m scale, drawn as black outlines with no fill
rivers = cfeature.NaturalEarthFeature(
    'physical',
    'rivers_lake_centerlines',
    '10m',
    edgecolor=(0, 0, 0, 1),
    facecolor='none',
)

# Load files

LAM output analysis, routing files.

Two simulations are loaded: one with irrigation (irr) and one without (no-irr).

## Sims

In [None]:
# Output directories of the two simulations (without / with irrigation).
# Alternative run, kept for reference:
# noirr_dir='../../../JZ_simu_outputs/LAM/LAM_1000_40/noirr_2010_2022'
# irr_dir='../../../JZ_simu_outputs/LAM/LAM_1000_40/irr_2010_2022'
lam_run_dir = '../../../JZ_simu_outputs/LAM/LAM_1500_60'
noirr_dir = lam_run_dir + '/noirr'
irr_dir = lam_run_dir + '/irr'

In [None]:
# Mapping from the names found in the routing output files to the short names
# used throughout this notebook (applied with Dataset.rename when loading).
rename_dict = dict(
    time_counter='time',
    routing_stream_reservoir_r='streamr',
    routing_fast_reservoir_r='fastr',
    routing_slow_reservoir_r='slowr',
    routing_drainage_r='drainage',
    routing_runoff_r='runoff',
    routing_hydrographs_r='hydrographs',
    routing_irrigation_r='irrigation',
    irrig_netereq_r='netirrig',
    # routing_riverflow_r='riverflow',
    # routing_coastalflow_r='coastalflow',
)

# Human-readable long_name attribute for each short variable name.
long_name_dict = dict(
    streamr='stream reservoir',
    fastr='fast reservoir',
    slowr='slow reservoir',
    drainage='drainage',
    runoff='runoff',
    hydrographs='hydrographs',
    irrigation='irrigation',
    netirrig='net irrigation',
    # riverflow='river flow',
    # coastalflow='coastal flow',
)

In [None]:
from dateutil.relativedelta import relativedelta

# Open all routing diagnostic files of the no-irrigation run, rename the
# variables to their short names and crop to the lon/lat window.
# filename = '{}/SRF/MO/sim1*_hydrographs_monthly_*.nc'.format(noirr_dir)
filename = '{}/*/SRF/MO/*diag_routing_r.nc'.format(noirr_dir)
sim_noirr = (
    xr.open_mfdataset(filename)
    .rename(rename_dict)
    .sel(lon=slice(lon_min, lon_max), lat=slice(lat_max, lat_min))
)

# attach a long_name to every variable that has an entry in long_name_dict
for var in sim_noirr.variables:
    if var in long_name_dict:
        sim_noirr[var].attrs['long_name'] = long_name_dict[var]

# shift every time stamp one calendar month back
shifted_times = [
    (pd.Timestamp(t.item()) - relativedelta(months=1)).to_datetime64()
    for t in sim_noirr['time'].values
]
sim_noirr['time'] = sim_noirr['time'].copy(data=shifted_times)

# hydrographs are labelled in m³/s
sim_noirr['hydrographs'].attrs['units'] = 'm³/s'

# dataset-level attributes set here for the plotting helpers
sim_noirr.attrs.update(name='noirr', plot_color='red')

sim_noirr

In [None]:
# Open all routing diagnostic files of the irrigated run, rename the
# variables to their short names and crop to the lon/lat window.
filename = '{}/*/SRF/MO/*diag_routing_r.nc'.format(irr_dir)
sim_irr = (
    xr.open_mfdataset(filename)
    .rename(rename_dict)
    .sel(lon=slice(lon_min, lon_max), lat=slice(lat_max, lat_min))
)

# attach a long_name to every variable that has an entry in long_name_dict
for var in sim_irr.variables:
    if var in long_name_dict:
        sim_irr[var].attrs['long_name'] = long_name_dict[var]

# shift every time stamp one calendar month back
# (relativedelta is imported in the cell that loads sim_noirr)
shifted_times = [
    (pd.Timestamp(t.item()) - relativedelta(months=1)).to_datetime64()
    for t in sim_irr['time'].values
]
sim_irr['time'] = sim_irr['time'].copy(data=shifted_times)

# hydrographs are labelled in m³/s
sim_irr['hydrographs'].attrs['units'] = 'm³/s'

# dataset-level attributes set here for the plotting helpers
sim_irr.attrs.update(name='irr', plot_color='blue')

sim_irr

## Obs

In [None]:
# Streamflow observations (GRDC monthly file for Spain, going by the file name).
# Later cells pick individual stations from it with obs.sel(id=<station id>).
filename = '../../../obs/streamflow/GRDC-Monthly_Spain.nc'
obs = xr.open_dataset(filename)
obs

# River outlets and stations

Entry format: `id: {'name': ..., 'river': ..., 'lon_grid': ..., 'lat_grid': ..., 'year_min': ..., 'year_max': ...}`


In [None]:
# River outlet points, keyed 1..6. Each entry uses the river name both as
# 'name' and as 'river', and covers the years 2010-2022.
_outlets = [
    # (river, lon_grid, lat_grid)
    ('Ebro', 0.86, 40.725),
    ('Minho', -8.875, 41.86),
    ('Tajo', -9.175, 38.6875),
    ('Duero', -8.675, 41.1375),
    ('Guadiana', -7.39, 37.175),
    ('Guadalquivir', -6.325, 36.91),
]
river_coords_merit = {
    outlet_nb: {
        'name': river,
        'river': river,
        'lon_grid': lon_grid,
        'lat_grid': lat_grid,
        'year_min': 2010,
        'year_max': 2022,
    }
    for outlet_nb, (river, lon_grid, lat_grid) in enumerate(_outlets, start=1)
}

In [None]:
# Stations keyed by numeric station id, with the grid coordinates used for each.
# NOTE(review): the last entry (6298564) has no 'river' key, and 6298259
# carries an extra 'last_record' key — confirm the helpers tolerate both.
stations_merit = {
    6298992: {
        'name': 'Albarreal De Tajo',
        'river': 'Tajo',
        'lon_grid': -4.17499303817749,
        'lat_grid': 39.891666412353516,
        'year_min': 2010,
        'year_max': 2021,
    },
    # 6298249: {'name': 'Zaragoza',  'river': 'Ebro', 'lon_grid':-0.8749926686286926, 'lat_grid':41.65833282470703, 'year_min':2010, 'year_max':2021},
    # 6298481: {'name': 'San Miguel del Pino', 'river': 'Duero', 'lon_grid':-4.92499303817749, 'lat_grid':41.508331298828125, 'year_min':2010, 'year_max':2021},
    6298259: {
        'name': 'Badajoz',
        'river': 'Guadiana',
        'lat_grid': 38.85833358764648,
        'lon_grid': -7.008326530456543,
        'last_record': '2013-09-15',
        'year_min': 2010,
        'year_max': 2021,
    },
    6298564: {
        'name': 'Villanueva de Azoague',
        'lon_grid': -5.641659736633301,
        'lat_grid': 41.974998474121094,
        'year_min': 2010,
        'year_max': 2010,
    },
}

In [None]:
# Stations used for the evaluation (author's note: keeping only data after 2010).
# Keys are the numeric station ids used to select from obs; 'station_nb' is the
# running number (1..18) used to label stations in tables.
proper_stations_dict = {
    6226800: {'name': 'Tortosa',            'river': 'Ebro',            'lat_grid': 40.82500,   'lon_grid': 0.525007,   'station_nb': 1},
    6226400: {'name': 'Zaragoza',           'river': 'Ebro',            'lat_grid': 41.67499,   'lon_grid': -0.90832,   'station_nb': 2},
    6226300: {'name': 'Castejon',           'river': 'Ebro',            'lat_grid': 42.17499,   'lon_grid': -1.69165,   'station_nb': 3},
    6226600: {'name': 'Seros',              'river': 'Segre',           'lat_grid': 41.45833,   'lon_grid': 0.425007,   'station_nb': 4},
    6226650: {'name': 'Fraga',              'river': 'Cinca',           'lat_grid': 41.52499,   'lon_grid': 0.341674,   'station_nb': 5},
    6212410: {'name': 'Tore',               'river': 'Douro',           'lat_grid': 41.50833,   'lon_grid': -5.47499,   'station_nb': 6},
    6212700: {'name': 'Peral De Arlanza',   'river': 'Arlanza',         'lat_grid': 42.07500,   'lon_grid': -4.07499,   'station_nb': 7},
    6213700: {'name': 'Talavera',           'river': 'Tagus',           'lat_grid': 39.95833,   'lon_grid': -4.82499,   'station_nb': 8},
    6213800: {'name': 'Trillo',             'river': 'Tagus',           'lat_grid': 40.70833,   'lon_grid': -2.57499,   'station_nb': 9},
    6213900: {'name': 'Peralejos',          'river': 'Tagus',           'lat_grid': 40.59166,   'lon_grid': -1.92499,   'station_nb': 10},
    6216510: {'name': 'Azud de Badajoz',    'river': 'Guadiana',        'lat_grid': 38.86199,   'lon_grid': -7.01,      'station_nb': 11},
    6116200: {'name': 'Pulo do Lobo',       'river': 'Guadiana',        'lat_grid': 37.803,     'lon_grid': -7.633,     'station_nb': 12},
    6216530: {'name': 'La Cubeta',          'river': 'Guadiana',        'lat_grid': 38.975,     'lon_grid': -2.895,     'station_nb': 13},
    6216520: {'name': 'Villarubia',         'river': 'Guadiana',        'lat_grid': 39.125,     'lon_grid': -3.59073,   'station_nb': 14},
    6216800: {'name': 'Quintanar',          'river': 'Giguela',         'lat_grid': 39.64166,   'lon_grid': -3.07499,   'station_nb': 15},
    6217140: {'name': 'Mengibar',           'river': 'Guadalquivir',    'lat_grid': 37.98425,   'lon_grid': -3.79939,   'station_nb': 16},
    6217200: {'name': 'Arroyo Maria',       'river': 'Guadalquivir',    'lat_grid': 38.17905,   'lon_grid': -2.83594,   'station_nb': 17},
    6217700: {'name': 'Pinos Puente',       'river': 'Frailes',         'lat_grid': 37.27499,   'lon_grid': -3.75832,   'station_nb': 18},
}
# Reduced station set for a simpler figure: the first 5 entries of
# proper_stations_dict (stations 1-5, i.e. Ebro, Segre and Cinca stations).
# The 5-panel figures further down rely on this count.
representative_stations_dict = dict(list(proper_stations_dict.items())[:5])


In [None]:
# Map of all evaluation stations, using the project helper stations_map_dict.
# The extent list presumably is [lon_min, lon_max, lat_min, lat_max] — TODO confirm against the helper.
stations_map_dict(proper_stations_dict, title=None, extent=[-10, 2.5, 36, 44])

In [None]:
# Plot the location of the evaluation stations on a map with rivers and coastlines.
import cartopy

# The stations drawn here. The original cell iterated over `stations_dict_filtered`,
# which is never defined in this notebook; proper_stations_dict is the station
# set used everywhere else.
stations_to_plot = proper_stations_dict

# One colour per station, sampled evenly along the colormap (previously a fixed
# 16 colours, which is fewer than the number of stations).
cmap = plt.get_cmap('tab20')  # Or any other colormap you prefer
colors = [cmap(i) for i in np.linspace(0, 1, len(stations_to_plot))]

fig = plt.figure(figsize=(10, 10))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.coastlines()
ax.add_feature(cartopy.feature.RIVERS)
ax.set_extent([-10, 2.5, 35, 45])
gl = ax.gridlines(draw_labels=True, dms=True, x_inline=False, y_inline=False)
# NOTE(review): the original assigned gl.ylocator twice in one statement; if the
# x axis was meant to be limited too, also set gl.xlocator.
gl.ylocator = plt.MaxNLocator(5)
gl.right_labels = False
gl.top_labels = False
for color, coordinates in zip(colors, stations_to_plot.values()):
    plt.scatter(coordinates['lon_grid'], coordinates['lat_grid'], s=40, label=coordinates['name'], marker='o', color=color)
plt.title('Location of selected stations')

# legend placed outside the axes, to the right
# plt.legend(loc='lower right')
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), borderaxespad=0.1)


# Maps

In [None]:
# Map of the 'irrigation' variable of the irrigated run, drawn by the project helper map_ave.
# From its name the helper presumably maps a time average — TODO confirm.
ds=sim_irr
var='irrigation'
map_ave(ds, var, cmap=wetW, hex=False)

In [None]:
# Difference map of 'hydrographs' between the irrigated (ds1) and the
# no-irrigation (ds2) runs, drawn by the project helper map_diff_ave.
# NOTE(review): the sign convention (ds1 - ds2 or the reverse) is decided inside the helper — confirm.
ds1=sim_irr
ds2=sim_noirr
var='hydrographs'
map_diff_ave(ds1, ds2, var)

In [None]:
# Display a map of the time mean of `var` for `ds`, restricted to a small lon/lat box
var='hydrographs'
ds = sim_irr

# Bounds of the box, in the same lon/lat values as the dataset coordinates
subdomain = {
    'latmin': 38.95,
    'latmax': 39.05,
    'lonmin': -2.95,
    'lonmax': -2.85
}
# subdomain = subdomain_spain

# restrict ds to latmin, latmax, lonmin, lonmax (project helper restrict_ds)
ds=restrict_ds(ds, subdomain)
# time mean of the selected variable, then plotted with the project helper map_plotvar
plotvar = ds[var].mean(dim='time')
map_plotvar(plotvar, cmap=reds)

# Time series

## Stations

In [None]:
# Time series at one station: observations plus both simulations on the same axes.
fig=plt.figure(figsize=(10, 6))
ax = plt.axes()
# Use the first station of proper_stations_dict. The original also assigned
# station_id=6216530 just before this line, but that value was overwritten
# immediately and never used, so it has been removed.
station_id, station_data=next(iter(proper_stations_dict.items()))
ds_list=[sim_noirr, sim_irr]
ts_station(obs, ax, station_id)
ts_with_obs(ds_list, obs, ax, station_id, station_data)

In [None]:
# Seasonal cycle at one station: observations plus both simulations on the same axes.
fig=plt.figure(figsize=(10, 6))
ax = plt.axes()
station_id=6216530
# Look up the metadata of that same station. The original relied on a
# `station_data` left over from the previous cell, which described a different
# station than `station_id`.
station_data=proper_stations_dict[station_id]
# station_id, station_data=next(iter(proper_stations_dict.items()))
ds_list=[sim_noirr, sim_irr]
sc_station(obs, ax, station_id)
sc_with_obs(ds_list, obs, ax, station_id, station_data)

In [None]:
import numpy as np

def percent_valid_coverage(ds, start_year, end_year, time_var='time', data_var=None):
    """
    Percentage of valid (non-NaN) values of a time series between start_year
    and end_year (both inclusive).

    The time steps whose year falls in [start_year, end_year] are selected, and
    the share of non-NaN values among the selected values is returned. For a
    series that only has a time dimension this is the share of valid time
    steps; if the variable has further dimensions, every value counts, so the
    result always stays within 0-100.

    Note that time steps missing from the time axis altogether are not counted
    as gaps: only NaN values present in the data lower the coverage.

    Parameters:
        ds (xarray.Dataset): Opened xarray dataset.
        start_year (int): Start year (inclusive).
        end_year (int): End year (inclusive).
        time_var (str): Name of the time variable.
        data_var (str): Name of the data variable. If None, uses the first variable.

    Returns:
        float: Percentage coverage (0 to 100), rounded to 2 decimals;
        0.0 when no time step falls in the requested range.
    """
    if data_var is None:
        data_var = list(ds.data_vars)[0]

    data = ds[data_var]
    years = ds[time_var].dt.year

    # Boolean mask over the time axis, used to keep only the requested years
    in_range = (years >= start_year) & (years <= end_year)
    filtered_data = data.sel({time_var: in_range})

    # Denominator is the number of selected VALUES (not just time steps), so
    # that it always matches what the numerator counts.
    total_values = filtered_data.size
    if total_values == 0:
        return 0.0

    valid_count = np.count_nonzero(~np.isnan(filtered_data))
    return round((valid_count / total_values) * 100, 2)


In [None]:
# Print, for every evaluation station, the percentage of valid 'runoff_mean'
# observations over 2010-2017 (see percent_valid_coverage).
stations_ds=obs
for station_id, station_data in proper_stations_dict.items():
    # observation series of this station only
    station = stations_ds.sel(id=station_id)
    coverage = percent_valid_coverage(station, 2010, 2017, time_var='time', data_var='runoff_mean')
    print(f"Station Name: {station_data['name']}, number: {station_data['station_nb']} Coverage: {coverage}%")

# Single-station variant, kept for reference:
# station_id=6216530
# station_id, station_data=next(iter(proper_stations_dict.items()))
# print('Station Name:', station_data['name'])
# station = stations_ds.sel(id=station_id)
# percent_valid_coverage(station, 2010, 2017, time_var='time', data_var='runoff_mean')

In [None]:
# Plot a time series for each station of proper_stations_dict.
# The 6x3 grid gives 18 panels, one per station; axes are flattened so that
# panel i receives the i-th station.
fig, axes= plt.subplots(6,3, figsize=(20,30))
axes= axes.flatten()
ds_list=[sim_noirr, sim_irr]
for i, (station_id, station_data) in enumerate(proper_stations_dict.items()):
    # observations first, then the simulations on the same axes
    ts_station(obs,axes[i], station_id, name=station_data['name'], year_min=2010, year_max=2022)
    ts_with_obs(ds_list, obs, axes[i], station_id, station_data, year_min=2010, year_max=2022)

In [None]:
# Plot a seasonal cycle for each station of proper_stations_dict.
# The 6x3 grid gives 18 panels, one per station; axes are flattened so that
# panel i receives the i-th station.
fig, axes= plt.subplots(6,3, figsize=(20,24))
axes= axes.flatten()
ds_list=[sim_noirr, sim_irr]
for i, (station_id, station_data) in enumerate(proper_stations_dict.items()):
    # observations first, then the simulations on the same axes
    sc_station(obs,axes[i], station_id, name=station_data['name'], year_min=2010, year_max=2022)
    sc_with_obs(ds_list, obs, axes[i], station_id, station_data, year_min=2010, year_max=2022)

In [None]:
# Plot a time series for each station of representative_stations_dict.
# 5 stacked panels, matching the 5 stations kept in that dict.
fig, axes= plt.subplots(5,1, figsize=(10,25))
axes= axes.flatten()
ds_list=[sim_noirr, sim_irr]
for i, (station_id, station_data) in enumerate(representative_stations_dict.items()):
    # observations first, then the simulations on the same axes
    ts_station(obs,axes[i], station_id, name=station_data['name'], ylabel='River discharge (m³/s)', year_min=2010, year_max=2022)
    ts_with_obs(ds_list, obs, axes[i], station_id, station_data, ylabel='River discharge (m³/s)', year_min=2010, year_max=2022)

In [None]:
# Plot a seasonal cycle for each station of representative_stations_dict.
# 5 stacked panels, matching the 5 stations kept in that dict.
fig, axes= plt.subplots(5, 1, figsize=(10,25))
axes= axes.flatten()
ds_list=[sim_noirr, sim_irr]
for i, (station_id, station_data) in enumerate(representative_stations_dict.items()):
    # observations first, then the simulations on the same axes
    sc_station(obs,axes[i], station_id, name=station_data['name'], ylabel='River discharge (m³/s)', year_min=2010, year_max=2022)
    sc_with_obs(ds_list, obs, axes[i], station_id, station_data, ylabel='River discharge (m³/s)', year_min=2010, year_max=2022)

## Spatial averaging

In [None]:
# Time series and seasonal cycle of `var` for the irrigated run, drawn with the
# project helpers time_series_ave / seasonal_cycle_ave (spatial averaging is
# done inside the helpers, going by the section title).
var='hydrographs'

ds1=sim_irr
# ds2=ORCirr
# Only ds1 is plotted. The original first built [ds1, ds2] and overwrote it on
# the next line; that dead statement also depended on a `ds2` left over from an
# earlier cell, so it has been removed. To compare two datasets, define ds2
# here and use ds_list=[ds1, ds2].
ds_list=[ds1]

year_max=2015
time_series_ave(ds_list, var, year_max=year_max, title=None)
seasonal_cycle_ave(ds_list, var, title=None)

## At a single lon-lat point (or several, using a dict)

In [None]:
# Time series of `var` at a single lon/lat point, drawn with the project helper
# time_series_lonlat.
var="hydrographs"
# The original used `sim`, which is only assigned further down in the notebook
# (Metrics section) and is therefore undefined here on a fresh kernel.
# NOTE(review): sim_irr is used to match the previous section — switch to
# sim_noirr if that was the intended run.
ds1=sim_irr
ds_list=[ds1]
year_min=2010
year_max=2015
# point to extract (same values as the Guadalquivir entry of river_coords_merit)
lon=-6.325
lat=36.91

time_series_lonlat(ds_list, var, lon, lat, year_min=year_min, year_max=year_max, title=None)

In [None]:
# Discharge time series at each river outlet of river_coords_merit.
# The original used `sim`, which is only assigned further down in the notebook
# and is undefined here on a fresh kernel.
# NOTE(review): sim_irr is a choice — use sim_noirr (or both) if that was intended.
ds_list=[sim_irr]
discharge_coord_ts(ds_list, river_coords_merit, var='hydrographs', year_min=2010, year_max=2015)

In [None]:
# Discharge seasonal cycle at each river outlet of river_coords_merit.
# The original used `sim`, which is only assigned further down in the notebook
# and is undefined here on a fresh kernel.
# NOTE(review): sim_irr is a choice — use sim_noirr (or both) if that was intended.
ds_list=[sim_irr]
discharge_coord_sc(ds_list, river_coords_merit, var='hydrographs', year_min=2010, year_max=2015)

# Metrics

## Compute and display in table

In [None]:
# Print every metric for the first station of proper_stations_dict, using the
# no-irrigation run. The metric_* callables and compute_metric_station come
# from the project tools; each metric exposes a __short_name__ used as label.
station_id, station = next(iter(proper_stations_dict.items()))
metric_list=[metric_sim_module, metric_obs_module, metric_bias, metric_rmse, metric_tcorr, metric_nse, metric_kge]
for metric_to_use in metric_list:
    metric_value=compute_metric_station(sim_noirr, obs, station_id, station, metric_to_use)
    print('{} for station {} : {}'.format(metric_to_use.__short_name__, station['name'], metric_value))

In [None]:
# Build a pandas DataFrame with all metric values for the no-irrigation run:
# one row per station, one column per metric, plus a final 'Mean' row.
sim=sim_noirr
metric_list=[metric_sim_module, metric_obs_module, metric_bias, metric_rmse, metric_tcorr, metric_nse, metric_kge]
stations_dict=proper_stations_dict
# list of per-station records (dicts), converted to a DataFrame below
df=[]
for station_id, station in stations_dict.items():
    label='Station {} ({})'.format(station['station_nb'],station['name'])
    df.append({'Station':label})
    for metric in metric_list:
        # column name is the metric's short name
        name=metric.__short_name__
        metric_value=compute_metric_station(sim, obs, station_id, station, metric)
        metric_value=np.round(metric_value, 2)
        # store the value in the record of the current station
        df[-1][name]=metric_value

# convert the records to a pandas dataframe
df_noirr=pd.DataFrame(df)
# set Station as index
df_noirr.set_index('Station', inplace=True)
# Relative bias in percent, with one decimal. Rounding happens AFTER scaling
# by 100: rounding first and multiplying afterwards reintroduces float
# artefacts (e.g. 7.000000000000001) in the table and in the exported CSV.
# Column names are those produced by the metrics' __short_name__.
df_noirr['Bias (%)'] = np.round(100 * df_noirr['Bias (m³/s)'] / df_noirr['Module (obs, m³/s)'], 1)
# move Bias (%) (currently last) to the 4th column
cols = df_noirr.columns.tolist()
cols = cols[:3] + cols[-1:] + cols[3:-1]
df_noirr = df_noirr[cols]
# add average row (column-wise mean over all stations)
df_noirr.loc['Mean'] = df_noirr.mean()
# display only: the simulated-module column is hidden here, df_noirr itself is unchanged
df_noirr.drop('Module (sim, m³/s)', axis=1, inplace=False)

In [None]:
# Build a pandas DataFrame with all metric values for the irrigated run:
# one row per station, one column per metric, plus a final 'Mean' row.
sim=sim_irr
metric_list=[metric_sim_module, metric_obs_module, metric_bias, metric_rmse, metric_tcorr, metric_nse, metric_kge]
stations_dict=proper_stations_dict
# list of per-station records (dicts), converted to a DataFrame below
df=[]
for station_id, station in stations_dict.items():
    label='Station {} ({})'.format(station['station_nb'],station['name'])
    df.append({'Station':label})
    for metric in metric_list:
        # column name is the metric's short name
        name=metric.__short_name__
        metric_value=compute_metric_station(sim, obs, station_id, station, metric)
        metric_value=np.round(metric_value, 2)
        # store the value in the record of the current station
        df[-1][name]=metric_value

# convert the records to a pandas dataframe
df_irr=pd.DataFrame(df)
# set Station as index
df_irr.set_index('Station', inplace=True)
# Relative bias in percent, with one decimal. Rounding happens AFTER scaling
# by 100: rounding first and multiplying afterwards reintroduces float
# artefacts (e.g. 7.000000000000001) in the table and in the exported CSV.
# Column names are those produced by the metrics' __short_name__.
df_irr['Bias (%)'] = np.round(100 * df_irr['Bias (m³/s)'] / df_irr['Module (obs, m³/s)'], 1)
# move Bias (%) (currently last) to the 4th column
cols = df_irr.columns.tolist()
cols = cols[:3] + cols[-1:] + cols[3:-1]
df_irr = df_irr[cols]
# add average row (column-wise mean over all stations)
df_irr.loc['Mean'] = df_irr.mean()
# display only: both module columns are hidden here, df_irr itself is unchanged
df_irr.drop('Module (obs, m³/s)', axis=1, inplace=False).drop('Module (sim, m³/s)', axis=1, inplace=False)

In [None]:
# Summary statistics over the stations only. The 'Mean' row appended when the
# table was built is an aggregate, not a station: leaving it in would inflate
# the count and shrink the spread, so it is excluded first.
df_noirr.drop(index='Mean', errors='ignore').describe()

In [None]:
# Summary statistics over the stations only. The 'Mean' row appended when the
# table was built is an aggregate, not a station: leaving it in would inflate
# the count and shrink the spread, so it is excluded first.
df_irr.drop(index='Mean', errors='ignore').describe()

In [None]:
# Change of every metric between the irrigated and the no-irrigation run
# (irr - noirr), row by row; the 'Mean' row is therefore the difference of the
# two means.
df_diff=df_irr-df_noirr
# Change of the bias relative to the no-irrigation bias, in percent with one
# decimal. Rounded AFTER scaling by 100 to avoid float artefacts such as
# 7.000000000000001 in the table and in the exported CSV.
# NOTE(review): the value is signed, despite 'absolute' in the column name.
df_diff['Bias absolute relative change (%)'] = np.round(100 * df_diff['Bias (m³/s)'] / df_noirr['Bias (m³/s)'], 1)
# drop the columns whose difference is not of interest
df_diff = df_diff.drop(columns=['Module (obs, m³/s)', 'Module (sim, m³/s)', 'Bias (m³/s)', 'Bias (%)'])
# df_diff.loc['Mean'] = df_diff.mean()
df_diff

In [None]:
# Summary statistics over the stations only: the 'Mean' row inherited from
# df_irr / df_noirr is an aggregate, not a station, so it is excluded first.
df_diff.drop(index='Mean', errors='ignore').describe()

In [None]:
# Remove the simulated-module and absolute-bias columns from df_noirr. This is
# done in place on purpose, so the CSV export that follows writes the trimmed table.
# errors='ignore' keeps the cell re-runnable: without it a second run raises
# KeyError because the columns are already gone.
df_noirr.drop(columns=['Module (sim, m³/s)', 'Bias (m³/s)'], inplace=True, errors='ignore')
df_noirr

In [None]:
# Export the metric tables to csv, to use in latex or excel.
from pathlib import Path

# Create the output directory when missing; to_csv does not create it and
# would otherwise fail on a fresh checkout.
figures_dir = Path('figures')
figures_dir.mkdir(parents=True, exist_ok=True)

df_noirr.to_csv(figures_dir / 'df_noirr.csv')
df_irr.to_csv(figures_dir / 'df_irr.csv')
df_diff.to_csv(figures_dir / 'df_diff.csv')

## Display on map

In [None]:
# Map of the RMSE at each evaluation station for the no-irrigation run, drawn
# with the project helper display_metric_map.
# metric_min / metric_max presumably set the colour range — TODO confirm against the helper.
sim=sim_noirr
metric_to_use = metric_rmse
display_metric_map(sim, obs, proper_stations_dict, metric_to_use, metric_min=0, metric_max=100, legend=True)

In [None]:
# Map of one metric at each evaluation station for the irrigated run.
sim=sim_irr
# The original assignment had no right-hand side (SyntaxError).
# NOTE(review): metric_bias is chosen because the symmetric -100..100 range
# below suits a signed metric — replace it if another metric was intended.
metric_to_use = metric_bias
display_metric_map(sim, obs, proper_stations_dict, metric_to_use, metric_min=-100, metric_max=100, legend=True)

In [None]:
# One map per metric for the no-irrigation run (helper defaults for the value range, no legend).
sim=sim_noirr
metric_list=[metric_bias, metric_rmse, metric_tcorr, metric_nse, metric_kge]
# uncomment to draw a single metric only
# metric_list=[metric_bias]
for metric_to_use in metric_list:
    display_metric_map(sim, obs, proper_stations_dict, metric_to_use, legend=False)

In [None]:
# One map per metric for the irrigated run (helper defaults for the value range, no legend).
sim=sim_irr
metric_list=[metric_bias, metric_rmse, metric_tcorr, metric_nse, metric_kge]
for metric_to_use in metric_list:
    display_metric_map(sim, obs, proper_stations_dict, metric_to_use, legend=False)

In [None]:
# Map of the metric difference between the irrigated (sim1) and the
# no-irrigation (sim2) runs at each station, drawn with the project helper
# display_metric_diff_map.
# NOTE(review): the sign convention (sim1 - sim2 or the reverse) is decided inside the helper — confirm.
sim1=sim_irr
sim2=sim_noirr
# full list of metrics, immediately overridden below: only RMSE is drawn
metric_list=[metric_bias, metric_rmse, metric_tcorr, metric_nse, metric_kge]
metric_list=[metric_rmse]
# value range passed to the helper as metric_min / metric_max
vmin=-25
vmax= 25
for metric_to_use in metric_list:
    display_metric_diff_map(sim1, sim2, obs, proper_stations_dict, metric_to_use,  metric_min=vmin, metric_max=vmax,legend=False)