In [1]:
import geopandas as gpd
import glob
import folium
import ipywidgets as widgets
import json
import matplotlib.pyplot as plt
import os
import xarray as xr

In [2]:
import benchmarks
import calculate_metrics
import visualization
import uruguay as provider

In [3]:
import psutil

# Get the available memory in bytes
available_memory_bytes = psutil.virtual_memory().available

# Convert to gigabytes for readability
available_memory_gb = available_memory_bytes / 1024 ** 3

print(f"Available memory: {available_memory_gb:.2f} GB")

Available memory: 30.26 GB


# Load Experiment Data

## Load Basin & Gaguge Shapefile

In [4]:
with open(provider.SHAPEFILE_PATH, 'rb') as f:
  geography_gdf = gpd.read_file(f)
geography_gdf.set_index('unique_gauge_id', inplace=True)
geography_gdf

ERROR 1: PROJ: proj_create_from_database: Open of /home/gsnearing/miniconda3/envs/wmo-pilot-results-analysis/share/proj failed


Unnamed: 0_level_0,area,gauge_longitude,gauge_latitude,drain_area,station_code,provider,data_source,altitude,river,gauge_serial_number,...,DIST_SINK,DIST_MAIN,gauge_sub_basin_area,hybas_upstream_area,gauge_pfaf_id,ENDO,COAST,ORDER,SORT,geometry
unique_gauge_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
DNAUY_52,6687.0,-55.47099,-31.87573,,52.0,DNAUY,uruguay_metadata_jan_2024.csv,,,,...,559.5,559.5,111.9,6687.5,641608700300,0,0,2,87210,"POLYGON ((-55.53333 -31.98750, -55.53971 -31.9..."
DNAUY_1743,11630.0,-55.22658,-32.44356,,1743.0,DNAUY,uruguay_metadata_jan_2024.csv,,,,...,525.4,525.4,155.1,11629.7,641609303000,0,0,1,87172,"POLYGON ((-54.79583 -32.84583, -54.80000 -32.8..."
DNAUY_46.1,872.7,-54.9008,-34.7694,,46.1,DNAUY,uruguay_metadata_jan_2024.csv,,,,...,12.8,12.8,79.0,872.8,641503403000,0,0,1,86601,"POLYGON ((-54.92083 -34.82500, -54.92500 -34.8..."
DNAUY_174,487.1,-54.95316,-34.7476,,174.0,DNAUY,uruguay_metadata_jan_2024.csv,,,,...,12.8,12.8,140.8,487.3,641503402100,0,0,2,86602,"POLYGON ((-54.92500 -34.82083, -54.92917 -34.8..."
DNAUY_44,2812.8,-55.7411,-34.2161,,44.0,DNAUY,uruguay_metadata_jan_2024.csv,,,,...,131.1,131.1,214.9,2812.5,641504709000,0,0,1,86717,"POLYGON ((-54.99167 -34.42083, -54.99167 -34.4..."
DNAUY_59.1,4914.1,-56.2492,-34.3647,,59.1,DNAUY,uruguay_metadata_jan_2024.csv,,,,...,75.5,75.5,83.0,4913.8,641504701300,0,0,1,86673,"POLYGON ((-54.99167 -34.42083, -54.99167 -34.4..."
DNAUY_133,8389.9,-56.4011,-34.4497,,133.0,DNAUY,uruguay_metadata_jan_2024.csv,,,,...,43.6,43.6,42.7,8390.0,641504500100,0,0,1,86658,"POLYGON ((-54.99167 -34.42083, -54.99167 -34.4..."
DNAUY_53.1,1825.1,-56.2033,-34.0911,,53.1,DNAUY,uruguay_metadata_jan_2024.csv,,,,...,95.7,95.7,233.2,1825.3,641504603000,0,0,2,86683,"POLYGON ((-56.19167 -34.16667, -56.19223 -34.1..."
DNAUY_28.0,219910.3,-57.610707,-30.260656,,28.0,DNAUY,uruguay_metadata_jan_2024.csv,,,,...,340.9,340.9,209.7,219907.4,641819090000,0,0,1,87662,"MULTIPOLYGON (((-57.07379 -28.62443, -57.07042..."
DNAUY_26676,243099.2,-58.02,-31.38,,26676.0,DNAUY,uruguay_metadata_jan_2024.csv,,,,...,198.1,198.1,111.3,243096.4,641815030100,0,0,1,87499,"MULTIPOLYGON (((-57.07379 -28.62443, -57.07042..."


## Load Performance Metrics

In [5]:
metrics_file = f'{provider.EXPERIMENT_RESULTS_PATH}/metrics.nc'
with open(metrics_file, 'rb') as f:
  metrics_ds = xr.open_dataset(f)

## Dimensions

In [6]:
experiments = metrics_ds.experiment.values.tolist()
gauges = metrics_ds.gauge_id.values.tolist()
lead_times = metrics_ds.lead_time.values.tolist()
available_metrics = list(metrics_ds.data_vars)

## Load Hydrographs

In [7]:
hydrographs = {
    experiment: calculate_metrics.load_model_runs_for_experiment(
        experiment=experiment,
        gauges=provider.TRAIN_GAUGES,
        model_run_base_path=provider.MODEL_RUN_DIRECTORY,
    ).assign_coords(experiment=experiment)
    for experiment in experiments if experiment not in ['persistence', 'monthly_climatology']
}
hydrographs = xr.concat(hydrographs.values(), dim='experiment')
hydrographs

# Basin Geography



In [8]:
@widgets.interact(
    gauge_options = widgets.SelectMultiple(
        options=gauges,
        value=gauges,
        description='Gauge ID',
    )
)
def interactive_basin_and_gauge_geography(
    gauge_options,
):
  gauges = list(gauge_options)
  if len(gauge_options) == 1:
    map_center = (
          geography_gdf.loc[gauges[0]].gauge_latitude,
          geography_gdf.loc[gauges[0]].gauge_longitude
      )
    initial_map_zoom = provider.MAP_ZOOM + 3
  else:
    map_center = provider.MAP_CENTER
    initial_map_zoom = provider.MAP_ZOOM

  basin_map = visualization.basin_and_gauge_geography(
      geography=geography_gdf,
      gauges=gauge_options,
      map_center=map_center,
      initial_map_zoom=initial_map_zoom,
      height=500,
      width='50%'
  )
  return basin_map.add_child(folium.LayerControl())

# Gauge Hydrographs
This is the section that does not work because of the NetCDF compatability issue mentioned above.

In [9]:
# Select gauge to view hydrographs.
gauge = gauges[0]
gauge_selector = widgets.widgets.Dropdown(
    options=gauges, description='Gauge ID')
def on_selector_change(change):
  global gauge
  gauge = change['new']
gauge_selector.observe(on_selector_change, names='value')
display(gauge_selector)

Dropdown(description='Gauge ID', options=('DNAUY_52', 'DNAUY_1743', 'DNAUY_46.1', 'DNAUY_174', 'DNAUY_44', 'DN…

In [10]:
# Re-run this cell after making a selection from the dropdown box above.
visualization.plot_hydrographs_for_gauge(
    hydrographs=hydrographs,
    gauge=gauge,
    experiments=experiments,
)

# Forecast Timeseries

In [19]:
# Plot observations.
plt.figure(figsize=(12, 6))
obs = ds.sel(lead_time=0).observation.to_series()
fig = px.line(obs, title=f'Gauge: {gauge}, Experiment: {experiment}')

# Add the time slider.
fig.update_layout(xaxis=dict(rangeslider=dict(visible=True), type='date'))

print(fig.layout.xaxis.range)
fig.show()


None


<Figure size 1200x600 with 0 Axes>

In [47]:
visualization.plot_forecast_traces(
    hydrographs=hydrographs,
    experiment=experiment,
    gauge=gauge,
    start_time_ids=start_time_idx,
    time_window_days=time_window_days
)

array([False, False, False, ..., False, False, False])

In [56]:
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np

# ---------

gauge = 'DNAUY_52'
experiment = 'gauged'
start_time_idx = 1000
time_window_days = 200

# ---------

ds = hydrographs.sel(
    {
        'experiment': experiment,
        'gauge_id': gauge
    }
)


issue_times = ds['time'].values
num_lead_times = len(ds['lead_time'].values)

start_date = issue_times[start_time_idx]
time_window = pd.Timedelta(days=time_window_days)
plot_times_mask = (issue_times > start_date) & (issue_times < start_date + time_window) 
issue_times = issue_times[plot_times_mask]

ds = ds.sel(time=issue_times)

# Plot observations.
plt.figure(figsize=(12, 6))
obs = ds.sel(lead_time=0).observation.to_series()
fig = px.line(obs, title=f'Gauge: {gauge}, Experiment: {experiment}')

# Plot forecast traces.
for i, issue_time in enumerate(issue_times[:1000]):
    ds_issue = ds.sel(time=issue_time)
    lead_times = issue_times[i:i+num_lead_times]
    forecast_values = ds_issue.prediction.values
    if len(lead_times) == len(forecast_values):
      fig.add_trace(
          go.Scatter(
              x=lead_times,
              y=forecast_values,
              showlegend=False
          )
      )

# Add the time slider.
fig.update_layout(xaxis=dict(rangeslider=dict(visible=True), type='date'))
fig.show()

<Figure size 1200x600 with 0 Axes>

# Gauge Metrics

In [None]:
@widgets.interact(
    metrics = widgets.SelectMultiple(
        options=available_metrics,
        value=visualization.DEFAULT_METRICS,
    )
)
def interactive_scores_table(
    metrics,
    gauge=gauges,
    lead_time=lead_times,
):
  visualization.scores_table(
      gauge=gauge,
      lead_time=lead_time,
      metrics=metrics,
      metrics_ds=metrics_ds,
  )

# Inspect Experiments

## Map of Cross Validation Splits

In [None]:
# Load mapping from gauge to model run directory.
with open(provider.GAUGE_TO_MODEL_PATH_MAPPING_FILE, 'r') as f:
  gauge_to_model_path = json.load(f)

In [None]:
@widgets.interact
def interactive_plot_cross_validation_splits(
    experiment=gauge_to_model_path.keys(),
):
  gauge_to_split = {
      gauge: os.path.basename(model_path)
        for gauge, model_path in gauge_to_model_path[experiment].items()
  }
  cross_validation_splits = {split: [] for split in set(gauge_to_split.values())}
  for gauge, split in gauge_to_split.items():
    cross_validation_splits[split].append(gauge)

  basin_map = visualization.plot_cross_validation_experiment_folium(
    splits=cross_validation_splits,
    geography=geography_gdf,
    map_center=provider.MAP_CENTER,
    initial_map_zoom=provider.MAP_ZOOM,
    height=500,
    width='50%',
  )
  return basin_map.add_child(folium.LayerControl())

In [None]:
@widgets.interact
def interactive_plot_cross_validation_splits(
    experiment=gauge_to_model_path.keys(),
):
  gauge_to_split = {gauge: os.path.basename(model_path) for gauge, model_path in gauge_to_model_path[experiment].items()}
  cross_validation_splits = {split: [] for split in set(gauge_to_split.values())}
  for gauge, split in gauge_to_split.items():
    cross_validation_splits[split].append(gauge)

  visualization.plot_cross_validation_experiment_cartoon(
    splits=cross_validation_splits,
    geography=geography_gdf,
    provider_countries=provider.COUNTRIES,
  )

## Score Map

In [None]:
@widgets.interact
def interactive_score_map(
    metric=available_metrics,
    experiment=experiments,
    lead_time=lead_times,
):
  visualization.score_map(
      basin_geometries=geography_gdf,
      experiment=experiment,
      metric=metric,
      lead_time=lead_time,
      metrics=metrics_ds,
      provider_countries=provider.COUNTRIES,
  )

## Score Distribution Boxplots

In [None]:
@widgets.interact(
    experiments = widgets.SelectMultiple(
        options=experiments,
        value=experiments,
        desctiption='Experiment(s)'
    )
)
def interactive_score_distribution_box_plot(
    experiments,
    metric=available_metrics,
):
  visualization.score_distribution_box_plot(
      metrics=metrics_ds,
      experiments=experiments,
      gauges=gauges,
      metric=metric,
  )



## Score Distribution CDFs

In [None]:
@widgets.interact(
    experiments_options = widgets.SelectMultiple(
        options=experiments,
        value=experiments,
        desctiption='Experiment(s)'
    ),
    lead_times_options = widgets.SelectMultiple(
        options=lead_times,
        value=lead_times[:1],
        desctiption='Lead Time(s)'
    )
)
def interactive_plot_cdfs(
    experiments_options,
    lead_times_options,
):
  visualization.plot_cdfs(
      metrics=metrics_ds[visualization.DEFAULT_METRICS],
      experiments=experiments_options,
      lead_times=lead_times_options,
      gauges=gauges,
  )
  plt.show()



## Score Difference Map

In [None]:
@widgets.interact
def interactive_score_map(
    metric=available_metrics,
    experiment=experiments,
    baseline_experiment=experiments,
    lead_time=lead_times,
):
  visualization.score_map(
      basin_geometries=geography_gdf,
      experiment=experiment,
      baseline_experiment=baseline_experiment,
      metric=metric,
      lead_time=lead_time,
      metrics=metrics_ds,
      provider_countries=provider.COUNTRIES,
  )