This notebook creates 10 sets of stations, each containing 7 stations, and uses them to set up 10 optimization sets.

In [1]:
import json
import numpy as np

In [2]:
# Seed NumPy's global RNG so the random station selection below is reproducible.
np.random.seed(101)

In [3]:
# Load the station specifications; the `with` block closes the file handle
# as soon as the JSON has been parsed.
with open('/work/yadegari/single_basin_optim_2/stations.json') as fh:
    stations_specs = json.load(fh)

# The keys of the specification mapping are the station names to sample from.
sts_names = stations_specs.keys()

In [4]:
# Draw 7 * 10 distinct station names (no replacement) and arrange them as
# 10 rows of 7 stations each.
all_station_names = np.array(list(sts_names))
station_sets = np.random.choice(
    all_station_names, size=7 * 10, replace=False
).reshape(10, 7)

In [5]:
# Persist the sampled sets as {"set_1": [...], ..., "set_10": [...]}.
# The `with` block guarantees the file is flushed and closed before later
# cells read it back.
with open('stations_study.json', 'w') as fh:
    json.dump(
        {f"set_{k}": list(v) for (k, v) in enumerate(station_sets, start=1)},
        fh,
        indent=4
    )

The setups are written into two directories: `with_bucket_system` and `without_bucket_system`.

In [6]:
import os
from path import Path

In [7]:
# Template for the `control.py` file of each optimization set. It is filled in
# with str.format using the fields `mpr_tf`, `training_dictionary` and `params`.
# Doubled braces (`{{}}`) render as a literal `{}`. The backslash at the end of
# the `training = ` and `params = ` lines is a line continuation inside the
# string literal, so the substituted value follows on the same line.
control_str = """
mpr_tf = "{mpr_tf}"

training = \
{training_dictionary}

validation = {{}}

penalty = {{}}

params = \
{params}
"""

In [15]:
# Template for the batch script written as `submit_optim.sh`. It is filled in
# with str.format using the fields `job_name`, `optim_path` and `num_iterations`.
# Doubled braces (`${{...}}`) render as literal shell `${...}` expansions, and
# `\\` renders as a single backslash (shell line continuation).
# The generated script accepts an optional `-r` flag, which is forwarded to
# driver.py through `$restart_option`.
submit_optim="""#!/usr/bin/bash

#SBATCH --ntasks=8
#SBATCH --time=192:00:00
#SBATCH --output=./LOG.run.%j.out
#SBATCH --error=./LOG.run.%j.err
#SBATCH --mem-per-cpu=32G
#SBATCH --export=ALL
#SBATCH --cpus-per-task=2
#SBATCH --job-name={job_name}

# either OpenMPI or comparable library
ml OpenMPI

set -x
restart_option=""
while getopts ':r' 'OPTKEY'; do
    case ${{OPTKEY}} in
        'r')
            restart_option="-r"
            ;;
    esac
done


export OMPI_MCA_mpi_warn_on_fork=0
export OMP_NUM_THREADS=${{SLURM_CPUS_PER_TASK}}

srun python -m mpi4py.futures {optim_path}/driver.py \\
         -n {num_iterations} \\
         -c ./control_file_squash.py \\
         --clean-completed \\
         $restart_option \\
         -l {optim_path}/basin_lut.org
"""

In [9]:
import re

def get_training_dict(stations, station_specs, warmup):
    """Build the per-station training configuration.

    Parameters
    ----------
    stations : iterable of str
        Station names. Each name must start with at least one character
        followed by an underscore and digits (e.g. ``"basin_6340"``); the
        digits captured by the pattern become the key of the result.
    station_specs : mapping
        Maps a station name to a record whose ``'yb'`` and ``'nyrs'`` entries
        are sequences; only their first elements are used.
    warmup
        Value stored unchanged under ``'warmup'`` for every station.

    Returns
    -------
    dict
        ``{station_id: {'year_begin': ..., 'year_end': ..., 'warmup': ...}}``
        where ``year_end = year_begin + nyrs - 1``.

    Raises
    ------
    ValueError
        If a station name does not match the expected pattern.
    KeyError
        If a station is missing from ``station_specs``.
    """
    training = {}
    for st in stations:
        # Raw string: '\d' in a normal string literal is an invalid escape.
        match = re.match(r'.+_(\d+)', st)
        if match is None:
            raise ValueError(
                f"station name {st!r} does not match '<prefix>_<digits>'"
            )
        year_begin = station_specs[st]['yb'][0]
        n_years = station_specs[st]['nyrs'][0]
        training[match[1]] = {
            'year_begin': year_begin,
            # Inclusive end year of a period spanning `n_years` years.
            'year_end': year_begin + n_years - 1,
            'warmup': warmup,
        }
    return training

In [10]:
def shell_cmd(cmd):
    """Run ``cmd`` through the shell and report any problems.

    Parameters
    ----------
    cmd : str
        Command line handed to the shell (``shell=True``), so it must only be
        built from trusted input.

    Returns
    -------
    None
        The command's stdout is captured and discarded. Anything it wrote to
        stderr is printed, and a non-zero exit status is printed as well so
        that failures which produce no stderr output are not missed.
    """
    # Imported locally so the function does not depend on another cell having
    # been executed first.
    import subprocess as sp

    proc = sp.run(cmd, shell=True, stdout=sp.PIPE, stderr=sp.PIPE, text=True)
    if proc.stderr:
        print(proc.stderr)
    if proc.returncode != 0:
        print(f'command exited with status {proc.returncode}: {cmd}')

In [11]:
import subprocess as sp

def prepare_domains(optim_path,
                    mpr_tf,
                    params,
                    study_name,
                    sts,
                    stations_specs,
                    warmup,
                    prepare_basin_cmd,
                    control_str,
                    squash_cmd,
                    num_iteration_per_parameter):
    """Create and populate one working directory per station set.

    For every entry of ``sts`` a directory ``<study_name>/<set name>`` is
    created. Inside it, ``control.py`` and ``submit_optim.sh`` are written and
    the two preparation commands are run.

    Parameters
    ----------
    optim_path : str
        Substituted for ``{optim_path}`` in the submit-script template.
    mpr_tf : str
        Substituted for ``{mpr_tf}`` in ``control_str``.
    params : sized, JSON-serialisable
        Written into ``control.py``; its length also scales the iteration count.
    study_name : str
        Name of the top-level directory to create.
    sts : mapping
        Maps a set name (used as directory name) to its station names.
    stations_specs : mapping
        Station specifications handed to ``get_training_dict``.
    warmup
        Warm-up value handed to ``get_training_dict``.
    prepare_basin_cmd, squash_cmd : str
        Shell commands run, in this order, inside each set directory.
    control_str : str
        ``str.format`` template with the fields ``mpr_tf``,
        ``training_dictionary`` and ``params``.
    num_iteration_per_parameter : int
        Multiplied by ``len(params)`` to give ``num_iterations`` in the
        submit script.

    Notes
    -----
    The submit-script template is read from the module-level ``submit_optim``;
    it is not a parameter.
    """
    # `Path(...).mkdir()` is used as a context manager: path.py's Path changes
    # into the directory on entry and restores the previous working directory
    # on exit, which is why the relative file names below land in the set's
    # directory. NOTE(review): mkdir() is expected to fail when the directory
    # already exists, so an existing study is not overwritten - confirm.
    with Path(study_name).mkdir():
        for optim_set, stations in sts.items():
            with Path(optim_set).mkdir():
                training_dict = get_training_dict(stations, stations_specs, warmup)
                with open('control.py', 'w') as fh:
                    # write(), not writelines(): the payload is a single string.
                    fh.write(
                        control_str.format(
                            mpr_tf=mpr_tf,
                            training_dictionary=json.dumps(training_dict, indent=4),
                            params=json.dumps(params, indent=4)
                        )
                    )
                with open('submit_optim.sh', 'w') as fh:
                    fh.write(
                        submit_optim.format(
                            job_name=f'{study_name}_{optim_set}',
                            optim_path=optim_path,
                            num_iterations=f'{num_iteration_per_parameter * len(params)}'
                        )
                    )
                # Both commands refer to ./control.py, so they must run while
                # the set directory is the working directory.
                shell_cmd(prepare_basin_cmd)
                shell_cmd(squash_cmd)
                

Here are the parameters that are shared between the two sets.

In [12]:
# Settings shared by both studies.
optim_path = '/work/yadegari/optim_crash/'

# Reload the station sets from the JSON file; the `with` block closes the
# file handle once parsing is done.
with open('stations_study.json') as fh:
    station_sets = json.load(fh)


# Shell command run inside each set directory; the backslash-newlines are
# line continuations, so the command is a single line.
prepare_basin_cmd = \
f'python \
{optim_path}/prepare_domains.py \
-c ./control.py \
-l {optim_path}/basin_lut.org'



Parameters for the set **without** water bucket

In [13]:
# Study without the water bucket.
mpr_tf = 'zacharias_res_new'
study_name = 'without_bucket_system'

# `with` closes the parameter file as soon as it has been parsed.
with open('params_without_waterbucket.json') as fh:
    params = json.load(fh)

# Squash command run inside each set directory (single line after the
# backslash continuations).
squash_cmd = \
f'python {optim_path}/squash_multiyear.py \
-c ./control.py \
-l {optim_path}/basin_lut.org \
--disable-wbcheck'

prepare_domains(
    optim_path=optim_path,
    mpr_tf=mpr_tf,
    params=params,
    study_name=study_name,
    sts=station_sets,
    stations_specs=stations_specs,
    warmup=365,
    prepare_basin_cmd=prepare_basin_cmd,
    control_str=control_str,
    squash_cmd=squash_cmd,
    num_iteration_per_parameter=100,
)

Parameters for the set **with** water bucket

In [14]:
# Study with the water bucket.
mpr_tf = 'zacharias_mhm'
study_name = 'with_bucket_system'

# `with` closes the parameter file as soon as it has been parsed.
with open('params_with_waterbucket.json') as fh:
    params = json.load(fh)

# Squash command run inside each set directory (single line after the
# backslash continuations); this variant also selects the bucket executable.
squash_cmd = \
f'python {optim_path}/squash_multiyear.py \
-c ./control.py \
-l {optim_path}/basin_lut.org \
--disable-wbcheck \
--htessel-exec master1s_bucket.exe'

prepare_domains(
    optim_path=optim_path,
    mpr_tf=mpr_tf,
    params=params,
    study_name=study_name,
    sts=station_sets,
    stations_specs=stations_specs,
    warmup=365,
    prepare_basin_cmd=prepare_basin_cmd,
    control_str=control_str,
    squash_cmd=squash_cmd,
    num_iteration_per_parameter=100,
)