# Managing adsorption film workflows

In [None]:
%load_ext autoreload

In [None]:
%autoreload 2

In [None]:
%config Application.log_level="WARN"

## Init

In [None]:
from sys import path

In [None]:
# NEMO
path.append('/home/fr/fr_fr/fr_jh1130/git/fireworks-private/jlh')

In [None]:
# JURECA
path.append('/homea/hka18/hka184/git/fireworks-private/jlh')

In [None]:
import sys

In [None]:
print( sys.executable )

In [None]:
# tune notebook width to desired value
# the margin-left option is necessary if toc is displayed on left hand side
# display and HTML are taken from the public IPython.display module;
# importing display from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100%; margin-left:15%; margin-right:0 !important; }</style>"))

In [None]:
import logging
import os
import datetime
from pprint import pprint

In [None]:
# Drop the handlers the Jupyter notebook attaches to the root logger
logging.getLogger().handlers = []

# Module-level logger, created as usual and limited to warnings and above
logger = logging.getLogger(__name__)
logger.setLevel(logging.WARN)

In [None]:
logging.basicConfig(level=logging.WARN)

In [None]:
import pandas as pd

In [None]:
from fireworks import Firework, Workflow
from fireworks.user_objects.firetasks.jlh_tasks import DummyParentTask, RecoverPackmolTask

In [None]:
from fwtools.JobAdmin import JobAdmin
from fwtools.JobAdmin import l_CTAB, head_atom_number_CTAB, tail_atom_number_CTAB
from fwtools.JobAdmin import l_SDS, head_atom_number_SDS, tail_atom_number_SDS

In [None]:
# NEMO
# module use /work/ws/nemo/fr_lp1029-IMTEK_SIMULATION-0/modulefiles within wrapper
# double braces to protect from python replacement
# lmp_cmd = ' '.join((
#     'module purge;',
#     'module load lammps/16Mar18-gnu-5.2-openmpi-2.1;',
#     'mpirun ${{MPIRUN_OPTIONS}} lmp -in {inputFile:s}' ))
# integrated into jobadmin (20180917)

In [None]:
# JUWELS
# module use /work/ws/nemo/fr_lp1029-IMTEK_SIMULATION-0/modulefiles within wrapper
# double braces to protect from python replacement
# lmp_cmd = ' '.join((
#     'module use /gpfs/homea/hka18/hka184/modules/modulefiles;',
#     'module load jlh/lammps/16Mar18-intel;',
#     'srun lmp -in {inputFile:s}' ))
# integrated into jobadmin (20180917)

In [None]:
# JURECA / JUWELS
juwels_path_prefix = os.sep + 'gpfs'
prefix = juwels_path_prefix + os.getcwd()

In [None]:
# prefix automatically for NEMO
prefix = os.getcwd()

In [None]:
# manually for JUWELS:
prefix = '/gpfs/homea/hka18/hka184/jobs/lmplab/ctab/201809'

In [None]:
# manually for NEMO
prefix = '/work/ws/nemo/fr_jh1130-201708-0/jobs/lmplab/ctab/201809'

In [None]:
template_prefix = prefix + os.sep + 'N_surfactant_on_substrate_template'

In [None]:
output_prefix = prefix + os.sep + 'sys'

In [None]:
# minimal JobAdmin instance
jobadmin = JobAdmin( 
    queue   = 'NEMO',
    template_prefix = template_prefix,
    output_prefix   = output_prefix
)

In [None]:
# pull simulation datafile from db
content, doc = jobadmin.fp.get_file(identifier='surfactant_on_AU_111_df_json')

In [None]:
sim_df = pd.read_json(content, orient='index')

In [None]:
# OR: read simulation data from file
# sim_df = pd.read_pickle(sim_data_prefix + os.sep + 'CTAB_on_AU_111.pkl')

In [None]:
sim_df['sf_preassembly'].unique()

In [None]:
jobadmin.use_nemo_queue()
jobadmin.use_nemo_templates()

In [None]:
jobadmin.use_juwels_queue()
jobadmin.use_juwels_templates()

In [None]:
jobadmin._sim_df = sim_df

In [None]:
# jobadmin._sim_df = sim_df.loc[
#     (sim_df['sf_preassembly'] == 'cylinders_with_counterion') \
#     | (sim_df['sf_preassembly'] == 'hemicylinders_with_counterion') ]

In [None]:
os.getlogin()

In [None]:
template_geninfo = jobadmin._template_geninfo

template_geninfo

In [None]:
# on JURECA
template_geninfo = jobadmin._template_geninfo
time=datetime.datetime.now().ctime()
user = !whoami
machine = os.uname().nodename
geninfo = template_geninfo.format(datetime=time,user=user[0],machine=machine)
# reset to avoid duplicate recognition
jobadmin.geninfo(reset=True,static_str=geninfo)

In [None]:
# on NEMO
jobadmin.geninfo(reset=True)

## Reinit

### on JUWELS

In [None]:
jobadmin = JobAdmin( 
    queue   = 'JUWELS',
    template_prefix = template_prefix,
    output_prefix   = output_prefix,
    sim_df = sim_df
)
jobadmin.std_worker = 'juwels_noqueue'
jobadmin.use_juwels_queue()
jobadmin.use_juwels_templates()
jobadmin.geninfo(reset=True)

In [None]:
sub_sim_df = sim_df[ sim_df["indenter"].isna() ] # select only systems without indenter

In [None]:
len(sub_sim_df)

### on NEMO
Rerun every time methods have been added or their definitions altered, in order to re-instantiate the JobAdmin object.

In [None]:
#  NEMO
from fwtools.JobAdmin import JobAdmin
from fwtools.JobAdmin import l_CTAB, head_atom_number_CTAB, tail_atom_number_CTAB
from fwtools.JobAdmin import l_SDS, head_atom_number_SDS, tail_atom_number_SDS

prefix = '/work/ws/nemo/fr_jh1130-201708-0/jobs/lmplab/ctab/201809'
template_prefix = prefix + os.sep + 'N_surfactant_on_substrate_template'
output_prefix = prefix + os.sep + 'sys'

jobadmin = JobAdmin( 
    queue   = 'NEMO',
    template_prefix = template_prefix,
    output_prefix   = output_prefix,
    sim_df = sim_df
)
jobadmin.std_worker = 'nemo_noqueue'
jobadmin.use_nemo_templates()
jobadmin.use_nemo_queue()
jobadmin._template_lmp_cmd = ' '.join((
    'module purge;',
    'module use ${{HOME}}/modulefiles;',
    'module load lammps/16Mar18-gnu-7.3-openmpi-3.1-colvars-20Sep18;',
    'mpirun ${{MPIRUN_OPTIONS}} lmp -in {inputFile:s}'))
jobadmin.geninfo(reset=True)

## Pick subselection of systems

In [None]:
# several samples for system selections
# system_name_scope should be the overall scope of systems to look at, i.e. all CTAB systems
# system_names is meant as a subset of system_name_scope to be worked on

In [None]:
preassemblies = {
    'cylinders_with_counterion',
    'hemicylinders_with_counterion',
    'bilayer_with_counterion',
    'monolayer_with_counterion',
    'inverse_monolayer_with_counterion'}

In [None]:
type(preassemblies)

In [None]:
system_name_scope = list( sim_df[ 
    sim_df['sf_preassembly'].isin(preassemblies) ].index
                   )

In [None]:
# original SDS systems without indenter
system_name_scope = list( sim_df[ 
        sim_df['indenter'].isna() & \
        (sim_df['surfactant'] == 'SDS')
    ].index )

In [None]:
system_name_scope = list( sim_df[ 
    sim_df['sf_preassembly'].isin(preassemblies) ].index
                   )

In [None]:
# original systems without indenter
system_name_scope = list( sim_df[ 
        sim_df['indenter'].isna()
    ].index )

In [None]:
# original CTAB systems without indenter
system_name_scope = list( sim_df[ 
        sim_df['indenter'].isna() & \
        (sim_df['surfactant'] == 'CTAB' )
    ].index )

In [None]:
# CTAB with indenter
system_name_scope = list( sim_df[ 
    (sim_df['indenter'] == '50Ang_stepped') &
    (sim_df['surfactant'] == 'CTAB' ) ].index )

In [None]:
# original CTAB systems without indenter, counterions at polar heads
system_name_scope = list( sim_df[ 
        sim_df['indenter'].isna() & \
        (sim_df['surfactant'] == 'CTAB') & \
        (sim_df['ci_initial_placement'] == 'at_polar_head') & \
        (sim_df["sb_name"] == 'AU_111_63x36x2') & \
        (sim_df['sf_preassembly'] != 'inverse_monolayer_with_counterion')
    ].index )

# Initial systems (no indenter)

## Prepare substrates

if necessary

In [None]:
# TODO: change to "wrapper" & batch_prepare_fw style, see other fireworks below

In [None]:
prep_subs_wf_dict = jobadmin.prepare_substrates(system_names)

In [None]:
prep_subs_fw_id_dict = jobadmin.add_fw(prep_subs_wf_dict)

## Prepare aggregates

In [None]:
# TODO: switch monolayers and bilayers to wrapper & batch_prepare_fw 

### (hemi-) cylinders

In [None]:
jobadmin.packmol_queueadapter

In [None]:
jobadmin.packmol_queueadapter["walltime"] = '96:00:00'

In [None]:
def pack_cylinders_wrapper(sfN, sb_measures, surfactant, counterion):
    """Forward to jobadmin.pack_cylinders with hemicylinders=True and the CTAB constants."""
    return jobadmin.pack_cylinders(
        sfN, sb_measures, surfactant, counterion,
        hemicylinders=True,
        l_surfactant=l_CTAB,
        head_atom_number=head_atom_number_CTAB,
        tail_atom_number=tail_atom_number_CTAB)

In [None]:
def prepare_packmol_wrapper(system_name):
    """Forward to jobadmin.prepare_packmol using pack_cylinders_wrapper, nloop=200, maxit=500."""
    return jobadmin.prepare_packmol(
        system_name, pack_cylinders_wrapper, nloop=200, maxit=500)

In [None]:
fw_dict = jobadmin.batch_prepare_fw( system_names, prepare_packmol_wrapper )

In [None]:
fw_dict[system_names[0]].as_dict()

In [None]:
jobadmin.add_wf(fw_dict)

In [None]:
logging.info("Querying appended packing workflows.")
packmol_fw_id_dict = jobadmin.query_systems(
    system_names, 
    step='packmol',
    state=["FIZZLED","COMPLETED", "RUNNING", "READY", "WAITING", "RESERVED"])


In [None]:
packmol_fw_id_dict

In [None]:
pdb2lmp_wf_dict = jobadmin.batch_prepare_fw(system_names, jobadmin.prepare_pdb2lmp)

In [None]:
pdb2lmp_wf_dict[system_names[0]].as_dict()

In [None]:
logging.info("Appening LAMMPS conversion")
jobadmin.append_wf_by_key(pdb2lmp_wf_dict, packmol_fw_id_dict)


In [None]:

logging.info("Querying appended LAMMPS conversion workflows")
ch2lmp_fw_id_dict = jobadmin.query_step(step='ch2lmp')

### (inverse) monolayer

In [None]:
## Template for monolayer packing
logging.info("Pack systems.")
# for normal monlayer (charged head towards surface)
# pack_wf_dict = jobadmin.pack_monolayer(
#     system_names=system_names,                     
#     l_surfactant=l_CTAB, 
#     head_atom_number=head_atom_number_CTAB,
#     tail_atom_number=tail_atom_number_CTAB)

# inverse monolayer
pack_wf_dict = jobadmin.pack_monolayer(
    system_names=system_names,                     
    l_surfactant=l_CTAB, 
    head_atom_number=tail_atom_number_CTAB,
    tail_atom_number=head_atom_number_CTAB)

logging.info("Append packing")
jobadmin.add_wf(pack_wf_dict)

logging.info("Querying appended packing workflows.")
packmol_fw_id_dict = jobadmin.query_systems(
    system_names, 
    step='packmol',
    state=["FIZZLED","COMPLETED", "RUNNING", "READY", "WAITING", "RESERVED"])

# fizzled might be ok if walltime expired, 
# packmol gets good results half-way

logging.info("Preparing LAMMPS conversion")
pdb2lmp_wf_dict = jobadmin.prepare_pdb2lmp(system_names)
logging.info("Appening LAMMPS conversion")
jobadmin.append_wf_by_key(pdb2lmp_wf_dict, packmol_fw_id_dict)

logging.info("Querying appended LAMMPS conversion workflows")
ch2lmp_fw_id_dict = jobadmin.query_step(step='ch2lmp')

### bilayer

In [None]:
sim_df["sb_name"].unique()

In [None]:
system_names = list( sim_df[ 
    (sim_df['sb_name'] == 'AU_111_63x36x2') \
    & (sim_df['sf_preassembly'] == 'bilayer_with_counterion') ].index )

In [None]:
system_names

In [None]:
# Template for bilayer packing
logging.info("Pack systems.")
pack_wf_dict = jobadmin.pack_bilayer(
    system_names=system_names,                     
    l_surfactant=l_CTAB, 
    head_atom_number=head_atom_number_CTAB,
    tail_atom_number=tail_atom_number_CTAB)

logging.info("Append packing")
jobadmin.add_wf(pack_wf_dict)

In [None]:
logging.info("Querying appended packing workflows.")
packmol_fw_id_dict = jobadmin.query_systems(
    system_names, 
    step='packmol',
    state=["FIZZLED","COMPLETED", "RUNNING", "READY", "WAITING", "RESERVED"])

# fizzled might be ok if walltime expired, 
# packmol gets good results half-way

logging.info("Preparing LAMMPS conversion")
pdb2lmp_wf_dict = jobadmin.prepare_pdb2lmp(system_names)
logging.info("Appening LAMMPS conversion")
jobadmin.append_wf_by_key(pdb2lmp_wf_dict, packmol_fw_id_dict)

logging.info("Querying appended LAMMPS conversion workflows")
ch2lmp_fw_id_dict = jobadmin.query_step(step='ch2lmp')

## Query

In [None]:
# check where subsequent steps are missing

In [None]:
to_initialize_formats = jobadmin.identifyIncompleteWorkflowsInScope(
    system_name_scope=system_name_scope, precedent_step_name='packmol', subsequent_step_name='ch2lmp')

In [None]:
to_initialize_formats

In [None]:
# Use ch2lmp as reference
# identifyIncompleteWorkflowsInScope is reached through the JobAdmin instance;
# no module-level function of that name is imported in this notebook.
to_minimize = jobadmin.identifyIncompleteWorkflowsInScope(
    precedent_step_name='ch2lmp',
    subsequent_step_name='minimization',
    system_name_scope=system_name_scope)

In [None]:
to_minimize

In [None]:
# Use prepare_system_files as reference step.
# Called on the JobAdmin instance: no bare function of this name is imported here.
to_minimize = jobadmin.identifyIncompleteWorkflowsInScope(
    precedent_step_name='prepare_system_files',
    subsequent_step_name='minimization',
    system_name_scope=system_name_scope)

In [None]:
to_minimize

In [None]:
# Systems in scope checked for 'minimization' followed by 'equilibration_nvt'.
# Called on the JobAdmin instance: no bare function of this name is imported here.
to_equilibrate_nvt = jobadmin.identifyIncompleteWorkflowsInScope(
    precedent_step_name='minimization',
    subsequent_step_name='equilibration_nvt',
    system_name_scope=system_name_scope)

In [None]:
to_equilibrate_nvt

In [None]:
# Systems in scope checked for 'equilibration_nvt' followed by 'equilibration_npt'.
# Called on the JobAdmin instance: no bare function of this name is imported here.
to_equilibrate_npt = jobadmin.identifyIncompleteWorkflowsInScope(
    precedent_step_name='equilibration_nvt',
    subsequent_step_name='equilibration_npt',
    system_name_scope=system_name_scope)

In [None]:
to_equilibrate_npt

In [None]:
# Systems in scope checked for 'equilibration_npt' followed by '10ns_production_mixed'.
# Called on the JobAdmin instance: no bare function of this name is imported here.
to_run_production = jobadmin.identifyIncompleteWorkflowsInScope(
    precedent_step_name='equilibration_npt',
    subsequent_step_name='10ns_production_mixed',
    system_name_scope=system_name_scope)

In [None]:
to_run_production

In [None]:
unfinished_systems = to_run_production.keys()

In [None]:
previous_runs = jobadmin.query_systems(unfinished_systems,step='10ns_production_mixed')

In [None]:
previous_runs

## LAMMPS jobs

In [None]:
# TODO: switch to wrapper & batch_prepare_fw style

In [None]:
# Template for appending LAMMPS runs
prep_fw_dict = jobadmin.prepare_systems(system_names)
jobadmin.append_wf_by_key( prep_fw_dict, ch2lmp_fw_id_dict )
prep_fw_id_dict = jobadmin.query_step(
    step = 'prepare_system_files')

In [None]:
# TODO: update "prepare_restarts" to enable restarts with colvars module
# Template for appending production runs and, behind them, restart fireworks.
prod_fw_dict = jobadmin.prepare_production(system_names)

# npt_fw_id_dict is not assigned by any other cell of this notebook, so the
# NPT equilibration fireworks to append to are looked up here.
npt_fw_id_dict = jobadmin.query_systems(
    system_names,
    step='equilibration_npt',
    state=["COMPLETED", "RUNNING", "READY", "WAITING", "RESERVED"])
jobadmin.append_wf_by_key(prod_fw_dict, npt_fw_id_dict)

prod_fw_id_dict = jobadmin.query_systems(
    system_names,
    step='10ns_production_mixed',
    state=["COMPLETED", "RUNNING", "READY", "WAITING", "RESERVED"])

restart_fw_dict = jobadmin.prepare_restarts(system_names)
jobadmin.append_wf_by_key(restart_fw_dict, prod_fw_id_dict)

### Preparation

In [None]:
fw_id_dict = jobadmin.query_systems(system_names,step="prepare_system_files")

or

In [None]:
system_names = list( to_minimize.keys() )

In [None]:
# replace every value by a sorted list of its elements
to_minimize = {key: sorted(val) for key, val in to_minimize.items()}

In [None]:
to_minimize

In [None]:
# Certain steps push representative data files to the database.

In [None]:
def pull_datafile_from_db_wrapper(system_name):
    """Forward to jobadmin.pull_datafile_from_db with the '_psfgen.data' identifier suffix."""
    return jobadmin.pull_datafile_from_db(
        system_name, data_file_identifier_suffix='_psfgen.data')

In [None]:
fw_dict = jobadmin.batch_prepare_fw( system_names, pull_datafile_from_db_wrapper )

In [None]:
jobadmin.append_wf_by_key( fw_dict, fw_id_dict )

### Minimization

In [None]:
# Use pull_datafile_from_db as reference step.
# Called on the JobAdmin instance: no bare function of this name is imported here.
to_minimize = jobadmin.identifyIncompleteWorkflowsInScope(
    precedent_step_name='pull_datafile_from_db',
    subsequent_step_name='minimization',
    system_name_scope=system_name_scope)

In [None]:
to_minimize

In [None]:
fw_id_dict = jobadmin.query_systems(
    system_names,
    step='pull_datafile_from_db', 
    state=["COMPLETED", "RUNNING", "READY", "WAITING", "RESERVED"])

In [None]:
def minimize_wrapper(system_name):
    """Forward to jobadmin.minimize with the LAMMPS switches for a system without indenter.

    The {baseName}/{dataFile} placeholders are not filled in here; the string
    is handed over as lmp_suffix_template.
    """
    lmp_switches = (
        '-v baseName {baseName:s} -v dataFile {dataFile:s} -v has_indenter 0',
        '-v robust_minimization 0 -v pbc2d 0 -v surfactant_name CTAB -v mpiio 0',
    )
    return jobadmin.minimize(
        system_name,
        lmp_suffix_template=' '.join(lmp_switches))

In [None]:
fw_dict = jobadmin.batch_prepare_fw( system_names, minimize_wrapper )

In [None]:
jobadmin.append_wf_by_key( fw_dict, fw_id_dict )

### NVT equilibration

In [None]:
system_names

In [None]:
fw_id_dict = jobadmin.query_systems(
    system_names,
    step='minimization', 
    state=["COMPLETED", "RUNNING", "READY", "WAITING", "RESERVED"])

In [None]:
fw_id_dict

In [None]:
# Per-system wrapper around jobadmin.nvtEquilibrate for systems without indenter.
# The {baseName}/{dataFile} placeholders are not filled in here; the joined
# string is handed over as lmp_suffix_template.
# NOTE(review): the minimization wrapper in this notebook passes "-v pbc2d" and
# "-v surfactant_name", while this one passes "-v pbd2d" and "-v surfactant".
# Possibly typos -- confirm against the LAMMPS input template before changing.
nvtEquilibrate_wrapper = lambda system_name: jobadmin.nvtEquilibrate(
    system_name,
    lmp_suffix_template=' '.join((
        '-v baseName {baseName:s} -v dataFile {dataFile:s}', 
        '-v has_indenter 0 -v pbd2d 0 -v reinitialize_velocities 1',
        '-v surfactant CTAB -v mpiio 0')))

In [None]:
fw_dict = jobadmin.batch_prepare_fw( system_names, nvtEquilibrate_wrapper )

In [None]:
jobadmin.append_wf_by_key(fw_dict, fw_id_dict)

### NPT equilibration

In [None]:
system_names

In [None]:
fw_id_dict = jobadmin.query_systems(
    system_names,
    step='equilibration_nvt', 
    state=["COMPLETED", "RUNNING", "READY", "WAITING", "RESERVED"])

In [None]:
fw_id_dict

In [None]:
# Per-system wrapper around jobadmin.nptEquilibrate for systems without indenter.
# The {baseName}/{dataFile} placeholders are not filled in here; the joined
# string is handed over as lmp_suffix_template.
# NOTE(review): the minimization wrapper in this notebook passes "-v pbc2d" and
# "-v surfactant_name", while this one passes "-v pbd2d" and "-v surfactant".
# Possibly typos -- confirm against the LAMMPS input template before changing.
nptEquilibrate_wrapper = lambda system_name: jobadmin.nptEquilibrate(
    system_name,
    lmp_suffix_template= ' '.join((
        '-v baseName {baseName:s}',
        '-v dataFile {dataFile:s}',
        '-v has_indenter 0',
        '-v pbd2d 0 -v reinitialize_velocities 0',
        '-v surfactant CTAB')) )

In [None]:
fw_dict = jobadmin.batch_prepare_fw( system_names, nptEquilibrate_wrapper )

In [None]:
jobadmin.append_wf_by_key(fw_dict, fw_id_dict)

## Check progress and (re-) run if necessary

In [None]:
# REWORK functionality here, outdated

In [None]:
healthy_production_runs = jobadmin.get_set_of_healthy_production_runs()

In [None]:
completed_production_runs = jobadmin.get_set_of_evolved_systems()

In [None]:
system_names = list(completed_production_runs & set(system_name_scope))

In [None]:
system_names

In [None]:
incomplete_but_healthy_production_runs = healthy_production_runs - completed_production_runs

In [None]:
incomplete_but_healthy_production_runs

In [None]:
discontinued_production_runs = jobadmin.get_discontinued_production_runs()

In [None]:
discontinued_production_runs

In [None]:
system_name_scope = system_names

In [None]:
to_prepare = jobadmin.get_to_prepare()

In [None]:
system_names = list( set(system_name_scope) & set(to_prepare) )

In [None]:
to_minimize = jobadmin.get_to_minimize()

In [None]:
system_names = list( set(system_name_scope) & set(to_minimize) )

In [None]:
system_names

### NVT eq

In [None]:
to_nvt_equilibrate = jobadmin.get_to_nvt_equlibrate()

In [None]:
to_nvt_equilibrate

In [None]:
system_names = list(set(to_nvt_equilibrate) & set(system_name_scope))

In [None]:
system_names

### NPT eq

In [None]:
to_npt_equilibrate = jobadmin.get_to_npt_equlibrate()

In [None]:
system_names = list(set(to_npt_equilibrate) & set(system_name_scope))

In [None]:
system_names

### Production

In [None]:
to_run_production = jobadmin.get_to_run_production()

In [None]:
system_names = list(set(to_run_production) & set(system_name_scope))

In [None]:
# manually delete a file from Filepad db
jobadmin.fp.delete_file(
    identifier='1010_CTAB_on_AU_111_63x36x2_cylinders_with_counterion_psfgen.data')

## Recovering stuff

### Defuse dead ends & children

In [None]:
steps = ['prepare_system_files',
         'minimization', 
         'equilibration_nvt', 
         'equilibration_npt',
        ]

In [None]:
# For every step listed in `steps`, store what jobadmin.query_step returns
# when restricted to the FIZZLED state, keyed by step name.
step_fizzled_dict = {}
for step in steps:
    step_fizzled_dict[step] = jobadmin.query_step(
        step = step, state_list = ["FIZZLED"] )

In [None]:
# Per step, keep only the entries whose key is contained in system_name_scope.
# Presumably each inner dict maps a system name to a list of fw ids -- verify
# against JobAdmin.query_step.
# NOTE: the loop variable rebinds the notebook-level name fw_id_dict.
reduced_step_fizzled_dict = {}
for step, fw_id_dict in step_fizzled_dict.items():
    reduced_step_fizzled_dict[step] = { system_name: fw_id_lst \
        for system_name, fw_id_lst in fw_id_dict.items() \
        if system_name in system_name_scope }

In [None]:
reduced_step_fizzled_dict

In [None]:
defused_fw_id_list = []
for step, fw_id_dict in reduced_step_fizzled_dict.items():
    for system_name, fw_id_lst in fw_id_dict.items():
        for fw_id in fw_id_lst:
            logging.info("Defuse {} : {} : {} and its children...".format(
                step, system_name, fw_id ) )
            defused_fw_id_list.extend( jobadmin.defuse_children(fw_id) )
            


### Recover packmol runs
packmol runs can fail due to walltime. However, often the (not yet converged) configurations are fine to use.

In [None]:
packmol_fw_id_dict = jobadmin.query_step(step='packmol')

In [None]:
packmol_fw_id = packmol_fw_id_dict['1298_CTAB_on_AU_111_63x36x2_cylinders_with_counterion'][-1]

In [None]:
packmol_fw_id

In [None]:
packmol_launchdir = jobadmin.lpad.get_launchdir(packmol_fw_id)

In [None]:
system_name = '1298_CTAB_on_AU_111_63x36x2_cylinders_with_counterion'

In [None]:
# for systems with "fizzled" packmol tasks, append "dummy" restart parents
# They run in the same directory as previous fizzled packmol and forward the latest configurations
dummy_parent_fw_dict = {}
for system_name in system_names:
    packmol_launchdir = jobadmin.lpad.get_launchdir( 
        packmol_fw_id_dict[system_name][-1] )
    dummy_parent_fw = Firework( [
            DummyParentTask() 
        ],
        spec = {
            '_category':   jobadmin.std_worker,
            '_launch_dir': packmol_launchdir,
            '_files_out': {
                'packmol_pdb': system_name + '_packmol.pdb',
                'packmol_pdb_FORCED': system_name + '_packmol.pdb_FORCED'
            },
            'system_name': system_name,
            'geninfo':     jobadmin.geninfo(),
            'step':        'packmol_dummy'
        },
        name = system_name + '_dummy_parent'
    )
    dummy_parent_fw_dict[system_name] = dummy_parent_fw

In [None]:
jobadmin.append_wf_by_key(dummy_parent_fw_dict, packmol_fw_id_dict)

In [None]:
packmol_dummy_fw_id_dict = jobadmin.query_step('packmol_dummy')

In [None]:
packmol_dummy_fw_id_dict

In [None]:
system_names

In [None]:
# delete previously written packmol results from db 
# (otherwise, will not be overwritten by recovered packmol results)
for system_name in system_names:
    file_id = system_name + '_psfgen.data'
    jobadmin.fp.delete_file(file_id)

In [None]:
# sample for a single packmol recovery task
recover_packmol_fw = Firework( 
    RecoverPackmolTask( { 
        'dest':   output_prefix + os.sep + system_name,
        'recover': True,
        'glob_patterns': [ '*_restart.pack',
            '*_packmol.pdb', '*_packmol.pdb_FORCED', '*_packmol.inp'],
        'forward_glob_patterns': { 
            "packmol_pdb" : ["*_packmol.pdb_FORCED", "*_packmol.pdb"] }
    } ),
    spec = {
        '_category':   jobadmin.std_worker,
        '_files_in': {
            'packmol_pdb': system_name + '_packmol.pdb',
            'packmol_pdb_FORCED': system_name + '_packmol.pdb_FORCED'
        },
        'system_name': system_name,
        'step':        'recover_packmol',
        'geninfo':     jobadmin.geninfo()
    },
    name = system_name + '_recover_packmol'
)

In [None]:
jobadmin.lpad.append_wf( Workflow( [ recover_packmol_fw ] ), [8976] )

## Append data file storage
After successful production, insert final configurations into database.

In [None]:
# first, identify files not yet stored

In [None]:
# The step name has to be the one used for production runs everywhere else in
# this notebook ('10ns_production_mixed'); a misspelled name selects another step.
completed_10ns_productions_dict = jobadmin.query_step(
    step="10ns_production_mixed", state_list=["COMPLETED"])

In [None]:
completed_store_data_file_dict = jobadmin.query_step(step="store_data_file", state_list=["COMPLETED"])

In [None]:
system_names = list( 
    set(completed_10ns_productions_dict.keys()) - set(completed_store_data_file_dict.keys())
)

In [None]:
system_names = list(set(system_names) & set(system_name_scope))

In [None]:
fw_id_dict = jobadmin.query_systems(system_names, step='10ns_production_mixed', state='COMPLETED')

In [None]:
fw_id_dict

In [None]:
fw_dict = jobadmin.batch_prepare_fw( system_names, jobadmin.store_data_file )

In [None]:
len(fw_dict)

In [None]:
jobadmin.append_wf_by_key(fw_dict, fw_id_dict)

In [None]:
# identify fizzled fireworks and rerun

In [None]:
fizzled_fw_id_dict = jobadmin.query_step(
    step='store_data_file', 
    state_list=["FIZZLED"])

In [None]:
fizzled_fw_id_dict

In [None]:
for key, fw_id_lst in fizzled_fw_id_dict.items():
    jobadmin.lpad.rerun_fw(fw_id_lst[-1], rerun_duplicates=False)

In [None]:
fizzled_fw_id_dict = jobadmin.query_systems(
    system_names, 
    step='store_data_file', 
    state=["FIZZLED"])

# Indenter systems


## Query finished interface productions

In [None]:
# original CTAB systems without indenter
system_name_scope = list( sim_df[ 
        sim_df['indenter'].isna() & \
        (sim_df['surfactant'] == 'CTAB')
    ].index )

In [None]:
fw_id_dict = jobadmin.query_step(
    step='store_data_file', 
    state_list=["COMPLETED", "RUNNING", "READY", "WAITING", "RESERVED"])

In [None]:
len(fw_id_dict)

In [None]:
system_names = set(system_name_scope) & set(fw_id_dict.keys())

In [None]:
len(system_names)

In [None]:
system_names

## Workflow build up

### Initiate indenter workflows

In [None]:
interface_file_suffix = '_10ns.lammps'
indenter = '50Ang_stepped'
indenter_file = '50Ang_stepped.pdb'

In [None]:
# manual selection of FINISHED, STORED non-indenter systems
system_names = {
    '1010_CTAB_on_AU_111_63x36x2_bilayer_with_counterion',
    '1010_CTAB_on_AU_111_63x36x2_monolayer_with_counterion',
    '1298_CTAB_on_AU_111_63x36x2_bilayer_with_counterion',
    '367_CTAB_on_AU_111_63x36x2_monolayer_with_counterion',
    '415_CTAB_on_AU_111_63x36x2_bilayer_with_counterion',
    '415_CTAB_on_AU_111_63x36x2_hemicylinders_with_counterion',
    '515_CTAB_on_AU_111_63x36x2_bilayer_with_counterion',
    '515_CTAB_on_AU_111_63x36x2_hemicylinders_with_counterion',
    '653_CTAB_on_AU_111_63x36x2_bilayer_with_counterion',
    '653_CTAB_on_AU_111_63x36x2_hemicylinders_with_counterion',
    '653_CTAB_on_AU_111_63x36x2_monolayer_with_counterion'}

In [None]:
len(system_names)

In [None]:
# construct names for new systems containing indenter
indenter_system_names_dict = {}
for system_name in system_names:
    indenter_system_names_dict[ system_name + '_' + indenter ] = system_name 

In [None]:
indenter_system_names_dict

In [None]:
# CTAB systems with indenter
system_name_scope = list( sim_df[ 
        (sim_df['indenter'] ==  indenter) & \
        (sim_df['surfactant'] == 'CTAB')
    ].index )

In [None]:
system_names = set(indenter_system_names_dict.keys()) & set(system_name_scope)

In [None]:
len(system_names)

In [None]:
jobadmin.geninfo(reset=True)

In [None]:
for system_name in system_names:
    interface_file = indenter_system_names_dict[system_name] + interface_file_suffix
    fw = jobadmin.initiate_indenter_workflow(
        system_name, 
        interface_file = interface_file, 
        indenter_file=indenter_file )
    jobadmin.lpad.add_wf( Workflow( [ fw ], name = system_name ) )

In [None]:
# convert is used to turn tga into png (not crucial)
jobadmin._template_convert_cmd

### Indenter insertion

In [None]:
# single indenter system to work on, kept as a set
system_names = {
    '1010_CTAB_on_AU_111_63x36x2_bilayer_with_counterion_50Ang_stepped',
}

In [None]:
system_names

In [None]:
fw_id_dict = jobadmin.query_systems(
    system_names,
    step='initiate_indenter_workflow', 
    state=["COMPLETED", "RUNNING", "READY", "WAITING", "RESERVED"])

In [None]:
fw_id_dict

In [None]:
fw_dict = jobadmin.batch_prepare_fw(
    system_names, 
    jobadmin.indenter_insertion )

In [None]:
# show each key followed by the dict form of its prepared firework
for key, entry in fw_dict.items():
    print(key)
    pprint(entry.as_dict())

In [None]:
jobadmin.append_wf_by_key( fw_dict, fw_id_dict )

### Datafile merge

In [None]:
system_names

In [None]:
fw_id_dict = jobadmin.query_systems(
    system_names,
    step='indenter_insertion', 
    state=["COMPLETED", "RUNNING", "READY", "WAITING", "RESERVED"])

In [None]:
fw_id_dict

In [None]:
jobadmin._template_pizzapy_merge_cmd

In [None]:
def merge_datafile_wrapper(system_name):
    """Forward to jobadmin.add_parameters_to_datafile_and_store with the '_50Ang_stepped' suffix."""
    return jobadmin.add_parameters_to_datafile_and_store(
        system_name, indenter_suffix='_50Ang_stepped')

In [None]:
fw_dict = jobadmin.batch_prepare_fw(system_names, 
                                    merge_datafile_wrapper)

In [None]:
jobadmin.append_wf_by_key(fw_dict, fw_id_dict)

### Pull file from DB (migrate machine)
This step is only necessary if changing the file system for subsequent steps.

In [None]:
fw_id_dict = jobadmin.query_systems(
    system_names,
    step='pizzapy_merge', 
    state=["COMPLETED", "RUNNING", "READY", "WAITING", "RESERVED"])

In [None]:
fw_dict = jobadmin.batch_prepare_fw( system_names, 
                                   jobadmin.pull_datafile_from_db )

In [None]:
fw_dict['1010_CTAB_on_AU_111_63x36x2_bilayer_with_counterion_50Ang_stepped']

In [None]:
jobadmin.append_wf_by_key(fw_dict, fw_id_dict)

### Minimization

In [None]:
fw_id_dict = jobadmin.query_systems(
    system_names,
    step='pizzapy_merge', 
    state=["COMPLETED", "RUNNING", "READY", "WAITING", "RESERVED"])

In [None]:
fw_id_dict

In [None]:
def minimize_wrapper(system_name):
    """Forward to jobadmin.minimize with the LAMMPS switches for a system with indenter.

    The {baseName}/{dataFile} placeholders are not filled in here; the string
    is handed over as lmp_suffix_template.
    """
    lmp_switches = (
        '-v baseName {baseName:s} -v dataFile {dataFile:s} -v has_indenter 1 -v mpiio 0',
        '-v robust_minimization 0 -v pbc2d 0 -v compute_interactions 1 -v store_forces 1',
        '-v surfactant_name CTAB',
    )
    return jobadmin.minimize(
        system_name,
        lmp_suffix_template=' '.join(lmp_switches))

In [None]:
fw_dict = jobadmin.batch_prepare_fw( system_names, minimize_wrapper )

In [None]:
jobadmin.append_wf_by_key( fw_dict, fw_id_dict )

### NPT equilibration

In [None]:
fw_id_dict = jobadmin.query_systems(
    system_names,
    step='minimization', 
    state=["COMPLETED", "RUNNING", "READY", "WAITING", "RESERVED"])

In [None]:
fw_id_dict

In [None]:
# Per-system wrapper around jobadmin.nptEquilibrate for systems with indenter
# (has_indenter 1, nptEqSteps 10000). The {baseName}/{dataFile} placeholders are
# not filled in here; the joined string is handed over as lmp_suffix_template.
# NOTE(review): both "-v pbd2d 0" and "-v pbc2d 0" are passed; "pbd2d" looks like
# a leftover typo of "pbc2d" -- confirm against the LAMMPS input template.
nptEquilibrate_wrapper = lambda system_name: jobadmin.nptEquilibrate(
    system_name,
    lmp_suffix_template=' '.join((
        '-v baseName {baseName:s} -v dataFile {dataFile:s} -v has_indenter 1',
        '-v pbd2d 0 -v reinitialize_velocities 1 -v nptEqSteps 10000',
        '-v pbc2d 0 -v compute_interactions 1 -v store_forces 1',
        '-v mpiio 0 -v surfactant_name CTAB')))

In [None]:
fw_dict = jobadmin.batch_prepare_fw( system_names, nptEquilibrate_wrapper )

In [None]:
jobadmin.append_wf_by_key(fw_dict, fw_id_dict)

### Colvars

In [None]:
system_names = [
     '1298_CTAB_on_AU_111_63x36x2_bilayer_with_counterion_50Ang_stepped',
     '653_CTAB_on_AU_111_63x36x2_bilayer_with_counterion_50Ang_stepped',
     '653_CTAB_on_AU_111_63x36x2_hemicylinders_with_counterion_50Ang_stepped',
     '653_CTAB_on_AU_111_63x36x2_monolayer_with_counterion_50Ang_stepped',
     '367_CTAB_on_AU_111_63x36x2_monolayer_with_counterion_50Ang_stepped'
]

In [None]:
# work on the systems in sorted order
system_names = sorted(system_names)

In [None]:
system_names

In [None]:
fw_id_dict = jobadmin.query_systems(
    system_names,
    step='equilibration_npt', 
    state=["COMPLETED", "RUNNING", "READY", "WAITING", "RESERVED"])

In [None]:
system_names = [ system_name for system_name, fw_ids in fw_id_dict.items() if len(fw_ids) > 0 ]

In [None]:
jobadmin.production_queueadapter['nodes'] = 8

In [None]:
jobadmin.production_queueadapter['walltime'] = '96:00:00'

In [None]:
jobadmin._template_lmp_cmd # double-check usage of correct LAMMPS command

In [None]:
# 500,000 steps ~ 1 ns
# 10 nm / 1 ns ~ 10 m / s

# 1,000,000 steps ~ 2 ns
# 10 nm / 2 ns ~ 5 m / s

In [None]:
# parameters handed to the colvars production wrapper
force_constant = 25000
approach_steps = 500000
# the production run itself is 10 % longer than the approach
total_steps = int(approach_steps * 1.1)

In [None]:
def production_wrapper(system_name):
    """Forward to jobadmin.colvars_production for one indenter system.

    force_constant, approach_steps and total_steps are read from the notebook
    namespace when this function is called, not when it is defined. LAMMPS is
    told to run total_steps (productionSteps), whereas colvars_production
    receives approach_steps as its total_steps argument.
    """
    # NOTE(review): other wrappers in this notebook pass "-v pbc2d"; "pbd2d"
    # may be a typo -- confirm against the LAMMPS input template.
    lmp_switches = (
        '-v baseName {baseName:s} -v dataFile {dataFile:s}',
        '-v has_indenter 1 -v pbd2d 0',
        '-v reinitialize_velocities 0',
        '-v productionSteps {:d}'.format(total_steps),
        '-v use_colvars 1 -v mpiio 0',
        '-v thermo_frequency 10',
        '-v netcdf_frequency 1000',
        '-v compute_interactions 1',
        '-v store_forces 1',
        '-v surfactant_name CTAB',
    )
    return jobadmin.colvars_production(
        system_name,
        lmp_suffix_template=' '.join(lmp_switches),
        force_constant=force_constant,
        total_steps=approach_steps)

In [None]:
fw_dict = jobadmin.batch_prepare_fw( system_names, production_wrapper )

In [None]:
jobadmin.append_wf_by_key(fw_dict, fw_id_dict)

## Progress check & recovery 

Manually append "post LAMMPS" fireworks to fizzled production runs,
in order to run in same directory as previous and pull already produced output files.

In [None]:
system_name_scope

In [None]:
fizzled_production_dict = jobadmin.query_step(
    step='production', state_list = ['FIZZLED'] )

In [None]:
fizzled_production_dict

In [None]:
fw_ids = jobadmin.lpad.get_fw_ids(
    query = { 
        "spec.step":      "production",
        "state":          "FIZZLED", 
        "spec._category": "nemo_queue"} )

In [None]:
fw_id_system_name_dict = { 
    fw_id: jobadmin.lpad.get_fw_by_id(fw_id).spec["system_name"] for fw_id in fw_ids }

In [None]:
system_names = list(
    set(fw_id_system_name_dict.values()) & set(system_name_scope) )

In [None]:
fw_id_dict = { 
    jobadmin.lpad.get_fw_by_id(fw_id).name: fw_id \
    for fw_id, system_name in fw_id_system_name_dict.items() \
    if system_name in system_names }

In [None]:
fw_id_dict

In [None]:
# Build one "post LAMMPS" firework per fizzled production run. Each is given
# the launch directory and the fw id of the fizzled run it belongs to.
fw_dict = {}
for fw_name, fw_id in fw_id_dict.items():
    launch_dir = jobadmin.lpad.get_launchdir(fw_id)
    fw_spec = jobadmin.lpad.get_fw_by_id(fw_id).spec
    system_name = fw_spec["system_name"]

    # Dedicated names: the notebook-level total_steps is read by
    # production_wrapper at call time and must not be overwritten here.
    # Fall back to 1000000 steps if the spec does not record the run length.
    fizzled_total_steps = fw_spec.get("total_steps", 1000000)
    fizzled_step = fw_spec["step"]

    fw_dict[fw_name] = jobadmin.post_lammps_fw(
        system_name = system_name,
        parent_fw_id = fw_id,
        launch_dir = launch_dir,
        output_folder = '{:s}_{:d}'.format(fizzled_step, fizzled_total_steps),
        step= 'production', lmp_suffix='_production_mixed')

In [None]:
fw_dict[ list(fw_dict.keys())[0] ].as_dict()

In [None]:
for fw_name, fw in fw_dict.items():
    jobadmin.lpad.append_wf(
        new_wf = Workflow( [ fw ] ), 
        fw_ids = [ fw_id_dict[ fw_name ] ] )

In [None]:
fw_dict[ list(fw_dict.keys())[0] ]

### Defuse certain fw ids

In [None]:
for fw_id in fw_ids: jobadmin.lpad.defuse_fw( fw_id, rerun_duplicates=False )

### Rerun certain fw ids

In [None]:
# Rerun the last firework id stored under each key of fw_id_dict.
# NOTE(review): every value is indexed with [-1], so fw_id_dict has to map to
# sequences of fw ids (presumably a query_systems result); a dict mapping
# firework name -> single fw id, as built in the recovery cells of this
# section, would fail here -- confirm which dict is meant.
# The loop also rebinds the notebook-level name fw_ids.
for key, fw_ids in fw_id_dict.items():
    jobadmin.lpad.rerun_fw(fw_ids[-1],rerun_duplicates=False)

In [None]:
jobadmin.lpad.rerun_fw(9922,rerun_duplicates=False)

### Detect certain lost runs
fizzle and forget them.
For some reason, offline runs are un-fizzled again when recovered if not forcefully "forgotten"

In [None]:
# Detect lost runs of all systems whose name contains "CTAB", mark them as
# fizzled, then forget their offline-run records.
launch_ids, fw_ids, _ = jobadmin.lpad.detect_lostruns(
    expiration_secs=1,
    query={'spec.system_name': {'$regex': '.*CTAB.*'}},
    fizzle=True)
for fw_id in fw_ids:
    # forget_offline treats its argument as a launch id unless launch_mode=False;
    # firework ids are passed here, so say so explicitly.
    jobadmin.lpad.forget_offline(fw_id, launch_mode=False)

In [None]:
# detect lost runs except certain fw ids
# do not recover them afterwards
launch_ids, fw_ids, _ = jobadmin.lpad.detect_lostruns(
    expiration_secs=1,
    query={'fw_id': {'$nin': [10232, 10231]}},
    fizzle=True)
for fw_id in fw_ids:
    # forget_offline treats its argument as a launch id unless launch_mode=False;
    # firework ids are passed here, so say so explicitly.
    jobadmin.lpad.forget_offline(fw_id, launch_mode=False)