# This notebook implements the ExaLearn example with Active Learning custom teaching loop

In [1]:
import os
import sys
import time

verbose  = os.environ.get('RADICAL_PILOT_VERBOSE', 'REPORT')
os.environ['RADICAL_PILOT_VERBOSE'] = verbose

import radical.pilot as rp
import radical.utils as ru

from rose.learner import ActiveLearner
from rose.engine import Task, ResourceEngine

### List parameters for training and simulation

We will list the parameters used for running the simulations and training tasks. Runtime parameters for OpenMP are also assigned

In [2]:
seed=20030
num_sample=4500
num_sample_val=((num_sample / 2))
num_sample_test=((num_sample / 2))
num_sample_study=num_sample
num_al_sample=((num_sample * 3))
batch_size=512
epochs=[400,300,250,200]

NNODES=1

nthread=32
nthread_tot=( NNODES * nthread )

nthread_study=22
nthread_study_tot=( NNODES * nthread_study )

nrank_ml=4
nrank_ml_tot=( NNODES * nrank_ml )

ngpus=(NNODES * 4)

### Declare the resource engine for our active learning tasks.
We will ask for 30 minutes, and the target resources will be local, which means it will run on the user's machine.

Next, we define the active learner and assign the resource engine.

In [3]:
engine = ResourceEngine({'runtime': 30,
                         'resource': 'local.localhost'})
acl = ActiveLearner(engine)
code_path = f'{sys.executable} {os.getcwd()}/scripts'

data_dir= f'{os.getcwd()}/data/seed_{seed}'

Resource Engine started successfully

Task 'sample_simulation' ready to submit; resolved dependencies: []
Task 'sweep_simulation' ready to submit; resolved dependencies: []
Task 'sample_simulation' ready to submit; resolved dependencies: []
Task 'sample_simulation' ready to submit; resolved dependencies: []
submitting ['sample_simulation', 'sweep_simulation', 'sample_simulation', 'sample_simulation'] for execution
task.000002 is DONE
Task 'merge_preprocess' ready to submit; resolved dependencies: ['sample_simulation']
Task 'merge_preprocess' ready to submit; resolved dependencies: ['sample_simulation']
Task 'merge_preprocess' ready to submit; resolved dependencies: ['sample_simulation']
submitting ['merge_preprocess', 'merge_preprocess', 'merge_preprocess'] for execution
task.000003 is DONE
task.000001 is DONE
task.000000 is DONE
Task 'merge_preprocess' ready to submit; resolved dependencies: ['sweep_simulation']
Task 'merge_preprocess' ready to submit; resolved dependencies: ['sample_

### Now, let us define our active learning tasks: simulation, preprocessing, training, and active learning

In [4]:
# Define and register the simulation task
@acl.simulation_task
def simulation(*args):
#    return Task(executable=f'{code_path}/simulation_resample.py', arguments=args) 
    return Task(executable=f'{code_path}/replacement_sim.py', arguments=args) # this is a replaced task, dont actually run the simulation sample

# Define and register a utility task
@acl.utility_task
def merge_preprocess(*args):
#    return Task(executable=f'{code_path}/merge_preprocess_hdf5.py', arguments=args)
    return Task(executable=f'{code_path}/replacement_sim.py', arguments=args)

# Define and register the training task
@acl.training_task
def training(*args):
#    return Task(executable=f'{code_path}/train.py', arguments=args)
    return Task(executable=f'{code_path}/replacement_sim.py', arguments=args)

# Define and register the active learning task
@acl.active_learn_task
def active_learn(*args):
#    return Task(executable=f'{code_path}/active_learning.py', arguments=args)
    return Task(executable=f'{code_path}/replacement_sim.py', arguments=args)

### Before running the active learning loop, we must first create some utility tasks for bootstrapping

We will need to run some simulation tasks before beginning training. 4 simulation tasks are submitted in parallel along with an additional 3 data preprocess tasks for each simulation task. 

In [5]:
# Prepare Data
# simulation sample task
@acl.utility_task
def sample_simulation(*args):
#    task = Task(executable=f'{code_path}/simulation_sample.py', arguments=args)
    return Task(executable=f'{code_path}/replacement_sim.py', arguments=args)

#simulation sweep task
@acl.utility_task
def sweep_simulation(*args):
#    task = Task(executable=f'{code_path}/simulation_sweep.py', arguments=args)
    return Task(executable=f'{code_path}/replacement_sim.py', arguments=args)

def bootstrap():
    os.system(f'{code_path}/prepare_data_dir_pm.py --seed {seed}')
    
    bootstrap=[]
    base = sample_simulation(f'{num_sample} {seed} \
            {data_dir}/base/config/config_1001460_cubic.txt \
            {data_dir}/base/config/config_1522004_trigonal.txt \
            {data_dir}/base/config/config_1531431_tetragonal.txt')
    val = sample_simulation(f'{num_sample} {seed-1} \
            {data_dir}/validation/config/config_1001460_cubic.txt \
            {data_dir}/validation/config/config_1522004_trigonal.txt \
            {data_dir}/validation/config/config_1531431_tetragonal.txt')
    test = sample_simulation(f'{num_sample} {seed+1} \
            {data_dir}/test/config/config_1001460_cubic.txt \
            {data_dir}/test/config/config_1522004_trigonal.txt \
            {data_dir}/test/config/config_1531431_tetragonal.txt')
    study = sweep_simulation(f'{num_sample_study} \
            {data_dir}/study/config/config_1001460_cubic.txt \
            {data_dir}/study/config/config_1522004_trigonal.txt \
            {data_dir}/study/config/config_1531431_tetragonal.txt')
    bootstrap.append(base)
    bootstrap.append(val)
    bootstrap.append(test)
    bootstrap.append(study)
    for shape in ['cubic', 'trigonal', 'tetragonal']:
        merge_base = merge_preprocess(f'{data_dir}/base/data {shape} {nthread_tot}', base)
        merge_val = merge_preprocess(f'{data_dir}/validation/data {shape} {nthread_tot}', val)
        merge_test = merge_preprocess(f'{data_dir}/test/data {shape} {nthread_tot}', test)
        merge_study = merge_preprocess(f'{data_dir}/study/data {shape} {nthread_tot}', study)
        bootstrap.append(merge_base)
        bootstrap.append(merge_val)
        bootstrap.append(merge_test)
        bootstrap.append(merge_study)
    
    [task.result() for task in bootstrap]
# invoke the bootstrap() method
bootstrap()

Registered task 'sample_simulation' and id of 000000 with dependencies: []
Registered task 'sample_simulation' and id of 000001 with dependencies: []
Registered task 'sample_simulation' and id of 000002 with dependencies: []
Registered task 'sweep_simulation' and id of 000003 with dependencies: []
Registered task 'merge_preprocess' and id of 000004 with dependencies: ['sample_simulation']
Registered task 'merge_preprocess' and id of 000005 with dependencies: ['sample_simulation']
Registered task 'merge_preprocess' and id of 000006 with dependencies: ['sample_simulation']
Registered task 'merge_preprocess' and id of 000007 with dependencies: ['sweep_simulation']
Registered task 'merge_preprocess' and id of 000008 with dependencies: ['sample_simulation']
Registered task 'merge_preprocess' and id of 000009 with dependencies: ['sample_simulation']
Registered task 'merge_preprocess' and id of 000010 with dependencies: ['sample_simulation']
Registered task 'merge_preprocess' and id of 000011

### Define the active learning loop:
In each iteration in the learning loop, a simulation tasks is submitted followed by 3 parallel data preprocess tasks to handle the simulation output. We then submit a training task followed by an active learning task.

In [6]:
# Custom training loop using active learning
def teach():
    for acl_iter in range(4):
        print(f'Starting Iteration-{acl_iter}')
        simulations = []
        if acl_iter != 0:
            sim = simulation(f'{seed+2} \
                {data_dir}/AL_phase_{acl_iter}/config/config_1001460_cubic.txt \
                {data_dir}/study/data/cubic_1001460_cubic.hdf5 \
                {data_dir}/AL_phase_{acl_iter}/config/config_1522004_trigonal.txt \
                {data_dir}/study/data/trigonal_1522004_trigonal.hdf5 \
                {data_dir}/AL_phase_{acl_iter}/config/config_1531431_tetragonal.txt \
                {data_dir}/study/data/tetragonal_1531431_tetragonal.hdf5')
            simulations.append(sim)
            for shape in ['cubic', 'trigonal', 'tetragonal']:
                merge=merge_preprocess(f'{data_dir}/AL_phase_{acl_iter}/data cubic {nthread_tot}', sim)
                simulations.append(merge)
        [sim.result() for sim in simulations]
        # Now run training and active_learn
        train = training(f'--batch_size {batch_size} \
               --epochs {epochs[acl_iter]} \
               --seed {seed} \
               --device=cpu \
               --num_threads {nthread} \
               --phase_idx {acl_iter} \
               --data_dir {data_dir} \
               --shared_file_dir {data_dir}', *simulations)
        active = active_learn(f'--seed {seed+3} --num_new_sample {num_al_sample} --policy uncertainty', simulations, train)
        active.result()
# invoke the custom/user-defined teach() method
teach()

Starting Iteration-0
Registered task 'training' and id of 000016 with dependencies: []
Registered task 'active_learn' and id of 000017 with dependencies: ['training']
Starting Iteration-1
Registered task 'simulation' and id of 000018 with dependencies: []
Registered task 'merge_preprocess' and id of 000019 with dependencies: ['simulation']
Registered task 'merge_preprocess' and id of 000020 with dependencies: ['simulation']
Registered task 'merge_preprocess' and id of 000021 with dependencies: ['simulation']
Registered task 'training' and id of 000022 with dependencies: ['simulation', 'merge_preprocess', 'merge_preprocess', 'merge_preprocess']
Registered task 'active_learn' and id of 000023 with dependencies: ['training']
Starting Iteration-2
Registered task 'simulation' and id of 000024 with dependencies: []
Registered task 'merge_preprocess' and id of 000025 with dependencies: ['simulation']
Registered task 'merge_preprocess' and id of 000026 with dependencies: ['simulation']
Registe

Let's make sure to shutdown the resources.

In [7]:
engine.shutdown()

Shutdown is triggered, terminating the resources gracefully
