# General Imports

In [None]:
import traceback
import os
import yaml
import datetime
from fx19 import inputs
from fx19.run_ops import *
import time

# Initialize FANTASTX

1. Read in the YAML file
2. Create all FANTASTX objects
3. Assign all FANTASTX objects to local variables which can be passed to the worker nodes
4. Create a calculations folder where all model evaluations will take place
5. Initialize the data file (helpfully called "data_file") which will store all model information

In [None]:
# All relative paths used below are resolved against the launch directory.
main_path = os.getcwd()

# Load the run configuration from input.yaml into the input dictionary and
# record the working directory in it.
with open('input.yaml') as config_stream:
    i_dict = yaml.load(config_stream, Loader=yaml.FullLoader)
i_dict['main_path'] = main_path

# Build every FANTASTX object described by the input dictionary.
all_objects = inputs.make_objects(i_dict)

# Unpack all_objects into local variables so they can be handed to the
# worker nodes individually.
reg_id = all_objects['reg_id']
input_model_obj = all_objects['input_model_obj']

# gb_ops_obj is optional; when it is present it is used both as the source
# of random models and as the evolve object.
gb_ops_obj = all_objects['gb_ops_obj'] if 'gb_ops_obj' in all_objects else None
if gb_ops_obj is None:
    random_model_obj = all_objects['random_model_obj']
    evolve = all_objects['evolve']
else:
    random_model_obj = evolve = gb_ops_obj

energy_code = all_objects['energy_code']

# Xsim_1 is optional as well; sim_ids stays None when it is absent.
if 'Xsim_1' in all_objects:
    Xsim_1, sim_ids = all_objects['Xsim_1'], [1]
else:
    Xsim_1, sim_ids = None, None

pool = all_objects['pool']
select = all_objects['select']

# Create a fresh 'calcs' folder where all calculations take place. An
# existing 'calcs' entry is first renamed to 'old_<timestamp>' so that it
# is not overwritten.
if 'calcs' in os.listdir(main_path):
    now = datetime.datetime.now()
    new_name = (f'old_{now.year}_{now.month}_{now.day}'
                f'_{now.hour}_{now.minute}_{now.second}')
    os.rename('calcs', new_name)
calcs = i_dict['main_path'] + '/calcs'
os.mkdir(calcs)

# Start the data file that will hold objective function values and
# inheritance information. Only the header is written here; the Obj_1
# column is included only when Xsim_1 is set.
data_file = main_path + '/data_file'
with open(data_file, 'w') as header_out:
    if Xsim_1:
        first_line = ('Label   Inheritance     Total Energy    Obj_0'
                      '           Obj_1           Operator\n\n')
    else:
        first_line = ('Label   Inheritance     Total Energy    Obj_0'
                      '           Operator\n\n')
    header_out.write(first_line)

# Initialize DASK and start the cluster

In [None]:
from dask_jobqueue import SLURMCluster, PBSCluster
from dask.distributed import Client
import dask
import dask.distributed
# Set the distributed TCP comm timeout to 3 hours.
# NOTE(review): presumably chosen so long-running model evaluations do not
# drop their connection -- confirm the intended value.
dask.config.set({'distributed.comm.timeouts.tcp': '3h'})

In [None]:
# Bookkeeping for the run.
# Worker settings come from the 'workers' section of the input file.
workers = i_dict['workers']
max_workers = workers['max_workers']

# Pool status is printed whenever models_evald is a multiple of this value.
pool_status_update = 10

# Number of models evaluated so far and the futures still being tracked.
models_evald = 0
evald_futures = []
simd_futures = []

In [None]:
# Start Dask client.
# For SLURM and PBS a job-queue cluster is built from the 'workers' section
# of the input file, its generated job script is printed and saved to
# job_script, and a client is attached to it. For 'local' the client
# connects to a scheduler that must already be listening on
# tcp://127.0.0.1:8786.
if workers['cluster'] == 'SLURM':
    job_script = '/home/dunruh/fantastx_vasp_xanes/sample_job_script.txt'
    cluster_job = SLURMCluster(cores=workers['num_cores'],
                               memory=workers['total_mem'],
                               processes=workers['processes'],
                               project=workers['project_name'],
                               queue=workers['submit_queue'],
                               interface=workers['node_type'],
                               walltime=workers['walltime'],
                               job_extra=workers['job_extra'],
                               env_extra=workers['env_extra'],
                               header_skip=workers['header_skip'])
    print("Job script for dask-worker: \n", cluster_job.job_script())
    # The context manager guarantees the file handle is closed; previously
    # the file was opened (and truncated) but never written or closed.
    with open(job_script, "w+") as jobfile:
        jobfile.write(cluster_job.job_script())
    client = Client(cluster_job)
elif workers['cluster'] == 'PBS':
    job_script = '/home/dunruh/sample_job_script.txt'
    cluster_job = PBSCluster(cores=workers['num_cores'],
                             memory=workers['total_mem'],
                             project=workers['project_name'],
                             interface=workers['node_type'],
                             walltime=workers['walltime'],
                             job_extra=workers['job_extra'],
                             header_skip=workers['header_skip'])
    print("Job script for dask-worker: \n", cluster_job.job_script())
    with open(job_script, "w+") as jobfile:
        jobfile.write(cluster_job.job_script())
    client = Client(cluster_job)
elif workers['cluster'] == 'local':
    client = Client('tcp://127.0.0.1:8786')
else:
    # Fail here rather than with a NameError on `client` in a later cell.
    raise ValueError('FANTASTX currently supports SLURM, PBS and local. '
                     'Provided scheduler type not identified.')

In [None]:
# For queue-backed clusters, fix both ends of the adaptive scaling range at
# max_workers and attach a client to the cluster.
# NOTE(review): adapt() is called twice with different keyword sets; the
# second call presumably supersedes the first -- confirm against the
# dask-jobqueue documentation.
if workers['cluster'] in ('SLURM', 'PBS'):
    cluster_job.adapt(minimum=max_workers, maximum=max_workers)
    cluster_job.adapt(minimum_jobs=max_workers, maximum_jobs=max_workers)
    client = Client(cluster_job)

#### Retrieve the client dashboard in order to track and visualize the job

In [None]:
# Leave the client as the last expression so the notebook renders it as the
# cell output.
client

In [None]:
# Wait until at least one worker has started on the cluster before any
# work is submitted.
client.wait_for_workers(1)

# Define the eval function which will create a new candidate model and evaluate it

In [None]:
def create_and_eval(model_obj, evolve, select, pool, reg_id, model_type,
                    model=None):
    """
    Create a candidate model, relax it and run the X simulation on it.

    All steps are done one after the other so that a worker handles them
    as a single job.

    Args:
    model_obj - (obj) object handed to make_model() as its first argument
    (random_model_obj or gb_ops_obj in this notebook)

    evolve - (obj) evolve object forwarded to make_model()

    select - (obj) select object forwarded to make_model()

    pool - (obj) pool object forwarded to make_model()

    reg_id - (obj) reg_id object forwarded to make_model() and relax()

    model_type - (str) forwarded to make_model(); this notebook uses
    'inputs', 'random' and 'evolved'

    model - (obj) existing model forwarded to make_model(); callers here
    supply it only for model_type 'inputs'. Defaults to None.

    Returns:
    The result of do_Xsim() on the relaxed model, or None when relax()
    returns None.

    Note:
    Uses the energy_code, gb_ops_obj and Xsim_1 objects from the global
    scope.
    """
    # Use the model_obj argument rather than the global random_model_obj so
    # the function honours what the caller passed in.
    new_model = make_model(model_obj, evolve, select, pool,
                           reg_id, model_type, model)

    relaxed_model = relax(new_model, reg_id, energy_code)
    if relaxed_model is None:
        return None

    # Report on the relaxed model; `model` is None unless an input model
    # was supplied.
    print(f"Model converged: {relaxed_model.converged}")

    # separate gb_iface for the energy evaluated model
    # NOTE(review): the relaxed model is passed here instead of the raw
    # `model` argument -- confirm this is what separate_gb() expects.
    separate_gb(energy_code, gb_ops_obj, relaxed_model)

    return do_Xsim(relaxed_model, Xsim_1)

# Run FANTASTX

## Make models directly from input POSCARs

In [None]:
# Read every input structure provided and submit each one to the dask
# workers for evaluation. Random and evolved models follow in the next cell.
input_models = []
if input_model_obj is not None:
    # One read per input file. The count is taken once up front because it
    # is not visible here whether read_structure() modifies all_files.
    for _ in range(len(input_model_obj.all_files)):
        candidate = input_model_obj.read_structure(reg_id)
        # read_structure() returns 0 when all files are done; None results
        # are skipped as well.
        if candidate is not None and not isinstance(candidate, int):
            input_models.append(candidate)

    # evaluate the input models in dask-workers
    for input_model in input_models:
        out = client.submit(create_and_eval, random_model_obj, evolve,
                            select, pool, reg_id, model_type='inputs',
                            model=input_model)
        evald_futures.append(out)
        # Report the model that was just submitted, not the last value
        # returned by read_structure().
        print(
            f"Successfully submitted input model {input_model.label}")

num_initial_pop = i_dict['population_limits']['initial_population']
total_models_needed = i_dict['population_limits']['total_population']
evald_futures, models_evald, pool, select = update_pool(evald_futures,
                                                        models_evald,
                                                        pool, select,
                                                        data_file,
                                                        sim_ids)
working_jobs = get_working_jobs(evald_futures)

print(f'Input models are finished. Current working jobs: {working_jobs}')

## Make random and evolved models

In [None]:
def _print_pool_status(pool, select_params):
    """
    Print the labels of the models currently held by the pool.

    Args:
    pool - (obj) pool object whose models are reported

    select_params - (dict) the 'select_params' section of the input file;
    its optional 'selection_algorithm' entry decides which pool attributes
    are printed. When the entry is absent the good_pool is printed; an
    unrecognised value prints nothing.
    """
    algorithm = select_params.get("selection_algorithm",
                                  "distance_from_pareto")
    if algorithm == "distance_from_pareto":
        good_pool_labels = [model.label for model in pool.good_pool]
        print(f"Current good_pool population models: {good_pool_labels}.")
    elif algorithm == "epsilon_moea":
        pop_labels = [model.label for model in pool.population.models]
        archive_labels = [model.label for model in pool.archive.models]
        print(f"Current pool population models: {pop_labels}")
        print(f"Current pool archive models: {archive_labels}")
    elif algorithm == "clustered_selection":
        nd_pop_labels = [
            model.label for model in pool.population.non_dominated_models]
        print(f"Current pool population non-dominated models: {nd_pop_labels}")


start_time = time.time()
while models_evald < total_models_needed:
    # to lower cpu usage, wait 10 seconds before updating
    # number of working jobs each loop
    time.sleep(10)

    # Grab number of workers which are currently busy
    working_jobs = get_working_jobs(evald_futures)

    # Add new jobs if any workers are idle and more models are still needed
    while (working_jobs < len(client.scheduler_info()['workers'])
           and models_evald < total_models_needed):
        # New models will either be random models or evolved models
        if models_evald < num_initial_pop:
            model_mech = 'random'
            print("Submitting random job")
        else:
            model_mech = 'evolved'
            print("Submitting evolved job")

        # Create, relax and evaluate the model in dask-workers.
        # create_and_eval() builds the model itself, so it is given the
        # same arguments as for the input models rather than a pre-built
        # model. pure=False stops dask from treating repeated submissions
        # with identical arguments as one and the same task.
        out = client.submit(create_and_eval, random_model_obj, evolve,
                            select, pool, reg_id, model_type=model_mech,
                            model=None, pure=False)
        evald_futures.append(out)
        evald_futures, models_evald, pool, select = update_pool(evald_futures,
                                                                models_evald,
                                                                pool, select,
                                                                data_file,
                                                                sim_ids)
        working_jobs = get_working_jobs(evald_futures)

        # Periodic status output once enough models have been evaluated
        if (models_evald % pool_status_update == 0
                and models_evald >= i_dict['population_limits']['pool']):
            _print_pool_status(pool, i_dict["select_params"])

# process extra calculations running in last batch
# NOTE(review): this loop calls update_pool() repeatedly without sleeping.
while len(evald_futures) > 0:
    evald_futures, models_evald, pool, select = update_pool(evald_futures,
                                                            models_evald,
                                                            pool, select,
                                                            data_file, sim_ids)

# Final summary of the pool and the operator statistics
_print_pool_status(pool, i_dict["select_params"])

print(f"Current operator probabilities: {select.operator_frequencies}")
print('Done!')
print('Total time: ', time.time() - start_time)

client.shutdown()