# Running multiple calculations on a given model


## Aim

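This notebook shows how to chain several calculations on the structures in a trajectory file using an `aiida-workgraph` `WorkGraph`: a geometry optimisation and a descriptors calculation for each structure, a split into train, test and validation files, and a Quantum ESPRESSO SCF calculation for each structure in those files.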

### Setup

The initial setup is very similar to the other tutorials, such as `singlepoint.ipynb`, which explain in more detail what each step does.

Load the AiiDA profile, the model and the code:

In [None]:
# Load an AiiDA profile (no profile name is passed) before any ORM call in the cells below.
from aiida import load_profile
load_profile()

In [None]:
from aiida_mlip.data.model import ModelData
# NOTE: the URL points at the `main` branch, so the downloaded file is not pinned to a fixed revision.
uri = "https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model"
# Create `model` from the URI, labelled with the "mace_mp" architecture.
model = ModelData.from_uri(uri, architecture="mace_mp")

In [None]:
import os

from aiida.orm import load_code, load_computer

# Code used by the aiida-mlip (janus) calculations in this notebook.
janus_code = load_code("janus@localhost")

# Computer whose work directory is configured below.
computer_label = 'scarf'
scarf = load_computer(computer_label)

# The work directory is user-specific. Set the AIIDA_SCARF_WORKDIR environment
# variable to override it without editing the notebook; the default is the
# path that was previously hard-coded here.
scarf_workdir = os.environ.get("AIIDA_SCARF_WORKDIR", '/work4/scd/scarf1480/aiida')
scarf.set_workdir(scarf_workdir)

Inputs should include the model, code, metadata, and any other keyword arguments expected by the calculation we are running:

In [None]:
from aiida.orm import Str, Float, Bool, Int

# Keyword arguments handed to the calculations: code and model first, then the
# run options, then the scheduler metadata (one machine per job).
inputs = dict(
    code=janus_code,
    model=model,
    arch=Str(model.architecture),
    device=Str("cpu"),
    fmax=Float(0.1),
    opt_cell_lengths=Bool(False),
    opt_cell_fully=Bool(True),
    metadata=dict(options=dict(resources=dict(num_machines=1))),
)

We must now choose the calculations to perform:

In [None]:
from aiida.plugins import CalculationFactory

# Resolve the three calculation classes from their entry-point names, in order.
geomoptCalc, descriptorsCalc, trainCalc = (
    CalculationFactory(entry_point)
    for entry_point in ("mlip.opt", "mlip.descriptors", "mlip.train")
)


Set up a graph task that builds the Quantum ESPRESSO parameters and creates a `PwCalculation` for each structure in each input file. It returns `test_file`, `train_file` and `valid_file`.

In [None]:
from aiida_workgraph import task
from aiida_workgraph.manager import get_current_graph
from aiida.orm import StructureData, load_group, KpointsData, SinglefileData
from ase.io import iread
from aiida_mlip.data.config import JanusConfigfile
from pathlib import Path
import yaml
from aiida_quantumespresso.calculations.pw import PwCalculation
from sample_split import process_and_split_data


@task.graph(outputs = ["test_file", "train_file", "valid_file"])
def qe(**inputs):
    """
    Add one Quantum ESPRESSO ``PwCalculation`` task per structure in each input file.

    Parameters
    ----------
    **inputs
        Mapping of a label to a file node. Each node must provide ``as_path()`` and
        hold structures that ``ase.io.iread`` can read in ``extxyz`` format. The
        return value reads ``test_file``, ``train_file`` and ``valid_file`` from the
        graph context, so presumably those three labels must be passed and each
        file must contain at least one structure.

    Returns
    -------
    dict
        ``test_file``, ``train_file`` and ``valid_file`` entries read from the graph
        context. Each collects the ``output_trajectory`` of the tasks created for
        that file under the keys ``struct0``, ``struct1``, ...
    """
    qe_code = load_code("qe@scarf")

    wg = get_current_graph()

    # The same 1x1x1 k-point mesh is used for every calculation.
    kpoints = KpointsData()
    kpoints.set_kpoints_mesh([1, 1, 1])

    # Scheduler options shared by every PwCalculation task.
    metadata =  {
        "options": {
            "resources": {
                "num_machines": 1,
                'num_mpiprocs_per_machine': 32,
            },
            'max_wallclock_seconds': 3600,         # Set maximum wallclock time
            #'account': 'elph',                     # Set account name
            'queue_name': 'scarf',                  # Set queue name
            'qos': 'scarf',
            'environment_variables': {},
            'withmpi': True,                       # Use MPI
            'prepend_text': '''
            module purge
            module use /work4/scd/scarf562/eb-common/modules/all
            module load amd-modules
            module load QuantumESPRESSO/7.2-foss-2023a
            ''',
            'append_text': ''
        }
    }

    pseudo_family = load_group('SSSP/1.3/PBE/efficiency')

    for label, file in inputs.items():
        with file.as_path() as path:
            for index, atoms in enumerate(iread(path, format="extxyz")):

                structure = StructureData(ase=atoms)
                pseudos = pseudo_family.get_pseudos(structure=structure)

                # Cutoffs depend on the elements present, so they are looked up per structure.
                ecutwfc, ecutrho = pseudo_family.get_recommended_cutoffs(
                    structure=structure,
                    unit='Ry',
                )

                pw_paras = {
                    "CONTROL": {
                        "calculation": "scf",
                        'tprnfor': True,
                        'tstress': True,
                    },
                    "SYSTEM": {
                        "ecutwfc": ecutwfc,
                        "ecutrho": ecutrho,
                    },
                }

                qe_task = wg.add_task(
                    PwCalculation,
                    code = qe_code,
                    parameters= pw_paras,
                    kpoints= kpoints,
                    pseudos= pseudos,
                    metadata= metadata,
                    structure= structure,
                )

                # The dotted key groups the trajectories of one file under that file's label.
                structfile = f"{label}.struct{index}"
                wg.update_ctx({structfile: qe_task.outputs.output_trajectory})

    return {
        "test_file": wg.ctx.test_file,
        "train_file": wg.ctx.train_file,
        "valid_file": wg.ctx.valid_file
    }


In [None]:
@task.calcfunction(outputs = ["JanusConfigfile"])
def create_train_file(**files) -> JanusConfigfile:
    """
    Write each input file to disk and create a YAML config that points at the copies.

    Parameters
    ----------
    **files
        Mapping of a name to an object exposing ``get_content()``. Each one is
        written to ``mlip_<name>.xyz`` in the current working directory.

    Returns
    -------
    dict
        A single ``"JanusConfigfile"`` entry holding a ``JanusConfigfile`` built from
        the absolute path of ``JanusConfigFile.yaml``.
    """
    config = {"name": "test"}

    for label, node in files.items():
        xyz_path = Path(f"mlip_{label}.xyz")

        with xyz_path.open("w") as handle:
            handle.write(node.get_content())

        # Store the absolute path under the same name the file was passed as.
        config[label] = str(xyz_path.resolve())

    yaml_path = Path("JanusConfigFile.yaml")
    with yaml_path.open("w") as handle:
        yaml.dump(config, handle)

    return {'JanusConfigfile': JanusConfigfile(str(yaml_path.resolve()))}

In [None]:
@task.calcfunction(outputs = ["test_file", "train_file", "valid_file"])
def create_aiida_files(**inputs):
    """
    Run ``process_and_split_data`` and wrap its three files as ``SinglefileData``.

    Parameters
    ----------
    **inputs
        Keyword arguments forwarded unchanged to ``process_and_split_data``.

    Returns
    -------
    dict
        ``train_file``, ``test_file`` and ``valid_file`` entries, each a
        ``SinglefileData`` built from the matching entry of the split result.
    """
    split_paths = process_and_split_data(**inputs)

    return {
        key: SinglefileData(split_paths[key])
        for key in ("train_file", "test_file", "valid_file")
    }

In [None]:
from aiida_workgraph import WorkGraph, Zone
from aiida.orm import StructureData
from ase.io import iread
from aiida.orm import Str, Float, Bool, Int

# `inputs` is the dictionary defined in the earlier inputs cell; it is reused
# here rather than being declared a second time.

initial_structure = "../structures/lj-traj.xyz"

with WorkGraph("QE Calculation Workgraph") as wg:

    wg.inputs = inputs

    # Arguments common to the geometry-optimisation and descriptors tasks.
    # `metadata` is taken from the plain dictionary, as in the original calls.
    shared_calc_inputs = {
        "code": wg.inputs.code,
        "model": wg.inputs.model,
        "arch": wg.inputs.arch,
        "device": wg.inputs.device,
        "metadata": inputs["metadata"],
    }

    final_structures = {}

    # Per structure: optimise the geometry, then compute descriptors on the result.
    for i, struct in enumerate(iread(initial_structure)):
        structure = StructureData(ase=struct)

        geomopt_calc = wg.add_task(
            geomoptCalc,
            **shared_calc_inputs,
            fmax=wg.inputs.fmax,
            opt_cell_lengths=wg.inputs.opt_cell_lengths,
            opt_cell_fully=wg.inputs.opt_cell_fully,
            struct=structure,
        )

        descriptors_calc = wg.add_task(
            descriptorsCalc,
            **shared_calc_inputs,
            struct=geomopt_calc.outputs.final_structure,
            calc_per_element=True,
        )

        final_structures[f"structs{i}"] = descriptors_calc.outputs.xyz_output

    # Split the descriptor outputs into train, test and validation files.
    split_task_inputs = {
        "trajectory_data": final_structures,
        "config_types": Str(""),
        "n_samples": Int(len(final_structures)),
        "prefix": Str(""),
        "scale": Float(1.0e5),
        "append_mode": Bool(False),
    }

    split_task = wg.add_task(create_aiida_files, inputs=split_task_inputs)

    # Run a Quantum ESPRESSO calculation for every structure in each split file.
    qe_inputs = {
        "test_file": split_task.outputs.test_file,
        "train_file": split_task.outputs.train_file,
        "valid_file": split_task.outputs.valid_file
    }

    qe_task = wg.add_task(qe, name="QE_workflow", **qe_inputs)

    # Only consumed by the disabled training stage below.
    qe_files = {
        "test_file": qe_task.outputs.test_file,
        "train_file": qe_task.outputs.train_file,
        "valid_file": qe_task.outputs.valid_file
    }

    # TODO: training stage, currently disabled.
    # NOTE(review): `create_train_file` declares its output as "JanusConfigfile",
    # so `training_files.outputs.result` probably needs to become
    # `training_files.outputs.JanusConfigfile` - confirm before enabling.
    # NOTE(review): `create_train_file` calls `get_content()` on each input, while
    # the QE_workflow outputs are collections of trajectories - confirm the types match.
    #
    # training_files = wg.add_task(create_train_file, **qe_files)

    # train_task = wg.add_task(
    #     trainCalc,
    #     mlip_config = training_files.outputs.result,
    #     code = janus_code,
    #     metadata={'options': {"resources": {"num_machines": 1}}},
    #     fine_tune =True,
    #     foundation_model= model
    # )

In [None]:
# Show the assembled WorkGraph as the cell output before running it.
wg


In [None]:
# Run the WorkGraph built above.
wg.run()

In [13]:
# Inspect the `test_file` output of the task named "QE_workflow".
wg.tasks.QE_workflow.outputs.test_file

SocketAny(name='test_file', value=AttributeDict({'struct0': <TrajectoryData: uuid: 40daadac-3987-4cf7-8bfb-8734e4bcce5d (pk: 6626)>, 'struct1': <TrajectoryData: uuid: 31438781-6ffe-4548-8039-5f4cd20bb5ed (pk: 6629)>}))