# Build Dataset #

Build the training dataset in npz format, using an MD trajectory as the input source.

In [None]:
def interactive_build_dataset():
    """Display a form that runs ``build_dataset.py`` with the chosen options.

    The form collects the CG mapping (a preset name or a custom mapping
    folder), the input structure, the input trajectory, the atom selection
    and the output folder. Clicking "Build Dataset" launches
    ``build_dataset.py`` in a subprocess and streams its combined
    stdout/stderr into an output widget below the form.

    Returns:
        None. The widgets are rendered with ``IPython.display.display``.
    """

    import subprocess
    import sys
    import ipywidgets as widgets
    from IPython.display import display

    # Step 1: Create input widgets
    mapping_dropdown_w = widgets.Dropdown(
        options=['martini3', 'martini3.membrane', 'ca', 'Custom'],  # List of options for the dropdown
        value='martini3',  # Default value
        description='CG Mapping',
        disabled=False,
    )

    # Text widget for a custom mapping folder. It is always enabled but
    # starts hidden; it is only shown when "Custom" is selected.
    custom_mapping_w = widgets.Text(
        value='',
        placeholder='Use absolute path to your mapping folder',
        description='Custom CG Mapping',
        disabled=False,
        layout=widgets.Layout(display='none'),  # Initially hidden
    )

    input_w = widgets.Text(
        value='data/tutorial/A2A/md/a2a.pdb',
        placeholder='PATH/TO/INPUT/FILE',
        description='Input file/folder',
        disabled=False
    )

    inputtraj_w = widgets.Text(
        value='data/tutorial/A2A/md/a2a.xtc',
        placeholder='PATH/TO/INPUT/TRAJ',
        description='Input trajectory file/folder',
        disabled=False
    )

    selection_w = widgets.Text(
        value='protein',
        placeholder='selection',
        description='Atom selection',
        disabled=False
    )

    output_w = widgets.Text(
        value='data/tutorial/A2A/npz/',
        placeholder='PATH/TO/OUTPUT/FILES/FOLDER',
        description='Output folder',
        disabled=False
    )

    # Button to trigger the script execution
    run_button = widgets.Button(description="Build Dataset")

    # Output area to display the results
    output_area = widgets.Output()

    def on_mapping_change(change):
        """Show the custom mapping text box only while 'Custom' is selected."""
        if change['new'] == 'Custom':
            custom_mapping_w.layout.display = 'block'
        else:
            custom_mapping_w.layout.display = 'none'

    # Attach the function to handle changes in the dropdown
    mapping_dropdown_w.observe(on_mapping_change, names='value')

    def run_script(button):
        """Run ``build_dataset.py`` and stream its output into the form."""
        script_name = "build_dataset.py"

        # Clear previous output
        output_area.clear_output()

        # Resolve the mapping: preset name, or the user-supplied folder.
        if mapping_dropdown_w.value == 'Custom':
            mapping = custom_mapping_w.value.strip()
            if not mapping:
                # Fail early instead of passing an empty "-m" argument.
                with output_area:
                    print("An error occurred: 'Custom' CG Mapping selected "
                          "but no mapping folder was provided")
                return
        else:
            mapping = mapping_dropdown_w.value

        # Use the interpreter running this kernel so the script sees the same
        # environment; "python" on PATH may be a different installation.
        command = [
            sys.executable or "python", script_name,
            "-m", mapping,
            "-i", input_w.value,
            "-t", inputtraj_w.value,
            "-s", selection_w.value,
            "-o", output_w.value,
        ]

        try:
            with subprocess.Popen(
                command,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,  # Merge stderr into the streamed output
                text=True,  # To capture text instead of bytes
                bufsize=1  # Line-buffered output
            ) as proc:
                # Read stdout line by line
                for line in proc.stdout:
                    with output_area:
                        print(line, end='')  # Print each line in the output area

            # Leaving the ``with`` block waits for the process to finish, so
            # the return code is available here.
            if proc.returncode != 0:
                with output_area:
                    print(f"{script_name} exited with return code {proc.returncode}")

        except Exception as e:
            # Top-level UI boundary: report any failure inside the form.
            with output_area:
                print(f"An error occurred: {e}")

    # Link the button click event to the function
    run_button.on_click(run_script)

    # Step 2: Display the widgets
    display(mapping_dropdown_w, custom_mapping_w, input_w, inputtraj_w, selection_w, output_w, run_button, output_area)

# Render the dataset-building form in this cell's output.
interactive_build_dataset()

# Train Model #

To train the backmapping model, you need to provide a configuration file in YAML format. This file defines the dataset, the model to be used, and various hyperparameters. It also specifies the complete training setup, including the optimizer, learning rate, scheduler, loss function, metrics, and more.

Make sure to update the config file with the recommended settings from the previous step, where the training dataset was created.

Although this configuration file contains extensive details, for this tutorial, we will use a predefined one that already includes all the necessary information, including the dataset configuration.

In [None]:
def interactive_train():
    """Display a form that launches ``geqtrain-train`` with a chosen config.

    The form offers the files found in the ``config`` folder and the compute
    devices reported by torch ('cpu' plus one 'cuda:<i>' entry per GPU).
    Clicking "Run Training" starts ``geqtrain-train`` in a subprocess and
    streams its combined stdout/stderr into an output widget below the form.

    Returns:
        None. The widgets are rendered with ``IPython.display.display``.

    Raises:
        OSError: If the ``config`` folder cannot be listed when the form is
            built.
    """

    import os
    import torch
    import subprocess
    import ipywidgets as widgets
    from IPython.display import display

    config_dir = 'config'

    # Step 1: Create input widgets
    # Offer regular files only (sub-folders are not valid configs) and sort
    # them, because os.listdir returns entries in arbitrary order.
    config_files = sorted(
        name for name in os.listdir(config_dir)
        if os.path.isfile(os.path.join(config_dir, name))
    )

    config_dropdown_w = widgets.Dropdown(
        options=config_files,
        description='Training config file in yaml format',
        disabled=False,
    )

    # Dynamically check available devices (CPU and multiple GPUs)
    device_options = ['cpu']  # Always include the CPU

    if torch.cuda.is_available():
        # Add each GPU as 'cuda:0', 'cuda:1', etc.
        device_options.extend(
            f'cuda:{i}' for i in range(torch.cuda.device_count())
        )

    device_dropdown_w = widgets.Dropdown(
        options=device_options,
        value='cpu',  # Default value
        description='Device',
        disabled=False,
    )

    # Button to trigger the script execution
    run_button = widgets.Button(description="Run Training")

    # Output area to display the results
    output_area = widgets.Output()

    def run_script(button):
        """Run ``geqtrain-train`` and stream its output into the form."""
        script_name = "geqtrain-train"

        # Clear previous output
        output_area.clear_output()

        # With no selectable config the dropdown has no value; report that
        # clearly instead of failing while building the path.
        if config_dropdown_w.value is None:
            with output_area:
                print(f"An error occurred: no config file found in '{config_dir}'")
            return

        command = [
            script_name,
            os.path.join(config_dir, config_dropdown_w.value),
            "-d", device_dropdown_w.value,
        ]

        try:
            with subprocess.Popen(
                command,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,  # Merge stderr into the streamed output
                text=True,  # To capture text instead of bytes
                bufsize=1  # Line-buffered output
            ) as proc:
                # Read stdout line by line
                for line in proc.stdout:
                    with output_area:
                        print(line, end='')  # Print each line in the output area

            # Leaving the ``with`` block waits for the process to finish, so
            # the return code is available here.
            if proc.returncode != 0:
                with output_area:
                    print(f"{script_name} exited with return code {proc.returncode}")

        except Exception as e:
            # Top-level UI boundary: report any failure inside the form.
            with output_area:
                print(f"An error occurred: {e}")

    # Link the button click event to the function
    run_button.on_click(run_script)

    # Step 2: Display the widgets
    display(config_dropdown_w, device_dropdown_w, run_button, output_area)

# Render the training form in this cell's output.
interactive_train()

# Run Backmapping #

In [None]:
def interactive_run_backmapping():
    """Display a form that runs hierarchical backmapping with a trained model.

    The form collects the training config identifying the model, the compute
    device, the CG mapping (preset or custom folder), the input structure and
    optional trajectory, whether the input is atomistic, the atom selection,
    the output folder and the maximum number of atoms per chunk. Clicking
    "Run Inference" builds the argument dictionary, runs
    ``HierarchicalBackmapping.backmap`` and prints the resulting filenames in
    an output widget below the form.

    Returns:
        None. The widgets are rendered with ``IPython.display.display``.

    Raises:
        OSError: If the ``config`` folder cannot be listed when the form is
            built.
    """

    import os
    import torch
    import ipywidgets as widgets
    from IPython.display import display

    config_dir = 'config'

    # Step 1: Select model using training config YAML
    # Offer regular files only (sub-folders are not valid configs) and sort
    # them, because os.listdir returns entries in arbitrary order.
    config_files = sorted(
        name for name in os.listdir(config_dir)
        if os.path.isfile(os.path.join(config_dir, name))
    )

    config_dropdown_w = widgets.Dropdown(
        options=config_files,
        description='Config used for training model',
        disabled=False,
    )

    # Dynamically check available devices (CPU and multiple GPUs)
    device_options = ['cpu']  # Always include the CPU

    if torch.cuda.is_available():
        # Add each GPU as 'cuda:0', 'cuda:1', etc.
        device_options.extend(
            f'cuda:{i}' for i in range(torch.cuda.device_count())
        )

    device_dropdown_w = widgets.Dropdown(
        options=device_options,
        value='cpu',  # Default value
        description='Device',
        disabled=False,
    )

    mapping_dropdown_w = widgets.Dropdown(
        options=['martini3', 'martini3.membrane', 'ca', 'Custom'],  # List of options for the dropdown
        value='martini3',  # Default value
        description='CG Mapping',
        disabled=False,
    )

    # Text widget for a custom mapping folder. It is always enabled but
    # starts hidden; it is only shown when "Custom" is selected.
    custom_mapping_w = widgets.Text(
        value='',
        placeholder='Use absolute path to your mapping folder',
        description='Custom CG Mapping',
        disabled=False,
        layout=widgets.Layout(display='none'),  # Initially hidden
    )

    input_w = widgets.Text(
        value='data/tutorial/A2A/test/1a2y.pdb',
        placeholder='PATH/TO/INPUT/FILE',
        description='Input file/folder',
        disabled=False
    )

    # The trajectory is optional: an empty value is passed on as None.
    inputtraj_w = widgets.Text(
        value='',
        placeholder='Leave empty if no traj to load',
        description='Input trajectory file/folder',
        disabled=False
    )

    isatomistic_w = widgets.Checkbox(
        value=True,  # Default value (checked, so True)
        description='Input is atomistic',  # Label for the checkbox
        disabled=False  # Whether the checkbox is interactive or not
    )

    selection_w = widgets.Text(
        value='protein',
        placeholder='selection',
        description='Atom selection',
        disabled=False
    )

    output_w = widgets.Text(
        value='data/tutorial/A2A/backmapped/',
        placeholder='PATH/TO/OUTPUT/FILES/FOLDER',
        description='Output folder',
        disabled=False
    )

    batch_max_atoms_w = widgets.Text(
        value='10000',
        placeholder='E.g. 10000',
        description='Max atoms per chunk',
        disabled=False
    )

    # Button to trigger the script execution
    run_button = widgets.Button(description="Run Inference")

    # Output area to display the results
    output_area = widgets.Output()

    def on_mapping_change(change):
        """Show the custom mapping text box only while 'Custom' is selected."""
        if change['new'] == 'Custom':
            custom_mapping_w.layout.display = 'block'
        else:
            custom_mapping_w.layout.display = 'none'

    # Attach the function to handle changes in the dropdown
    mapping_dropdown_w.observe(on_mapping_change, names='value')

    def run_inference(button):
        """Run the backmapping with the current form values and report results."""

        # Clear previous output
        output_area.clear_output()

        # Everything printed while running is routed to the output widget so
        # results and errors appear under the form.
        with output_area:
            if config_dropdown_w.value is None:
                print(f"An error occurred: no config file found in '{config_dir}'")
                return

            try:
                batch_max_atoms = int(batch_max_atoms_w.value)
            except ValueError:
                print(
                    "An error occurred: 'Max atoms per chunk' must be an "
                    f"integer, got {batch_max_atoms_w.value!r}"
                )
                return

            if mapping_dropdown_w.value == 'Custom':
                mapping = custom_mapping_w.value
            else:
                mapping = mapping_dropdown_w.value

            args_dict = {
                "mapping": mapping,
                "input": input_w.value,
                "inputtraj": inputtraj_w.value or None,  # Empty text means no trajectory
                "isatomistic": isatomistic_w.value,
                "selection": selection_w.value,
                "output": output_w.value,
                "model": os.path.join(config_dir, config_dropdown_w.value),
                "device": device_dropdown_w.value,
                "batch_max_atoms": batch_max_atoms,
                "noinvariants": True,
            }

            try:
                # Imported lazily so the form can be displayed even when the
                # import is slow or fails; a failure is reported below.
                from herobm.backmapping.hierarchical_backmapping import HierarchicalBackmapping

                backmapping = HierarchicalBackmapping(args_dict=args_dict)

                (
                    backmapped_filenames,
                    backmapped_minimised_filenames,
                    original_filenames,
                    _cg_filenames,
                ) = backmapping.backmap(
                    optimise_backbone=False,
                    tolerance=500.,  # Value in Kj/(mol nm). Threshold to stop energy minimisation.
                )
            except Exception as e:
                # Top-level UI boundary: report any failure inside the form.
                print(f"An error occurred: {e}")
                return

            print(f"Backmapped filenames: {backmapped_filenames}")
            if len(backmapped_minimised_filenames) > 0:
                print(f"Backmapped minimised filenames: {backmapped_minimised_filenames}")
            print(f"Original filenames: {original_filenames}")

    # Link the button click event to the function
    run_button.on_click(run_inference)

    # Step 2: Display the widgets
    display(
        config_dropdown_w,
        device_dropdown_w,
        mapping_dropdown_w,
        custom_mapping_w,
        input_w,
        inputtraj_w,
        isatomistic_w,
        selection_w,
        output_w,
        batch_max_atoms_w,  # Was created and read but never shown
        run_button,
        output_area,
    )

# Render the backmapping form in this cell's output.
interactive_run_backmapping()