## This notebook creates batch files used to submit jobs on the cluster

### Step 1: Replace the example values in the strings below with your own settings for the main parameters.

In [1]:
# Name under which the job is submitted (SLURM --job-name).
job_name = "BIFROST_ML"

# Name of the partition the job is submitted to (SLURM --partition).
partition = "partition_name"

# Number of CPU cores per node; the batch file multiplies this by the
# number of allocated nodes to obtain the MPI process count.
cpus = "number_of_cpus"

# Path where data will be saved; it is prepended to each data folder name,
# so keep the trailing '/'.
data_path = "/path/to/save/data/"

# Base name of the data folders.
mcstas_folder_base = "data_set"

# Number of neutrons passed to each mcrun command (-n).
ncount = "1E8"

In [2]:
import numpy as np
from math import *

def make_base_batch_file(job_name, name, partition=partition, cpus=cpus, min_nodes=1, max_nodes=1):
    """Return the header shared by every batch file.

    The header holds the SLURM directives, the NUMCORES computation and the
    module setup; mcrun command lines are appended to it by the caller.

    Parameters
    ----------
    job_name : value written after ``--job-name=``.
    name : label used in the stderr/stdout file names
        (``err_empty_<name>.stderr`` / ``out_empty_<name>.stdout``).
    partition : value written after ``--partition``.
    cpus : multiplied by ``$SLURM_NNODES`` in the script to give NUMCORES.
    min_nodes, max_nodes : written as the ``--nodes <min>-<max>`` range.

    Note: the ``partition`` and ``cpus`` defaults are bound when this cell is
    executed, so re-run it after changing the configuration values.
    """
    sbatch_directives = [
        "#!/bin/bash",
        "#SBATCH --job-name={}".format(job_name),
        "#SBATCH --error=err_empty_{}.stderr".format(name),
        "#SBATCH --output=out_empty_{}.stdout".format(name),
        "#SBATCH --nodes {}-{}".format(min_nodes, max_nodes),
        "#SBATCH --partition {}".format(partition),
        "#SBATCH --time=23:59:00",
        "# the --exclusive is needed when running OpenMPI",
        "# it will allocate 1x12 core per node",
        "#SBATCH --exclusive",
    ]
    # Total MPI process count = allocated nodes * cpus, evaluated with dc.
    core_count = [
        "",
        'NUMCORES=`echo "$SLURM_NNODES {} * p "| dc`'.format(cpus),
        "",
    ]
    environment_setup = [
        "source /etc/profile.d/modules.sh",
        "module avail",
        "module purge",
        "export OMPI_MCA_btl=^vader,tcp,openib",
        "module load mcstas/3.3",
        "module load gcc/10.2.0",
        "module load openmpi/4.0_gcc1020",
        "",
    ]
    # The header ends with one blank line before the first mcrun command.
    return "\n".join(sbatch_directives + core_count + environment_setup) + "\n"

def make_mcrun_line(mcstas_folder, n, instrument_name, parameters):
    """Build one newline-terminated ``mcrun`` command line.

    Parameters
    ----------
    mcstas_folder : value passed to ``-d``.
    n : value passed to ``-n``.
    instrument_name : instrument file given to mcrun.
    parameters : dict whose items are appended as space-separated
        ``key=value`` pairs, in the dict's iteration order.
    """
    command_prefix = "mcrun -g --mpi=$NUMCORES -d {} -n {} {} ".format(
        mcstas_folder, n, instrument_name
    )
    assignments = ["{}={}".format(key, value) for key, value in parameters.items()]
    return command_prefix + " ".join(assignments) + "\n"
    

def make_batch_file(instrument_name, parameter_dict_list, file_name='batch_file.sh', data_path=None, 
                    mcstas_folder_base=mcstas_folder_base, n=ncount, partition=partition, cpus=cpus,
                    min_nodes=1, max_nodes=1, only_print=True):
    """Assemble one batch file: the shared header plus one mcrun line per parameter set.

    Parameters
    ----------
    instrument_name : instrument file given to every mcrun command.
    parameter_dict_list : list of dicts; each dict becomes one mcrun command.
    file_name : name of the batch file to write (or to report when printing).
    data_path : optional directory in which the data folders are created;
        a trailing '/' is accepted but no longer required.
    mcstas_folder_base : base name of the data folders; the index of the
        parameter set is appended as ``_<index>``.
    n : value passed to mcrun ``-n``.
    partition, cpus, min_nodes, max_nodes : forwarded to the batch header.
    only_print : if True, print the text instead of writing ``file_name``.

    The job name is read from the notebook-level ``job_name`` variable.
    """
    # The generated script is a bash/SLURM batch file, so paths inside it
    # always use '/' separators regardless of where this notebook is run.
    import posixpath

    batch_text = make_base_batch_file(job_name, mcstas_folder_base, partition, cpus=cpus,
                                      min_nodes=min_nodes, max_nodes=max_nodes)

    for index, parameter_dict in enumerate(parameter_dict_list):
        mcstas_folder = mcstas_folder_base + "_" + str(index)
        if data_path is not None:
            # join() inserts the separator only when data_path lacks it, so
            # '/data' and '/data/' both yield '/data/<folder>'.
            mcstas_folder = posixpath.join(data_path, mcstas_folder)
        batch_text += make_mcrun_line(instrument_name=instrument_name, parameters=parameter_dict,
                                      mcstas_folder=mcstas_folder, n=n)

    if only_print:
        print("Would write this text into file called: " + file_name)
        print(batch_text)
    else:
        # newline='\n' keeps Unix line endings even when written on another OS.
        with open(file_name, 'w', newline='\n') as f:
            f.write(batch_text)
            
def make_job(instrument_name, file_name='batch_file.sh', data_path=None,
             mcstas_folder_base=mcstas_folder_base, n=ncount, partition=partition, cpus=cpus, 
             min_nodes=1, max_nodes=1, only_print=True, n_jobs=10):
    """Draw ``n_jobs`` random parameter sets and hand them to ``make_batch_file``.

    Each parameter set becomes one mcrun command in the batch file. All other
    arguments are forwarded unchanged to ``make_batch_file``.
    """
    powder_materials = ["Na2Ca3Al2F14", "I2", "Al2O3_sapphire", "H2O_ice_1h", "Y2O3",
                        "Y3Fe5O12_YIG", "UO2", "Sn_beta", "B4C", "lsco_64", "V", "Vacuum"]
    crystal_materials = ["YBaCuO", "C_diamond", "C60", "Al2O3_sapphire_cr", "CaF2",
                         "SiO2_quartza", "SiO2_quartzb", "Vacuum"]
    # d_value assigned to each crystal material; "Vacuum" has none.
    crystal_d_values = {
        "YBaCuO": 11.6777,
        "C_diamond": 0.8917,
        "C60": 7.02039,
        "Al2O3_sapphire_cr": 2.1658,
        "CaF2": 2.7315,
        "SiO2_quartza": 4.25504,
        "SiO2_quartzb": 4.33879,
    }

    parameter_dict_list = []
    for _ in range(n_jobs):  # one iteration per mcrun command in this file
        # Key order matters: it is the order of the arguments on the mcrun line.
        params = {
            "beam_div": np.random.uniform(20, 40),
            "detector_rad": np.random.uniform(897E-3, 2),
            "sample_radius": np.random.uniform(1E-3, 5E-3),
            # Fixed to powders; np.random.randint(0, 2) would give 0 or 1
            # if the crystal option should be available.
            "sample_type": 0,
        }

        # Parameters that depend on earlier choices, such as list of materials
        sample_type = params["sample_type"]
        if sample_type == 0:       # powders
            params["material"] = np.random.choice(powder_materials)
            params["wavelength"] = np.random.uniform(1, 9)
            params["sample_height"] = np.random.uniform(5E-3, 30E-3)

        elif sample_type == 1:     # crystals
            material = np.random.choice(crystal_materials)
            params["material"] = material
            if material in crystal_d_values:
                params["d_value"] = crystal_d_values[material]

            if material == "Vacuum":
                l_max = 9
            else:
                # NOTE(review): calculate_wavelength is not defined in the
                # visible part of this notebook; this branch is unreachable
                # while sample_type is fixed to 0 - confirm it exists before
                # enabling crystals.
                l_max = calculate_wavelength(params["d_value"])
                l_max = 9 if l_max > 9 else l_max  # cap the upper wavelength at 9

            params["wavelength"] = np.random.uniform(1, l_max)
            params["sample_height"] = params["sample_radius"] * 2

        # Wavelength spread: a random fraction of the chosen wavelength.
        params["d_wavelength"] = np.random.uniform(0.0025, 0.005) * params["wavelength"]

        parameter_dict_list.append(params)

    # The make_batch_file then gets a list of dicts per batch file.
    make_batch_file(instrument_name=instrument_name, parameter_dict_list=parameter_dict_list,
                    file_name=file_name, data_path=data_path, mcstas_folder_base=mcstas_folder_base,
                    n=n, partition=partition, cpus=cpus, min_nodes=min_nodes, max_nodes=max_nodes,
                    only_print=only_print)
    
def make_training_jobs(instrument_name, base_file_name='batch_file', data_path=data_path, 
                       mcstas_folder_base=mcstas_folder_base, n=ncount, partition=partition, cpus=cpus,
                       min_nodes=1, max_nodes=1, only_print=True, n_jobs=10, n_batch_files=10, start_batch=0, submit_all_no=0):
    """Generate ``n_batch_files`` batch files plus one script that submits them all.

    Parameters
    ----------
    instrument_name : instrument file compiled once by the submit script and
        run by every mcrun command.
    base_file_name : batch files are named ``<base_file_name>_<number>.batch``.
    data_path, n, partition, cpus, min_nodes, max_nodes : forwarded to ``make_job``.
    mcstas_folder_base : base name of the data folders; the batch number is
        appended before it is passed on.
    only_print : if True, print the batch files and the submit script instead
        of writing anything to disk.
    n_jobs : number of mcrun commands in each batch file.
    n_batch_files : number of batch files to generate.
    start_batch : number used for the first batch file; later files count up.
    submit_all_no : number in the submit script name ``submit_all<number>.sh``.
    """
    submit_all = "#!/bin/bash\n"
    # Compile the instrument once (-c, zero neutrons) before any job is submitted.
    submit_all += "mcrun -c -g --mpi=2 -n 0 " + instrument_name + "\n"

    for index in range(n_batch_files):
        batch_number = str(start_batch + index)
        mcstas_folder_this_job = mcstas_folder_base + "_" + batch_number
        batch_file_name = base_file_name + "_" + batch_number + ".batch"
        submit_all += "sbatch " + batch_file_name + "\n"

        # cpus and n_jobs are forwarded so the caller's values are honoured.
        make_job(instrument_name=instrument_name,
                 file_name=batch_file_name, data_path=data_path, mcstas_folder_base=mcstas_folder_this_job,
                 n=n, partition=partition, cpus=cpus, min_nodes=min_nodes, max_nodes=max_nodes,
                 only_print=only_print, n_jobs=n_jobs)

    # Decide once, after all batch files are handled: a dry run prints the
    # complete submit script, a real run writes it.
    submit_file_name = "submit_all" + str(submit_all_no) + ".sh"
    if only_print:
        print("Would write this text into file called: " + submit_file_name)
        print(submit_all)
    else:
        with open(submit_file_name, 'w', newline='\n') as f:
            f.write(submit_all)

### Step 2: Replace the following parameter values to generate or print the contents of the batch file.

###### only_print: True only prints the contents of the batch file. False generates batch files.
###### n_batch_files: Number of batch files to be generated. 
###### start_batch: Number used in the name of the first batch file. The first file is named batch_file_0.batch if start_batch=0; the number increases by one for each following file.
###### submit_all_no: Number in the name of the .sh script that submits all batch files at once (submit_all0.sh if submit_all_no=0). Increase it for each new set of batch files.

In [3]:
# only_print=True prints the batch file contents rather than writing the batch file.
make_training_jobs(
    instrument_name="BIFROST_ML.instr",
    only_print=True,
    n_batch_files=1,
    start_batch=0,
    submit_all_no=0,
)

Would write this text into file called: batch_file_0.batch
#!/bin/bash
#SBATCH --job-name=BIFROST_ML
#SBATCH --error=err_empty_data_set_0.stderr
#SBATCH --output=out_empty_data_set_0.stdout
#SBATCH --nodes 1-1
#SBATCH --partition partition_name
#SBATCH --time=23:59:00
# the --exclusive is needed when running OpenMPI
# it will allocate 1x12 core per node
#SBATCH --exclusive

NUMCORES=`echo "$SLURM_NNODES number_of_cpus * p "| dc`

source /etc/profile.d/modules.sh
module avail
module purge
export OMPI_MCA_btl=^vader,tcp,openib
module load mcstas/3.3
module load gcc/10.2.0
module load openmpi/4.0_gcc1020

mcrun -g --mpi=$NUMCORES -d /path/to/save/data/data_set_0_0 -n 1E8 BIFROST_ML.instr beam_div=34.895583845237354 detector_rad=1.5696763728351244 sample_radius=0.001668940511322522 sample_type=0 material=UO2 wavelength=1.5587458286976945 sample_height=0.010675322623227419 d_wavelength=0.00569008240389979
mcrun -g --mpi=$NUMCORES -d /path/to/save/data/data_set_0_1 -n 1E8 BIFROST_ML.instr be

### Return to Step 1 to generate files with new parameters, then re-run all following cells so that the new values take effect