# IUC04 demo: Uploading a pyiron job to openBIS + Coscine from the notebook


## Set up conda, clone git repos, and install packages if not present

- First, a conda environment is set up. This restarts the kernel, so you need to run everything again from the first cell (which already contains the conda setup).
- Afterwards, the repositories are cloned into the Colab workspace and the branch that works on Colab is checked out.
- Finally, an `environment.yml` file is written and used to update the conda environment with all required packages.


In [13]:
# Install condacolab via pip only when no `conda` executable answers on this runtime.
! if [[ "$(conda --version 2> /dev/null)" == "" ]]; then pip install -q condacolab; fi

import condacolab, os
# NOTE: per the setup notes above, the conda setup resets the kernel; re-run from this cell afterwards.
condacolab.install()
# Environment variables for the tools used later in this session.
os.environ['CONDA_DIR'] = '/usr/local/'
# presumably read by the AWS SDK (boto3 is installed below) during the S3 upload — TODO confirm
os.environ['AWS_REQUEST_CHECKSUM_CALCULATION'] =  "when_required"

✨🍰✨ Everything looks OK!


In [14]:
# `userdata` is used below to read Colab secrets (git token, download password).
from google.colab import userdata, output
# Switch on Colab's custom widget manager; presumably required for the NGLWidget shown by structure.plot3d() below — TODO confirm
output.enable_custom_widget_manager()

In [15]:
import git

In [16]:
# Access token for the pyiron_rdm repository, stored as a Colab secret.
pyiron_rdm_token = userdata.get('pyiron_rdm')
pyiron_rdm_url =  f"https://{pyiron_rdm_token}@github.com/pyiron/pyiron_rdm.git"
pyiron_rdm_path = 'pyiron_rdm'
demo_repo_url = 'https://gitlab.mpcdf.mpg.de/pyiron/iuc04-demonstrator'
demo_repo_path = 'iuc04-demonstrator'


def _open_or_clone(url, path, secret=None):
  """Return the git repository at `path`, cloning it from `url` if the path does not exist.

  Parameters:
    url: remote URL to clone from when `path` is missing.
    path: local directory of the checkout.
    secret: optional string (e.g. an access token contained in `url`) that is
      masked in the printed error message.

  Returns:
    A `git.Repo` for `path`, or None when cloning failed (the error is printed).
  """
  try:
    return git.Repo(path)
  except git.exc.NoSuchPathError:
    pass  # not cloned yet -> clone below
  try:
    return git.Repo.clone_from(url, path)
  except git.exc.GitCommandError as e:
    message = str(e)
    if secret:
      # Defensive: never echo the token into the notebook output.
      message = message.replace(secret, '***')
    print(f"Error cloning repository: {message}")
    return None


pyiron_rdm_git = _open_or_clone(pyiron_rdm_url, pyiron_rdm_path, secret=pyiron_rdm_token)
# The demonstrator repository stays best-effort: a failed clone is only reported.
_open_or_clone(demo_repo_url, demo_repo_path)

if pyiron_rdm_git is None:
  # Stop here with a clear message; the following cells check out a branch of
  # pyiron_rdm and import from it, so they cannot work without the checkout.
  raise RuntimeError(
    "pyiron_rdm could not be opened or cloned (see the error printed above); "
    "the following cells need it."
  )


In [17]:
# Switch the pyiron_rdm checkout to the branch used for Colab and pull its latest commits.
pyiron_rdm_git.git.checkout('dev_nsiemer')
pyiron_rdm_git.git.pull()

'Already up to date.'

In [18]:
# Conda environment specification for the demo: channels plus the package list.
environment_spec = """channels:
  - conda-forge
  - nodefaults
dependencies:
  - python=3.11
  - pyiron
  - pyiron_base=0.13.0
  - pyiron_atomistics=0.7.8
  - lammps
  - nglview
  - pyiron-data >=0.0.30
  - pybis=1.37.0
  - python-xxhash=3.5.0
  - iprpy-data
  - boto3=1.35
"""

# Write the specification to the working directory; the next cell passes it to `conda env update`.
with open('environment.yml', 'w') as env_file:
    env_file.write(environment_spec)

In [19]:
# Apply environment.yml to the base conda environment; stdout is redirected to install.log and stderr to install.err, so inspect those files if the imports below fail.
! conda env update -n base -f environment.yml > install.log 2> install.err

# Run a pyiron calculation
We use a small LAMMPS minimization job of a Fe structure as a test case.

In [20]:
from pyiron import Project

In [21]:
# pyiron project named 'test'; the structure and job below are created through it.
pr = Project('test')

In [22]:
# Bulk Fe cell built with cubic=True and a=2.845 (presumably the lattice constant in Å — TODO confirm).
structure = pr.create.structure.bulk("Fe", cubic=True, a=2.845)
# Interactive 3D view of the structure (rendered as an NGLWidget).
structure.plot3d()

NGLWidget()

In [23]:
# LAMMPS job named "lmp_atom"; delete_existing_job=True presumably replaces a job of the same name on re-run — confirm in the pyiron docs.
job = pr.create.job.Lammps("lmp_atom", delete_existing_job=True)
job.structure = structure
# NOTE(review): 68 is a position in the list returned by list_potentials(); it presumably depends on the installed potential database, so selecting the potential by name would be more robust.
job.potential = job.list_potentials()[68]
# Minimisation with style "fire" and an ionic force tolerance of 1e-8.
job.calc_minimize(ionic_force_tolerance=1e-8, style="fire")
job.executable = 'lmp -in control.inp'   # colab is running with root user causing issues with mpirun - not needed for 'normal' workflow
job.run()

The job lmp_atom was saved and received the ID: 1


In [24]:
# Display the potential energies stored in the job output.
job.output.energy_pot

array([-8.02939586, -8.02939586])

# Interact with the openBIS + Coscine infrastructure

## Log-in to OpenBIS-Coscine

In [25]:
import sys
sys.path.append(os.getcwd() + '/pyiron_rdm')  # put the cloned pyiron_rdm checkout on the import path (presumably where the `ob` package below lives)
import ob.classic as ob_classic
import requests

In [26]:
# get write credentials for the S3 bucket.
# Get write credentials for the S3 bucket.
# The config file is fetched from a password-protected share; the share
# password is read from the Colab secret 'cfg_download_link_pw'.
with requests.get(
    'https://datashare.mpcdf.mpg.de/public.php/webdav/',
    auth=('a9xEyO5TUSgi9Ay', userdata.get('cfg_download_link_pw')),
    timeout=60,  # requests has no default timeout; do not hang the notebook forever
) as r:
    # Fail loudly (e.g. on a wrong password) instead of silently saving the
    # HTTP error page as the config file.
    r.raise_for_status()
    with open('test_sfb.cfg', 'wb') as f:
        f.write(r.content)

In [29]:
# Open the openBIS session; the S3 settings are taken from the config file 'test_sfb.cfg'.
o = ob_classic.openbis_login(
    url="https://openbis.imm.rwth-aachen.de/openbis/webapp/eln-lims/",
    username="siemer",
    instance="sfb1394",
    s3_config_path="test_sfb.cfg",
)

Enter openBIS password: ··········


## Upload the pyiron job to openBIS + Coscine
- First, we process the pyiron job to obtain the conceptual dictionaries, a richly annotated JSON-LD data format.
- Next, we validate these against the data on the openBIS instance.
- Finally, we upload the metadata to openBIS and the data to Coscine-S3.

In [84]:
# Build the conceptual dictionaries and the accompanying upload settings for `job`.
(
    cdicts_to_validate,
    proceed,
    job_type,
    upload_final_struct,
    datamodel,
) = ob_classic.create_conceptual_dicts_for_upload(
    job,
    o,
    collection="CRC1394_PYIRON_UPLOAD_DEMO",
)

DEBUG:pyiron_log:Not supported parameter used!


In [85]:
def sanitize_cdict(cdict, debug=0):
  """Recursively replace numpy values in a conceptual dict by plain Python types.

  Dictionaries are updated in place (and returned); lists are rebuilt as new
  lists whose nested dictionaries are the same, in-place updated objects.
  numpy scalars become `int`/`float`/`str`/`bool`, numpy arrays become nested
  lists. Float values stored directly under a dictionary key are turned into
  `int` when they are integer-valued (e.g. 2.0 -> 2); floats inside lists are
  left as floats.

  Parameters:
    cdict: dict, list, or a single value to sanitize.
    debug: if non-zero, print every processed key; the value is used as the
      indentation level and grows by one per nesting level.

  Returns:
    The sanitized structure (the same object for dict input).
  """
  import numpy as np

  def _to_builtin(value):
    """Convert a single numpy scalar/array to its Python equivalent; pass anything else through."""
    if isinstance(value, np.ndarray):
      return value.tolist()
    if isinstance(value, np.bool_):
      return bool(value)
    if isinstance(value, np.integer):  # all signed/unsigned widths
      return int(value)
    if isinstance(value, np.floating):  # all float widths
      return float(value)
    if isinstance(value, np.str_):
      return str(value)
    return value

  debug_n = debug + 1 if debug else debug
  if isinstance(cdict, list):
    return [sanitize_cdict(c, debug=debug_n) for c in cdict]
  if isinstance(cdict, dict):
    # Only values of existing keys are reassigned, so iterating over items() is safe.
    for key, value in cdict.items():
      if isinstance(value, (list, dict)):
        new_value = sanitize_cdict(value, debug=debug_n)
      else:
        new_value = _to_builtin(value)
      # is_integer() is False for nan/inf, where int(...) would raise.
      if isinstance(new_value, float) and new_value.is_integer():
        new_value = int(new_value)
      cdict[key] = new_value
      if debug:
        print(debug*'   ', key, value, type(value), cdict[key])
    return cdict
  # Plain values reached through a list: still convert numpy scalars/arrays.
  return _to_builtin(cdict)

In [86]:
# Convert numpy values to plain Python types. Note: sanitize_cdict rewrites dictionaries in place, so the dicts inside cdicts_to_validate are modified as well.
corrected_cdicts = sanitize_cdict(cdicts_to_validate)

In [75]:
# Colab-specific corrections, applied to the entry at index 1 of the conceptual dicts.
job_cdict = corrected_cdicts[1]

# Needed only on colab due to the 'lmp -in control.inp' above
job_cdict['software'][0]['label'] = 'LAMMPS20220623'

# Redefine the host to be google_colab
for detail in job_cdict['job_details']:
  if detail['label'] == 'host':
    detail['value'] = 'GOOGLE_COLAB'

In [76]:
# Validate the corrected conceptual dicts against the target space/project/collection on openBIS.
validated_cdict = ob_classic.validate_cdicts_to_upload(
    o,
    space="CRC1394",
    project="CRC1394_DEMO",
    collection="CRC1394_PYIRON_UPLOAD_DEMO",
    cdicts_to_validate=corrected_cdicts,
)

In [104]:
# Upload the validated conceptual dicts to the same space/project/collection.
ob_classic.upload_classic_pyiron_from_cdicts(
    o,
    space="CRC1394",
    project="CRC1394_DEMO",
    collection="CRC1394_PYIRON_UPLOAD_DEMO",
    validated_to_upload=validated_cdict,
    datamodel=datamodel,
    job_type=job_type,
    upload_final_struct=upload_final_struct,
)


Object with name lmp_atom_structure already exists! Found object(s) in: ['/CRC1394/CRC1394_DEMO/SAMPLE12253']



Object with name lmp_atom already exists! Found object(s) in: ['/CRC1394/CRC1394_DEMO/PYIRON_JOB_LAMMPS12254']



Object with name lmp_atom_final_structure already exists! Found object(s) in: ['/CRC1394/CRC1394_DEMO/SAMPLE12255']




## Download and work with pyiron job from openBIS/Coscine

This section uses the openBIS connection defined in the previous section to download a pyiron HDF5 file and load it into a dataclass.

In [89]:
from h5io_browser import read_dict_from_hdf
from pyiron_dataclasses.v1.converter import get_dataclass


In [98]:
# Query the LAMMPS job objects in the demo collection by force tolerance and keep the first match.
matching_jobs = o.get_objects(
    project='CRC1394_DEMO',
    collection="CRC1394_PYIRON_UPLOAD_DEMO",
    type="PYIRON_JOB_LAMMPS",
    where={'atom_f_tol_in_ev_a': 1.0E-8},
)
pyiron_job_on_ob = matching_jobs[0]

In [99]:
# Take the first dataset of type 'PYIRON_HDF5' attached to the job object.
pyiron_job_h5 = pyiron_job_on_ob.get_datasets(type='PYIRON_HDF5')[0]

In [100]:
# Re-fetch the dataset by its permId and rebind the name. NOTE(review): presumably needed to obtain a downloadable dataset object — confirm.
pyiron_job_h5=o.get_dataset(pyiron_job_h5.permId)

In [101]:
# Download the dataset into the local folder 'some'.
pyiron_job_h5.download(destination='some')

In [102]:
# Locate the downloaded HDF5 file instead of hard-coding its timestamped name:
# the download lands below 'some/<permId>/', so search there for the .h5 file.
# This keeps the cell working for any dataset, not only one specific upload.
from pathlib import Path

pyiron_h5_dir = Path('some') / pyiron_job_h5.permId
pyiron_h5_files = sorted(pyiron_h5_dir.rglob('*.h5'))
if not pyiron_h5_files:
    raise FileNotFoundError(
        f"No .h5 file found below {pyiron_h5_dir}; run the download cell first."
    )

# Read the complete HDF5 content (recursively from the root) into a nested dict.
job_dict=read_dict_from_hdf(
        file_name=str(pyiron_h5_files[0]),
        h5_path="/",
        recursive=True,
        slash='ignore',
    )


In [103]:
# Turn the raw dictionary of job 'lmp_atom' into dataclasses and display the result.
lmp_atom_dict = job_dict['lmp_atom']
job_classes = get_dataclass(lmp_atom_dict)
job_classes

LammpsJob(calculation_input=LammpsInput(generic_dict=GenericDict(restart_file_list=[], restart_file_dict={}, exclude_nodes_hdf=[], exclude_groups_hdf=[]), interactive=Interactive(interactive_flush_frequency=10000, interactive_write_frequency=10000), generic=GenericInput(calc_mode='minimize', structure='atoms', fix_symmetry=None, k_mesh_spacing=None, k_mesh_center_shift=None, reduce_kpoint_symmetry=None, restart_for_band_structure=None, path_name=None, n_path=None, fix_spin_constraint=None, max_iter=100000, temperature=None, n_ionic_steps=None, n_print=None, temperature_damping_timescale=None, pressure_damping_timescale=None, time_step=None), structure=Structure(dimension=3, indices=array([0, 0]), info={}, positions=array([[0.    , 0.    , 0.    ],
       [1.4225, 1.4225, 1.4225]]), species=['Fe'], cell=Cell(cell=array([[2.845, 0.   , 0.   ],
       [0.   , 2.845, 0.   ],
       [0.   , 0.   , 2.845]]), pbc=array([ True,  True,  True])), units=Units(length='A', mass='u')), potential=Lam