This tutorial shows how to set up a simulation database from GROMACS and LAMMPS simulations.

In [1]:
import os
import logging
import pandas as pd
from datetime import datetime

try:
    import simdb
except ImportError:
    # simdb is not installed: fall back to the repository checkout three
    # directories up, so the `from simdb...` imports below can be resolved.
    import sys
    sys.path.append("../../../")

from simdb.utils.fileFinder import find_files
from simdb.databaseModel import *
from simdb.databaseAPI import *

# GROMACS specific imports
import simdb.utils.tpr_parser as tpr_parser

# LAMMPS specific imports
from simdb.utils import lammps_logfile_parser as llfp
from support.fileHandler import FileHandler


# Create database file

In [8]:
# name of the SQLite file that will hold the example database
db = 'example_simulations.db'

# start from a clean slate: delete a database left over from an earlier run
if os.path.exists(db):
    os.remove(db)

engine = create_engine('sqlite:///' + db, echo=False)

# open a session on the new engine and initialise the database through
# setup_database (presumably this creates the tables - see simdb.databaseAPI)
Session = sessionmaker(bind=engine)
session = Session()
setup_database(engine)


# the session is not needed any further in this cell
session.close()

# Add GROMACS simulations

## Find simulations

Before we can add simulations to our database, we have to find them. For this let's assume we have a folder structure like this:
```
simulations
  |-- sim_01
  |    |-- preproc (folder, contains stuff which was necessary to set up simulation)
  |    |-- topol.tpr
  |    |-- traj.xtc (optional)
  |    |-- md.log   (optional)
  |    *-- meta.csv
  *-- sim_02
       |-- topol.tpr
       |-- traj.xtc (optional)
       |-- md.log   (optional)
       *-- meta.csv
```
There are many different ways to organize files from GROMACS simulations, but let's start with this simple example. 

The `topol.tpr` file marks a **simulation**: every folder containing one will become an entry in our database. With this rule, it is quite simple to find all simulations:

In [None]:
# root folder of the GROMACS example simulations
SIMS = "../../example_simulations/GROMACS_simulations"

# search below SIMS for 'topol.tpr', leaving out the 'preproc' folders;
# as the last expression of the cell, the result is displayed by the notebook
find_files(pattern='topol.tpr', path=SIMS, dir_ignore=['preproc'])

We can now find all simulations and already collect some information about them.

In [None]:
# ========================================= #
# Find files
# ========================================= #

SIM_IDS=[]  # each simulation has a unique ID, we will use the name of the folder
PATHS=[]    # path to each simulation
METAS=[]    # additional information for each simulation

# 'preproc' is ignored in addition to 'data': a TPR file inside a setup folder
# must not be registered as a simulation of its own (its ID would be "preproc").
for tpr_file in find_files(pattern='topol.tpr', path=SIMS, dir_ignore=['preproc', 'data']):

    path = os.path.dirname(tpr_file)
    sim_id = os.path.basename(path)

    # load additional data if available
    # this is a user generated file: header-less CSV, first column holds the
    # keys, second column the values (the layout Series.from_csv used to read;
    # that function was removed in pandas 1.0, hence read_csv).
    try:
        meta_table = pd.read_csv(os.path.join(path, "meta.csv"),
                                 header=None, index_col=0)
    except (IOError, pd.errors.EmptyDataError):
        # no meta file (or an empty one): continue without additional data;
        # explicit dtype avoids the pandas warning for empty Series
        meta = pd.Series(dtype=object)
    else:
        meta = meta_table.iloc[:, 0]
        meta.index.name = meta.name = None

    # add everything to lists
    SIM_IDS.append(sim_id)
    PATHS.append(path)
    METAS.append(meta)

print(SIM_IDS)

## Add simulations

In [None]:
# owner name recorded with every entry added below (a dummy user)
user = "test"

# connect to the database file created above
session = connect_database(db_path=db)

In [None]:
# ========================================= #
# GROMACS - specific settings
# ========================================= #
import shutil

# The directory of the `gmx` executable is exported as the GMXBIN environment
# variable. It is looked up on PATH; if `gmx` is not on PATH, an already
# exported GMXBIN is kept. To point to a specific installation, set GMXBIN
# by hand instead, e.g.:
# GMXBIN="/home/soft/gromacs/gromacs-2018/inst/shared/bin/"
# GMXBIN="/home/soft/GROMACS/gromacs_2016.3_ompi-1.10.2_gcc-5.4/inst/oldcpu/bin/"
gmx_executable = shutil.which("gmx")
if gmx_executable is not None:
    GMXBIN = os.path.dirname(gmx_executable)
else:
    GMXBIN = os.environ.get('GMXBIN')

if GMXBIN:
    os.environ['GMXBIN'] = GMXBIN
else:
    # do not export a bogus value; say clearly what is missing instead
    print("Could not find the 'gmx' executable on PATH and GMXBIN is not set. "
          "Set GMXBIN to the directory containing the GROMACS binaries.")

In [None]:
# ========================================= #
# add entries
# ========================================= #

try:
    for entry_id, path, meta in zip(SIM_IDS, PATHS, METAS):

        tpr_file = os.path.join(path, 'topol.tpr')

        # get information from TPR file; a failing parser must not abort the
        # whole import, the entry is then stored without MDP parameters
        try:
            mapped_keywords = tpr_parser.main(tpr_file)
        except Exception:
            print("Was not able to read TPR file for {}. Try a newer GROMACS version.".format(entry_id))
            mapped_keywords = {}

        # work on a copy: 'note' and 'created_on' are removed below, and the
        # Series kept in METAS must stay intact so this cell can be re-run
        meta = meta.copy()

        # the free-text note becomes the description of the entry
        description = meta.pop('note') if 'note' in meta else ""

        # use either created on from meta file or when TPR file was generated
        # NOTE(review): the value from meta.csv is passed on as read, while the
        # fallback is a datetime - confirm store_dict accepts both.
        if 'created_on' in meta:
            created_on = meta.pop('created_on')
        else:
            created_on = datetime.fromtimestamp(os.path.getmtime(tpr_file))

        # nice function to create a database object; whatever is left in
        # `meta` is handed over as raw keywords
        sim = store_dict(
            entry_id           = entry_id,
            path               = path,
            sim_type           = "GROMACS",
            description        = description,
            created_on         = created_on,
            owner              = user,
            raw_mdp_parameters = mapped_keywords,
            raw_keywords       = meta,
        )

        session.add(sim)
        session.flush()

    # don't forget to commit
    session.commit()
except Exception:
    # leave no half-added entries pending in the session
    session.rollback()
    raise
finally:
    # ... and close the session, whether or not the import succeeded
    session.close()

# Add LAMMPS simulations

## Find simulations

In [3]:
# root folder of the LAMMPS example simulations and owner of the new entries
SIMS = "../../example_simulations/LAMMPS_simulations/"
user = "test"

# keyword arguments handed to find_files for LAMMPS: search SIMS for the
# pattern '_info_' and skip the listed directories
kwargs_fileFinder = {
    'pattern': '_info_',
    'path': SIMS,
    'dir_ignore': [
        'OLD', 'old', 'Old',
        'TMP', 'tmp',
        'rm',
        'template',
        'testcase',
        'input_files',
    ],
}

In [4]:
# ============================================= #
# find files
# ============================================= #

fileHandler = FileHandler()

# results of the scan, filled in parallel (one item per file found):
#   SIM_IDS - the 'ID' value read from the file
#   PATHS   - the path of the file itself
#   DATAS   - the data read from the file, plus its directory under 'path'
SIM_IDS, PATHS, DATAS = [], [], []

ERRORS = False
WARNINGS = False

for info_file in find_files(**kwargs_fileFinder):
    record = fileHandler.get_data_from_file(info_file)
    record['path'] = os.path.dirname(info_file)

    # look the ID up first, so nothing is stored for a file without one
    SIM_IDS.append(record['ID'])

    DATAS.append(record)
    PATHS.append(info_file)

## Add simulations

In [9]:
# open database
# Connect to the database file `db` again; the session returned here is the
# one the LAMMPS entries, keywords and log-file metadata are added through.
session = connect_database(db_path=db)

In [10]:
# Everything below happens in one transaction: a single commit at the very
# end, and a rollback if any step fails, so that re-running the cell after an
# error does not add entries a second time.
try:
    # ============================================= #
    # add entries
    # ============================================= #

    for data in DATAS:
        sim = Main(
            entry_id = data['ID'],
            url = data['MEDIAWIKI'],
            owner = user,
            type='LAMMPS',
            path = data['path'],
            # the INFO text is optional
            description = data['INFO'] if 'INFO' in data.keys() else ""
        )
        session.add(sim)


    # ============================================= #
    # add keywords
    # ============================================= #

    # every simulation of this example set gets the same four keywords
    for sim_id in SIM_IDS:
        sim = session.query(Main).filter(Main.entry_id == sim_id).one()
        sim.keywords.extend([
            Keywords(name='polymorph', value='calcite'),
            Keywords(name='solvation state', value='bulk'),
            Keywords(name='system state', value='crystalline'),
            Keywords(name='force field', value='Raiteri2015'),
        ])
        session.add(sim)


    # ============================================= #
    # scan logfiles
    # ============================================= #

    for sim_id in SIM_IDS:
        sim = session.query(Main).filter(Main.entry_id == sim_id).one()

        logfiles = find_files(pattern='log.*.lammps',
                              path=sim.path,
                              dir_ignore=['build',
                                          'analysis',
                                          'EM_and_Equilibration'])

        # order by the integer between 'log.' and '.lammps'
        # (log.2.lammps before log.10.lammps)
        logfiles.sort(key=lambda x: int(
            os.path.basename(x).replace('log.', '').replace('.lammps', '')
        ))

        dict_metagroups = llfp.logfile_to_metagroups(logfiles,
                                                     combine=True,
                                                     sort=False)

        # one add_meta_data call per meta group returned by the parser
        for meta_group_name, meta_group_data in dict_metagroups.items():
            add_meta_data(session=session,
                          entry_id=sim.entry_id,
                          meta_group_name=meta_group_name,
                          **meta_group_data)

    session.commit()
except Exception:
    # discard everything added so far instead of leaving it pending
    session.rollback()
    raise
finally:
    session.close()