# Search AiiDA Database for Slab Models

In [None]:
from aiida import load_dbenv, is_dbenv_loaded
from aiida.backends import settings
if not is_dbenv_loaded():
    load_dbenv(profile=settings.AIIDADB_PROFILE)
    
from aiida.orm.querybuilder import QueryBuilder
from aiida.orm.calculation.work import WorkCalculation
from aiida.orm.calculation.job import JobCalculation
from aiida.orm import load_node

import ase.io
from base64 import b64encode
import StringIO
import numpy as np
import ipywidgets as ipw
import matplotlib.pyplot as plt
from IPython.display import display, clear_output

from tempfile import NamedTemporaryFile

In [None]:
############################   START OF PREPROCESSING   ###############################

In [None]:
# Nodes whose stored 'preprocess_version' extra is missing or lower than this
# value are picked up (again) by the preprocessing functions below.
PREPROCESS_VERSION = 0.83


def preprocess_newbies():
    """Preprocess all SlabGeoOptWorkChain nodes that are not up to date.

    A node is selected when it has no 'preprocess_version' extra or when
    that extra is lower than PREPROCESS_VERSION. Workchains that are not
    sealed yet are skipped. Each remaining node is passed to
    preprocess_one() and then tagged with the extras
    'preprocess_successful' and 'preprocess_version' (plus
    'preprocess_error' when preprocessing raised).
    """
    not_up_to_date = [
        {'extras': {'!has_key': 'preprocess_version'}},
        {'extras.preprocess_version': {'<': PREPROCESS_VERSION}},
    ]
    query = QueryBuilder()
    query.append(WorkCalculation, filters={
        'attributes._process_label': 'SlabGeoOptWorkChain',
        'or': not_up_to_date,
    })

    # all() on purpose: iterall() would interfere with set_extra()
    for row in query.all():
        node = row[0]

        if not node.is_sealed:
            print("Skipping underway workchain PK %d"%node.pk)
            continue

        # give every preprocessed node an 'obsolete' flag to filter on
        if 'obsolete' not in node.get_extras():
            node.set_extra('obsolete', False)

        try:
            preprocess_one(node)
            node.set_extra('preprocess_successful', True)
            node.set_extra('preprocess_version', PREPROCESS_VERSION)
            print("Preprocessed PK %d"%node.pk)
        except Exception as err:
            # record the failure on the node so it is not retried until
            # PREPROCESS_VERSION is raised
            node.set_extra('preprocess_successful', False)
            node.set_extra('preprocess_error', str(err))
            node.set_extra('preprocess_version', PREPROCESS_VERSION)
            print("Failed to preprocess PK %d: %s"%(node.pk, err))

In [None]:
def preprocess_one(workcalc):
    """Compute and store the search extras of one SlabGeoOptWorkChain node.

    The following extras are set on ``workcalc``: 'formula',
    'structure_description', 'opt_structure_uuid', 'energy' and
    'thumbnail'.

    :param workcalc: the workchain node to preprocess.
    :raises Exception: if the input structure contains no Au atom or if no
        'slab_geo_opt' calculation is found among the workchain outputs.
    """

    def get_calc_by_label(workcalc, label):
        """Return the first JobCalculation output of workcalc with this label."""
        qb = QueryBuilder()
        qb.append(WorkCalculation, filters={'uuid':workcalc.uuid})
        qb.append(JobCalculation, output_of=WorkCalculation, filters={'label':label})
        #qb.order_by({'calc':[{'id':{'order':'desc'}}]})
        matches = qb.all()  # run the query once instead of count() + all()
        if not matches:
            raise Exception("Could not find %s calculation."%label)
        return matches[0][0]

    # formula
    structure = workcalc.inp.structure
    ase_struct = structure.get_ase()
    # 79 is the atomic number of Au: atoms listed before the first Au atom
    # are treated as the molecule, the first Au atom and everything after
    # it as the slab.
    gold_positions = np.argwhere(ase_struct.numbers == 79)
    if gold_positions.size == 0:
        raise Exception("Structure contains no Au atoms, cannot identify the slab.")
    first_slab_atom = gold_positions[0, 0]
    mol_formula = ase_struct[:first_slab_atom].get_chemical_formula()
    slab_formula = ase_struct[first_slab_atom:].get_chemical_formula()
    workcalc.set_extra('formula', '{}_slab({})'.format(mol_formula, slab_formula))
    workcalc.set_extra('structure_description', structure.description)

    # optimized structure
    geopt_calc = get_calc_by_label(workcalc, "slab_geo_opt") # TODO deal with restarts, check final state
    opt_structure = geopt_calc.out.output_structure
    workcalc.set_extra('opt_structure_uuid', opt_structure.uuid)
    workcalc.set_extra('energy', geopt_calc.res.energy)

    # thumbnail (rendered from the input structure)
    thumbnail = render_thumbnail(ase_struct)
    workcalc.set_extra('thumbnail', thumbnail)
    
    
#     # ensure all steps succeed
#     all_steps = ['cell_opt1', 'cell_opt2', 'scf', 'export_hartree', 'bands', 'export_pdos', 'bands_lowres', 'export_orbitals']
#     if any([k.name[-1].isdigit() for k in structure.kinds]): # magnetization ?
#         all_steps.append('export_spinden')
#     for label in all_steps:
#         calc = get_calc_by_label(workcalc, label)
#         if calc.get_state() != 'FINISHED':
#             raise(Exception("Calculation %s in state %s."%(label, calc.get_state())))
#         if "aiida.out" not in calc.out.retrieved.get_folder_list():
#             raise(Exception("Calculation %s did not retrive aiida.out"%label))
#         fn = calc.out.retrieved.get_abs_path("aiida.out")
#         content = open(fn).read()
#         if "JOB DONE." not in content:
#             raise(Exception("Calculation %s did not print JOB DONE."%label))

In [None]:
def render_thumbnail(atoms):
    """Render ``atoms`` as a PNG image and return it base64-encoded.

    :param atoms: the object handed to ``ase.io.write`` (an ASE structure).
    :return: the base64 encoding of the PNG file content, as returned by
        ``base64.b64encode``.
    """
    # ase.io.write does not accept StringIO, hence the temporary file.
    # The context managers make sure both handles are closed (and the
    # temporary file removed) even when rendering fails.
    with NamedTemporaryFile() as tmp:
        ase.io.write(tmp.name, atoms, format='png')
        # PNG is binary data: read it in binary mode
        with open(tmp.name, 'rb') as png_file:
            raw = png_file.read()
    return b64encode(raw)

In [None]:
def preprocess_spm_calcs():
    """Preprocess all STM, PDOS and AFM workchains that are not up to date.

    A workchain is selected when it has no 'preprocess_version' extra or
    when that extra is lower than PREPROCESS_VERSION. Workchains that are
    not sealed, or that have an output in state 'WITHSCHEDULER', are
    skipped. The others are handed to the preprocessing function matching
    their process label and tagged with 'preprocess_successful' and
    'preprocess_version' (plus 'preprocess_error' on failure).
    """
    # (process label, preprocessing function, success message)
    known_workchains = (
        ('STMWorkChain', preprocess_one_stm, "Preprocessed PK %d (STM)"),
        ('PdosWorkChain', preprocess_one_pdos, "Preprocessed PK %d (PDOS)"),
        ('AfmWorkChain', preprocess_one_afm, "Preprocessed PK %d (AFM)"),
    )
    not_up_to_date = [
        {'extras': {'!has_key': 'preprocess_version'}},
        {'extras.preprocess_version': {'<': PREPROCESS_VERSION}},
    ]

    query = QueryBuilder()
    query.append(WorkCalculation, filters={
        'attributes._process_label': {'in': ['STMWorkChain', 'PdosWorkChain', 'AfmWorkChain']},
        'or': not_up_to_date,
    })

    for row in query.all():
        node = row[0]

        # calculation not finished: try again on a later run
        if not node.is_sealed:
            print("Skipping underway workchain PK %d"%node.pk)
            continue
        output_states = [output.get_state() for output in node.get_outputs()]
        if 'WITHSCHEDULER' in output_states:
            print("Skipping underway workchain PK %d"%node.pk)
            continue

        if 'obsolete' not in node.get_extras():
            node.set_extra('obsolete', False)

        try:
            process_label = node.get_attrs()['_process_label']
            for label, preprocess, success_message in known_workchains:
                if process_label == label:
                    preprocess(node)
                    print(success_message%node.pk)
                    break

            node.set_extra('preprocess_successful', True)
            node.set_extra('preprocess_version', PREPROCESS_VERSION)

        except Exception as err:
            # record the failure on the node itself
            node.set_extra('preprocess_successful', False)
            node.set_extra('preprocess_error', str(err))
            node.set_extra('preprocess_version', PREPROCESS_VERSION)
            print("Failed to preprocess PK %d: %s"%(node.pk, err))

def preprocess_one_stm(workcalc):
    """Check a finished STM workchain and register it on its input structure.

    The PK of ``workcalc`` is stored on ``workcalc.inp.structure`` as the
    extra 'stm_<N>_pk', where N is the lowest positive number not yet used
    by another 'stm_<N>...' extra. Nothing is written when the PK is
    already the value of an extra whose key starts with 'stm'.

    :param workcalc: the STM workchain node.
    :raises Exception: if the workchain has fewer than three outputs, if
        its third output is not in state 'FINISHED', or if that output did
        not retrieve 'sts.npz'.
    """
    outputs = workcalc.get_outputs()  # fetch once, reuse below
    if len(outputs) < 3:
        raise Exception("stm_image never started.")

    stm_image_calc = outputs[2]
    state = stm_image_calc.get_state()
    if state != 'FINISHED':
        raise Exception("Calculation stm_image in state %s."%state)

    if "sts.npz" not in stm_image_calc.out.retrieved.get_folder_list():
        raise Exception("Calculation stm_image did not retreive sts.npz")

    structure = workcalc.inp.structure
    stm_extras = [(key, value) for key, value in structure.get_extras().items()
                  if key.startswith('stm')]

    # already registered on this structure: nothing to do
    if workcalc.pk in [value for _, value in stm_extras]:
        return

    # collect the numbers in use; keys without a numeric second field
    # (including keys without any underscore) are ignored
    used_numbers = set()
    for key, _ in stm_extras:
        fields = key.split('_')
        if len(fields) > 1 and fields[1].isdigit():
            used_numbers.add(int(fields[1]))

    # lowest free number; unbounded, so an existing entry is never overwritten
    stm_nr = 1
    while stm_nr in used_numbers:
        stm_nr += 1
    structure.set_extra('stm_%d_pk'%stm_nr, workcalc.pk)
    
def preprocess_one_pdos(workcalc):
    """Check a finished PDOS workchain and register it on its slab structure.

    The PK of ``workcalc`` is stored on ``workcalc.inp.slabsys_structure``
    as the extra 'pdos_<N>_pk', where N is the lowest positive number not
    yet used by another 'pdos_<N>...' extra. Nothing is written when the PK
    is already the value of an extra whose key starts with 'pdos'.

    :param workcalc: the PDOS workchain node.
    :raises Exception: if the workchain has fewer than five outputs, if any
        output is not in state 'FINISHED', if the first output did not
        retrieve 'aiida-list1-1.pdos', or if the fifth output did not
        retrieve 'overlap.npz'.
    """
    outputs = workcalc.get_outputs()  # fetch once, reuse below
    if len(outputs) < 5:
        raise Exception("overlap never started.")

    if not all(calc.get_state() == 'FINISHED' for calc in outputs):
        raise Exception("Not all calculations are 'FINISHED'")

    slab_scf = outputs[0]
    overlap = outputs[4]

    if "aiida-list1-1.pdos" not in slab_scf.out.retrieved.get_folder_list():
        raise Exception("aiida-list1-1.pdos was not retrieved!")

    if "overlap.npz" not in overlap.out.retrieved.get_folder_list():
        raise Exception("overlap.npz was not retrieved!")

    structure = workcalc.inp.slabsys_structure
    pdos_extras = [(key, value) for key, value in structure.get_extras().items()
                   if key.startswith('pdos')]

    # already registered on this structure: nothing to do
    if workcalc.pk in [value for _, value in pdos_extras]:
        return

    # collect the numbers in use; keys without a numeric second field
    # (including keys without any underscore) are ignored
    used_numbers = set()
    for key, _ in pdos_extras:
        fields = key.split('_')
        if len(fields) > 1 and fields[1].isdigit():
            used_numbers.add(int(fields[1]))

    # lowest free number; unbounded, so an existing entry is never overwritten
    nr = 1
    while nr in used_numbers:
        nr += 1
    structure.set_extra('pdos_%d_pk'%nr, workcalc.pk)
    
def preprocess_one_afm(workcalc):
    """Check a finished AFM workchain and register it on its input structure.

    The PK of ``workcalc`` is stored on ``workcalc.inp.structure`` as the
    extra 'afm_<N>_pk', where N is the lowest positive number not yet used
    by another 'afm_<N>...' extra. Nothing is written when the PK is
    already the value of an extra whose key starts with 'afm'.

    :param workcalc: the AFM workchain node.
    :raises Exception: if the workchain has fewer than three outputs, if
        any output is not in state 'FINISHED', or if the second or third
        output did not retrieve 'df.npy'.
    """
    outputs = workcalc.get_outputs()  # fetch once, reuse below
    if len(outputs) < 3:
        raise Exception("afm never started.")

    if not all(calc.get_state() == 'FINISHED' for calc in outputs):
        raise Exception("Not all calculations are 'FINISHED'")

    afm_pp = outputs[1]
    afm_2pp = outputs[2]

    if "df.npy" not in afm_pp.out.retrieved.get_folder_list():
        raise Exception("df.npy was not retrieved!")

    if "df.npy" not in afm_2pp.out.retrieved.get_folder_list():
        raise Exception("df.npy was not retrieved!")

    structure = workcalc.inp.structure
    afm_extras = [(key, value) for key, value in structure.get_extras().items()
                  if key.startswith('afm')]

    # already registered on this structure: nothing to do
    if workcalc.pk in [value for _, value in afm_extras]:
        return

    # collect the numbers in use; keys without a numeric second field
    # (including keys without any underscore) are ignored
    used_numbers = set()
    for key, _ in afm_extras:
        fields = key.split('_')
        if len(fields) > 1 and fields[1].isdigit():
            used_numbers.add(int(fields[1]))

    # lowest free number; unbounded, so an existing entry is never overwritten
    nr = 1
    while nr in used_numbers:
        nr += 1
    structure.set_extra('afm_%d_pk'%nr, workcalc.pk)
    

In [None]:
############################   END OF PREPROCESSING   ###############################

In [None]:
def _html_escape(value):
    """Format value with %s and escape it for HTML text and attribute values."""
    text = '%s' % (value,)
    # '&' must be replaced first so the other entities are not escaped twice
    for char, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'),
                         ('"', '&quot;'), ("'", '&#39;')):
        text = text.replace(char, entity)
    return text


def search():
    """Run the preprocessing, query the database and render the result table.

    The search criteria are read from the module-level widgets ``inp_pks``,
    ``inp_formula`` and ``text_description``; progress messages and the
    final HTML table are written to ``results.value``. Only successfully
    preprocessed, non-obsolete SlabGeoOptWorkChain nodes tagged with the
    current PREPROCESS_VERSION are listed, newest first.
    """
    results.value = "preprocessing..."
    preprocess_newbies()
    try:
        # imported only to find out whether the scanning_probe app is installed
        import apps.scanning_probe
    except ImportError:
        print("Warning: scanning_probe app not found, skipping spm preprocess.")
    else:
        # best effort: a failing spm preprocess must not prevent the search,
        # but it is reported as what it is instead of as a missing app
        try:
            preprocess_spm_calcs()
        except Exception as err:
            print("Warning: spm preprocess failed: %s" % err)

    results.value = "searching..."

    # html table header
    html_parts = [
        '<style>#aiida_results td,th {padding: 2px}</style>',
        '<table border=1 id="aiida_results" style="margin:10px;"><tr>',
        '<th>PK</th>',
        '<th>Creation Time</th>',
        '<th>Formula</th>',
        '<th>CalcName</th>',
        '<th>Energy</th>',
        '<th>Structure</th>',
        '<th>Extra calculations</th>',
        '</tr>',
    ]

    # query AiiDA database
    filters = {
        'attributes._process_label': 'SlabGeoOptWorkChain',
        'extras.preprocess_version': PREPROCESS_VERSION,
        'extras.preprocess_successful': True,
        'extras.obsolete': False,
    }

    pk_list = inp_pks.value.strip().split()
    if pk_list:
        filters['id'] = {'in': pk_list}

    # test the split list, not the raw text: whitespace-only input must not
    # produce an empty 'in' filter
    formula_list = inp_formula.value.strip().split()
    if formula_list:
        filters['extras.formula'] = {'in': formula_list}

    if len(text_description.value) > 1:
        filters['description'] = {'like': '%{}%'.format(text_description.value)}

    qb = QueryBuilder()
    qb.append(WorkCalculation, filters=filters)
    qb.order_by({WorkCalculation:{'ctime':'desc'}})

    # (extras key prefix, link label, viewer notebook) per extra calculation type
    viewers = (
        ('stm', 'STM', "../../scanning_probe/stm/view_stm.ipynb"),
        ('pdos', 'PDOS', "../../scanning_probe/pdos/view_pdos.ipynb"),
        ('afm', 'AFM', "../../scanning_probe/afm/view_afm.ipynb"),
    )

    n_found = 0
    for node_tuple in qb.iterall():
        n_found += 1
        node = node_tuple[0]
        thumbnail = node.get_extra('thumbnail')
        description = node.get_extra('structure_description')
        opt_structure_uuid = node.get_extra('opt_structure_uuid')

        ## Find all extra calculations done on the optimized geometry
        opt_structure = load_node(opt_structure_uuid)
        st_extras = opt_structure.get_extras()
        extra_calc_links = []
        for key in sorted(st_extras):
            fields = key.split('_')
            if len(fields) < 2:
                continue  # not of the form '<type>_<nr>...'
            for prefix, label, viewer_path in viewers:
                if key.startswith(prefix):
                    extra_calc_links.append(
                        "<a target='_blank' href='%s?pk=%s'>%s %s</a><br />"
                        % (viewer_path, st_extras[key], label, fields[1]))

        extra_calc_area = ("<div id='wrapper' style='overflow-y:auto; height:100px; line-height:1.5;'> %s </div>"
                           % "".join(extra_calc_links))

        # append table row; free-text values are escaped so that quotes or
        # markup in a description cannot break the table
        html_parts.extend([
            '<tr>',
            '<td>%d</td>' % node.pk,
            '<td>%s</td>' % node.ctime.strftime("%Y-%m-%d %H:%M"),
            '<td>%s</td>' % _html_escape(node.get_extra('formula')),
            '<td>%s</td>' % _html_escape(node.description),
            '<td>%f</td>' % node.get_extra('energy'),
            '<td><a target="_blank" href="../export_structure.ipynb?uuid=%s">' % opt_structure_uuid,
            '<img width="100px" src="data:image/png;base64,%s" title="%s"></a></td>'
            % (thumbnail, _html_escape(description)),
            '<td>%s</td>' % extra_calc_area,
            '</tr>',
        ])

    html_parts.append('</table>')
    html_parts.append('Found %d matching entries.<br>' % n_found)

    results.value = "".join(html_parts)

In [None]:
# search UI: three text inputs that share one label width and one total width
style = {"description_width": "100px"}
layout = ipw.Layout(width="592px")

inp_pks = ipw.Text(
    description='PKs',
    placeholder='e.g. 4062 4753 (space separated)',
    layout=layout,
    style=style,
)
inp_formula = ipw.Text(
    description='Formulas:',
    placeholder='e.g. C44H16 C36H4',
    layout=layout,
    style=style,
)
text_description = ipw.Text(
    description='Calculation Name: ',
    placeholder='e.g. a great name.',
    layout=layout,
    style=style,
)

# shown above the search button, in this order
search_crit = [inp_pks, inp_formula, text_description]

In [None]:
# output areas: 'results' receives the HTML table written by search(),
# 'info_out' is the output context the search runs in
results = ipw.HTML()
info_out = ipw.Output()


def on_click(b):
    """Button callback: clear info_out and run search() inside it."""
    with info_out:
        clear_output()
        search()


button = ipw.Button(description="Search")
button.on_click(on_click)

# search criteria on top, then the button, the result table and the messages
app = ipw.VBox(children=search_crit + [button, results, info_out])
display(app)

In [None]:
# Alternative implementation (disabled): the table is built from ipywidgets, which makes it easy to embed buttons, dropdowns, ...

#def search():
#
#    results.value = "preprocessing..."
#    preprocess_newbies()
#    
#    results.value = "searching..."
#    
#    headers_and_colwidths = [
#        ('PK', '40px'),
#        ('Creation Time', '120px'),
#        ('Formula', '200px'),
#        ('CalcName', '120px'),
#        ('Energy', '120px'),
#        ('Structure', '100px'),
#        ('Extra calculations', '140px')
#    ]
#
#    html_h_and_cw = [(ipw.HTML('<b>%s</b>' % h_cw[0]), h_cw[1]) for h_cw in headers_and_colwidths]
#    boxed_headers = [ipw.HBox([h_cw[0]], layout=ipw.Layout(border='0.1px solid', width=h_cw[1])) for h_cw in html_h_and_cw]
#    hboxes = []
#    hboxes.append(ipw.HBox(boxed_headers))
#    
#    # query AiiDA database
#    filters = {}
#    filters['attributes._process_label'] = 'SlabGeoOptWorkChain'
#    filters['extras.preprocess_version'] = PREPROCESS_VERSION
#    filters['extras.preprocess_successful'] = True
#    filters['extras.obsolete'] = False
#    
#    pk_list = inp_pks.value.strip().split()
#    if pk_list:
#        filters['id'] = {'in': pk_list}
#        
#    formula_list = inp_formula.value.strip().split()
#    if inp_formula.value:
#        filters['extras.formula'] = {'in': formula_list}
#        
#    if len(text_description.value) > 1:
#        filters['description'] = {'like': '%{}%'.format(text_description.value)}
#
#    qb = QueryBuilder()        
#    qb.append(WorkCalculation, filters=filters)
#    qb.order_by({WorkCalculation:{'ctime':'desc'}})
#
#    for i, node_tuple in enumerate(qb.iterall()):
#        node = node_tuple[0]
#        thumbnail = node.get_extra('thumbnail')
#        description = node.get_extra('structure_description')
#        opt_structure_uuid = node.get_extra('opt_structure_uuid')
#        
#        # Thumbnail with clickable export
#        html = '<td><a target="_blank" href="../export_structure.ipynb?uuid=%s">'%opt_structure_uuid
#        html += '<img width="100px" src="data:image/png;base64,%s" title="%s"></a></td>' % (thumbnail, description)
#        
#        # Buttons for extra calcs
#        
#        
#        row_content = [
#            ipw.HTML('%s' % node.pk),
#            ipw.HTML('%s' % node.ctime.strftime("%Y-%m-%d %H:%M")),
#            ipw.HTML('%s' % node.get_extra('formula')),
#            ipw.HTML('%s' % node.description),
#            ipw.HTML('%s' % node.get_extra('energy')),
#            ipw.HTML(html),
#            ipw.HTML('%s' % 'test')
#        ]
#        
#        boxed_row = [ipw.HBox([row_el], layout=ipw.Layout(border='0.1px solid', width=h_cw[1]))
#                     for row_el, h_cw in zip(row_content, headers_and_colwidths)]
#        
#        hboxes.append(ipw.HBox(boxed_row))
#        
#    results.children += tuple(hboxes)
#    
#def on_click(b):
#    with info_out:
#        clear_output()
#        search()
#
#button = ipw.Button(description="Search")
#button.on_click(on_click)
#results = ipw.VBox()
#info_out = ipw.Output()
#app = ipw.VBox(children=search_crit + [button, results, info_out])
#display(app)