# Search AiiDA Database for Nanoribbons

In [None]:
from aiida import load_dbenv, is_dbenv_loaded
from aiida.backends import settings
if not is_dbenv_loaded():
    load_dbenv(profile=settings.AIIDADB_PROFILE)
    
from aiida.orm.querybuilder import QueryBuilder
from aiida.orm.calculation.work import WorkCalculation
from aiida.orm.calculation.job import JobCalculation

from base64 import b64encode
import StringIO
import numpy as np
import ipywidgets as ipw
import matplotlib.pyplot as plt
from IPython.display import display, clear_output

from ase.data import covalent_radii, atomic_numbers
from ase.data.colors import cpk_colors
from ase.neighborlist import NeighborList

In [None]:
############################   START OF PREPROCESSING   ###############################

In [None]:
# Version stamp written to each workchain's extras; workchains stamped with a
# lower version (or not stamped at all) are preprocessed again.
PREPROCESS_VERSION = 6.002


def preprocess_newbies():
    """Preprocess every NanoribbonWorkChain whose extras are missing or outdated.

    Selects workchains that have no 'preprocess_version' extra or one that is
    lower than PREPROCESS_VERSION, skips those that are not sealed yet, and
    runs preprocess_one() on the rest. The outcome is recorded in the extras
    'preprocess_successful', 'preprocess_version' and, on failure,
    'preprocess_error'. Progress is printed for every workchain.
    """
    stale_filters = {
        'attributes._process_label': 'NanoribbonWorkChain',
        'or': [
            {'extras': {'!has_key': 'preprocess_version'}},
            {'extras.preprocess_version': {'<': PREPROCESS_VERSION}},
        ],
    }
    query = QueryBuilder()
    query.append(WorkCalculation, filters=stale_filters)

    # all() is used on purpose: iterall() would interfere with set_extra()
    for row in query.all():
        workcalc = row[0]
        if not workcalc.is_sealed:
            print("Skipping underway workchain PK %d" % workcalc.pk)
            continue
        try:
            preprocess_one(workcalc)
            workcalc.set_extra('preprocess_successful', True)
            workcalc.set_extra('preprocess_version', PREPROCESS_VERSION)
            print("Preprocessed PK %d" % workcalc.pk)
        except Exception as error:
            # A failure is stamped with the current version too, so the same
            # workchain is not retried until PREPROCESS_VERSION is raised.
            workcalc.set_extra('preprocess_successful', False)
            workcalc.set_extra('preprocess_error', str(error))
            workcalc.set_extra('preprocess_version', PREPROCESS_VERSION)
            print("Failed to preprocess PK %d: %s" % (workcalc.pk, error))

In [None]:
def preprocess_one(workcalc):
    """Extract searchable properties of one workchain and store them as extras.

    The following extras are set on ``workcalc``: 'formula',
    'structure_description', 'thumbnail', 'total_energy',
    'opt_structure_uuid', 'absolute_magnetization', 'total_magnetization',
    'is_insulator', 'gap', 'vacuum_level', 'fermi_energy', 'homo' and 'lumo'.
    The last three are stored relative to the vacuum level.

    :param workcalc: the NanoribbonWorkChain node to process.
    :raises Exception: if an expected calculation is missing (or not unique),
        is not in state FINISHED, did not retrieve ``aiida.out``, did not
        print ``JOB DONE.``, or reports energies in a unit other than eV.
    """

    def get_calc_by_label(workcalc, label):
        """Return the single JobCalculation output of workcalc with this label."""
        qb = QueryBuilder()
        qb.append(WorkCalculation, filters={'uuid': workcalc.uuid})
        qb.append(JobCalculation, output_of=WorkCalculation, filters={'label': label})
        if qb.count() != 1:
            raise Exception("Could not find %s calculation." % label)
        return qb.first()[0]

    # formula
    structure = workcalc.inp.structure
    ase_struct = structure.get_ase()
    formula = ase_struct.get_chemical_formula()
    workcalc.set_extra('formula', formula)
    workcalc.set_extra('structure_description', structure.description)

    # thumbnail
    thumbnail = render_thumbnail(ase_struct)
    workcalc.set_extra('thumbnail', thumbnail)

    # ensure all steps succeed
    all_steps = ['cell_opt1', 'cell_opt2', 'scf', 'export_hartree', 'bands',
                 'export_pdos', 'bands_lowres', 'export_orbitals']
    # kind names ending in a digit are taken as the marker of a magnetized
    # structure, for which the spin density export step is expected as well
    if any(k.name[-1].isdigit() for k in structure.kinds):
        all_steps.append('export_spinden')
    for label in all_steps:
        calc = get_calc_by_label(workcalc, label)
        if calc.get_state() != 'FINISHED':
            raise Exception("Calculation %s in state %s." % (label, calc.get_state()))
        if "aiida.out" not in calc.out.retrieved.get_folder_list():
            raise Exception("Calculation %s did not retrive aiida.out" % label)
        fn = calc.out.retrieved.get_abs_path("aiida.out")
        # context manager so the file handle is closed for every step
        with open(fn) as handle:
            content = handle.read()
        if "JOB DONE." not in content:
            raise Exception("Calculation %s did not print JOB DONE." % label)

    # energies
    scf_calc = get_calc_by_label(workcalc, "scf")
    # explicit exceptions instead of asserts: they survive `python -O` and
    # leave a readable message in the 'preprocess_error' extra
    if scf_calc.res['fermi_energy_units'] != 'eV':
        raise Exception("Fermi energy of scf calculation is not given in eV.")
    fermi_energy = scf_calc.res['fermi_energy']
    if scf_calc.res['energy_units'] != 'eV':
        raise Exception("Total energy of scf calculation is not given in eV.")
    workcalc.set_extra('total_energy', scf_calc.res['energy'])
    workcalc.set_extra('opt_structure_uuid', scf_calc.inp.structure.uuid)

    # magnetization (defaults to 0.0 when the attribute is absent)
    res = scf_calc.out.output_parameters
    workcalc.set_extra('absolute_magnetization', res.get_attr('absolute_magnetization', 0.0))
    workcalc.set_extra('total_magnetization', res.get_attr('total_magnetization', 0.0))

    # HOMO, LUMO, and Gap
    bands_calc = get_calc_by_label(workcalc, "bands")
    bands = bands_calc.out.output_band
    is_insulator, gap, homo, lumo = find_bandgap(bands, fermi_energy=fermi_energy)
    workcalc.set_extra('is_insulator', is_insulator)
    workcalc.set_extra('gap', gap)

    # vacuum level
    export_hartree_calc = get_calc_by_label(workcalc, "export_hartree")
    fn = export_hartree_calc.out.retrieved.get_abs_path("vacuum_hartree.dat")
    data = np.loadtxt(fn)
    # 27.211385 * 0.5 is half a Hartree in eV, i.e. one Rydberg; presumably the
    # third column of the file holds the potential in Rydberg -- TODO confirm
    vacuum_level = np.mean(data[:, 2]) * 27.211385 * 0.5
    workcalc.set_extra('vacuum_level', vacuum_level)

    # store shifted energies
    workcalc.set_extra('fermi_energy', fermi_energy - vacuum_level)
    if is_insulator:
        workcalc.set_extra('homo', homo - vacuum_level)
        workcalc.set_extra('lumo', lumo - vacuum_level)
    else:
        # no gap: HOMO and LUMO both fall back to the (shifted) Fermi energy
        workcalc.set_extra('homo', fermi_energy - vacuum_level)
        workcalc.set_extra('lumo', fermi_energy - vacuum_level)

In [None]:
def render_thumbnail(ase_struct):
    """Render an x-y projection of a structure as a base64-encoded PNG.

    The structure is repeated twice along the first cell vector. Every atom
    is drawn as a circle of half its covalent radius in its CPK colour, and
    every neighbour pair found by the ASE NeighborList is drawn as a line
    from the atom to the midpoint with its neighbour.

    :param ase_struct: ASE structure to draw.
    :return: base64 encoding of the PNG image data.
    """
    s = ase_struct.repeat((2, 1, 1))
    cov_radii = [covalent_radii[a.number] for a in s]
    nl = NeighborList(cov_radii, bothways=True, self_interaction=False)
    nl.update(s)

    fig, ax = plt.subplots()
    try:
        ax.set_aspect(1)
        ax.axes.set_xlim([0, s.cell[0][0]])
        # trims 5 length units from both ends of the cell's y extent
        ax.axes.set_ylim([5, s.cell[1][1] - 5])
        # set_axis_bgcolor was replaced by set_facecolor in newer matplotlib;
        # prefer the new name and fall back to the old one where it is missing
        set_background = getattr(ax, 'set_facecolor', None) or ax.set_axis_bgcolor
        set_background((0.85, 0.85, 0.85))
        ax.axes.get_yaxis().set_visible(False)
        ax.tick_params(axis='x', which='both', bottom='off', top='off', labelbottom='off')

        for at in s:
            x0, y0, _ = at.position
            n = atomic_numbers[at.symbol]
            # circles
            ax.add_artist(plt.Circle((x0, y0), covalent_radii[n] * 0.5,
                                     color=cpk_colors[n], fill=True, clip_on=True))
            # bonds: half segment from this atom to the midpoint with the neighbour
            for theneig in nl.get_neighbors(at.index)[0]:
                x, y, _ = (s[theneig].position + at.position) / 2
                # only short half-bonds are drawn; presumably this skips pairs
                # that are neighbours only through the periodic boundary
                if (x - x0) ** 2 + (y - y0) ** 2 < 2:
                    ax.plot([x0, x], [y0, y], color=cpk_colors[n], linewidth=2, linestyle='-')

        img = StringIO.StringIO()
        fig.savefig(img, format="png", dpi=72, bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this one figure would leak per preprocessed workchain
        plt.close(fig)
    return b64encode(img.getvalue())

In [None]:
def calc_vacuum_level(self):
    """Report the mean of the third column of ``vacuum_hartree.dat``.

    Reads the file from the retrieved folder of ``self.ctx.hartree`` (which
    must be in state FINISHED), averages its third column, reports the value
    and emits it as the ``output_parameters`` output under the key
    'vacuum_level'. No unit conversion is applied here.

    NOTE(review): ``ParameterData`` is not imported in the visible part of
    this file, and the ``self`` argument suggests this was written as a
    workchain step -- confirm whether this function is still needed here.
    """
    self.report("Calculating vacuum level")
    hartree_calc = self.ctx.hartree
    assert hartree_calc.get_state() == 'FINISHED'

    hartree_file = hartree_calc.out.retrieved.get_abs_path("vacuum_hartree.dat")
    table = np.loadtxt(hartree_file)
    vacuum_level = np.mean(table[:, 2])
    self.report("Found vacuum level: %f" % vacuum_level)

    self.out("output_parameters", ParameterData(dict={'vacuum_level': vacuum_level}))

In [None]:
def find_bandgap(bandsdata, number_electrons=None, fermi_energy=None):
    """
    Tries to guess whether the bandsdata represent an insulator.
    This method is meant to be used only for electronic bands (not phonons)
    By default, it will try to use the occupations to guess the number of
    electrons and find the Fermi Energy, otherwise, it can be provided
    explicitly.
    Also, there is an implicit assumption that the kpoints grid is
    "sufficiently" dense, so that the bandsdata are not missing the
    intersection between valence and conduction band if present.
    Use this function with care!

    The band array returned by ``bandsdata.get_bands()`` is never modified.

    :param bandsdata: object exposing ``units`` (must be 'eV') and
        ``get_bands()``; ``get_bands(also_occupations=True)`` is only needed
        when neither number_electrons nor fermi_energy is given.
    :param (float) number_electrons: (optional) number of electrons in the unit cell
    :param (float) fermi_energy: (optional) value of the fermi energy.

    :note: By default, the algorithm uses the occupations array
      to guess the number of electrons and the occupied bands. This is to be
      used with care, because the occupations could be smeared so at a
      non-zero temperature, with the unwanted effect that the conduction bands
      might be occupied in an insulator.
      Prefer to pass the number_electrons explicitly

    :note: Only one between number_electrons and fermi_energy can be specified at the
      same time.

    :return: (is_insulator, gap, homo, lumo). is_insulator is a boolean.
             For an insulator, gap is the positive band gap and homo/lumo are
             the highest occupied / lowest unoccupied band energies.
             Otherwise homo and lumo are None and gap is None, zero or
             negative depending on how the metallic character was detected
             (zero e.g. when a band crosses the Fermi energy or the homo is
             equal to the lumo).
    :raises ValueError: if both number_electrons and fermi_energy are given,
             if the Fermi energy lies outside the range of the bands, or if
             there are not enough bands to locate the lumo.
    :raises KeyError: if neither input is given and occupations are missing.
    """

    def nint(num):
        """
        Stable rounding function
        """
        if (num > 0):
            return int(num + .5)
        else:
            return int(num - .5)

    # compare against None: a Fermi energy of exactly 0.0 (or zero electrons)
    # is a legitimate value and must not bypass this check
    if fermi_energy is not None and number_electrons is not None:
        raise ValueError("Specify either the number of electrons or the "
                         "Fermi energy, but not both")

    assert bandsdata.units == 'eV'
    stored_bands = bandsdata.get_bands()

    if len(stored_bands.shape) == 3:
        # I write the algorithm for the generic case of having both the
        # spin up and spin down array

        # put all spins on one band per kpoint
        bands = np.concatenate([_ for _ in stored_bands], axis=1)
    else:
        bands = stored_bands

    # analysis on occupations:
    if fermi_energy is None:

        num_kpoints = len(bands)

        if number_electrons is None:
            try:
                _, stored_occupations = bandsdata.get_bands(also_occupations=True)
            except KeyError:
                raise KeyError("Cannot determine metallicity if I don't have "
                               "either fermi energy, or occupations")

            # put the occupations in the same order of bands, also in case of multiple bands
            if len(stored_occupations.shape) == 3:
                # I write the algorithm for the generic case of having both the
                # spin up and spin down array

                # put all spins on one band per kpoint
                occupations = np.concatenate([_ for _ in stored_occupations], axis=1)
            else:
                occupations = stored_occupations

            # now sort the bands by energy
            # Note: I am sort of assuming that I have an electronic ground state

            # sort the bands by energy, and reorder the occupations accordingly
            # since after joining the two spins, I might have unsorted stuff
            bands, occupations = [np.array(y) for y in zip(*[zip(*j) for j in
                                                                [sorted(zip(i[0].tolist(), i[1].tolist()),
                                                                        key=lambda x: x[0])
                                                                 for i in zip(bands, occupations)]])]
            number_electrons = int(round(sum([sum(i) for i in occupations]) / num_kpoints))

            homo_indexes = [np.where(np.array([nint(_) for _ in x]) > 0)[0][-1] for x in occupations]
            if len(set(homo_indexes)) > 1:  # there must be intersections of valence and conduction bands
                return False, None, None, None
            else:
                homo = [_[0][_[1]] for _ in zip(bands, homo_indexes)]
                try:
                    lumo = [_[0][_[1] + 1] for _ in zip(bands, homo_indexes)]
                except IndexError:
                    raise ValueError("To understand if it is a metal or insulator, "
                                     "need more bands than n_band=number_electrons")

        else:
            bands = np.sort(bands)
            number_electrons = int(number_electrons)

            # find the zero-temperature occupation per band (1 for spin-polarized
            # calculation, 2 otherwise)
            number_electrons_per_band = 4 - len(stored_bands.shape)  # 1 or 2
            # floor division keeps the band index an integer under true
            # division as well (same result as `/` on Python 2 integers)
            homo_index = number_electrons // number_electrons_per_band - 1
            # gather the energies of the homo band, for every kpoint
            homo = [i[homo_index] for i in bands]  # take the nth level
            try:
                # gather the energies of the lumo band, for every kpoint
                lumo = [i[homo_index + 1] for i in bands]  # take the n+1th level
            except IndexError:
                raise ValueError("To understand if it is a metal or insulator, "
                                 "need more bands than n_band=number_electrons")

        if number_electrons % 2 == 1 and len(stored_bands.shape) == 2:
            # if #electrons is odd and we have a non spin polarized calculation
            # it must be a metal and I don't need further checks
            return False, None, None, None

        # if the nth band touches or crosses the (n+1)th, it is NOT an insulator
        gap = min(lumo) - max(homo)
        if gap == 0.:
            return False, 0., None, None
        elif gap < 0.:
            return False, gap, None, None
        else:
            return True, gap, max(homo), min(lumo)

    # analysis on the fermi energy
    else:
        # reorganize the bands, rather than per kpoint, per energy level

        # I need the bands sorted by energy; np.sort returns a sorted copy,
        # whereas bands.sort() would reorder the caller's array in place
        bands = np.sort(bands)

        levels = bands.transpose()
        max_mins = [(max(i), min(i)) for i in levels]

        if fermi_energy > bands.max():
            raise ValueError("The Fermi energy is above all band energies, "
                             "don't know what to do")
        if fermi_energy < bands.min():
            raise ValueError("The Fermi energy is below all band energies, "
                             "don't know what to do.")

        # one band is crossed by the fermi energy
        if any(i[1] < fermi_energy and fermi_energy < i[0] for i in max_mins):
            return False, 0., None, None

        # case of semimetals, fermi energy at the crossing of two bands
        # this will only work if the dirac point is computed!
        elif (any(i[0] == fermi_energy for i in max_mins) and
                  any(i[1] == fermi_energy for i in max_mins)):
            return False, 0., None, None
        # insulating case
        else:
            # take the max of the band maxima below the fermi energy
            homo = max([i[0] for i in max_mins if i[0] < fermi_energy])
            # take the min of the band minima above the fermi energy
            lumo = min([i[1] for i in max_mins if i[1] > fermi_energy])

            gap = lumo - homo
            if gap <= 0.:
                raise Exception("Something wrong has been implemented. "
                                "Revise the code!")
            return True, gap, homo, lumo


In [None]:
############################   END OF PREPROCESSING   ###############################

In [None]:
def search():
    """Preprocess new workchains, query the database and render the result table.

    Reads the search criteria from the module-level input widgets, selects
    successfully preprocessed NanoribbonWorkChains (newest first) that match
    them, and writes an HTML table with one row per workchain into
    ``results.value``. The table is wrapped in a form that submits the ticked
    PKs to ``compare.ipynb``.
    """
    # function-scope import: html.escape where available, cgi.escape otherwise
    try:
        from html import escape
    except ImportError:
        from cgi import escape

    results.value = "preprocessing..."
    preprocess_newbies()

    results.value = "searching..."

    # html table header; pieces are collected in a list and joined once
    parts = [
        '<style>#aiida_results td,th {padding: 2px}</style>',
        '<form action="compare.ipynb" method="get" target="_blank">',
        '<table border=1 id="aiida_results" style="margin:10px;"><tr>',
        '<th></th>',
        '<th>PK</th>',
        '<th>Creation Time</th>',
        '<th>Formula</th>',
        '<th>HOMO</th>',
        '<th>LUMO</th>',
        '<th>GAP</th>',
        '<th>Fermi Energy</th>',
        '<th>Total Mag.</th>',
        '<th>Abs Mag.</th>',
        '<th>Structure</th>',
        '<th></th>',
        '</tr>',
    ]

    # query AiiDA database
    filters = {}
    filters['attributes._process_label'] = 'NanoribbonWorkChain'
    filters['extras.preprocess_version'] = PREPROCESS_VERSION
    filters['extras.preprocess_successful'] = True

    pk_list = inp_pks.value.strip().split()
    if pk_list:
        filters['id'] = {'in': pk_list}

    formula_list = inp_formula.value.strip().split()
    # test the parsed list, not the raw text: whitespace-only input would
    # otherwise install an empty 'in' filter
    if formula_list:
        filters['extras.formula'] = {'in': formula_list}

    def add_range_filter(bounds, label):
        """Restrict extras.<label> to the half-open range [bounds[0], bounds[1])."""
        filters['extras.' + label] = {'and': [{'>=': bounds[0]}, {'<': bounds[1]}]}

    add_range_filter(inp_gap.value, "gap")
    add_range_filter(inp_homo.value, "homo")
    add_range_filter(inp_lumo.value, "lumo")
    add_range_filter(inp_efermi.value, "fermi_energy")
    add_range_filter(inp_tmagn.value, "total_magnetization")
    add_range_filter(inp_amagn.value, "absolute_magnetization")

    qb = QueryBuilder()
    qb.append(WorkCalculation, filters=filters)
    qb.order_by({WorkCalculation: {'ctime': 'desc'}})

    num_rows = 0
    for node_tuple in qb.iterall():
        node = node_tuple[0]
        thumbnail = node.get_extra('thumbnail')
        # free text ends up inside a title="..." attribute, so it is escaped
        # (including double quotes) to keep the markup intact
        description = escape('%s' % node.get_extra('structure_description'), quote=True)
        opt_structure_uuid = node.get_extra('opt_structure_uuid')

        # append table row
        parts.append('<tr>')
        parts.append('<td><input type="checkbox" name="pk" value="%s"></td>' % node.pk)
        parts.append('<td>%d</td>' % node.pk)
        parts.append('<td>%s</td>' % node.ctime.strftime("%Y-%m-%d %H:%M"))
        parts.append('<td>%s</td>' % node.get_extra('formula'))
        parts.append('<td>%f</td>' % node.get_extra('homo'))
        parts.append('<td>%f</td>' % node.get_extra('lumo'))
        parts.append('<td>%f</td>' % node.get_extra('gap'))
        parts.append('<td>%f</td>' % node.get_extra('fermi_energy'))
        parts.append('<td>%f</td>' % node.get_extra('total_magnetization'))
        parts.append('<td>%f</td>' % node.get_extra('absolute_magnetization'))
        parts.append('<td><a target="_blank" href="../export_structure.ipynb?uuid=%s">' % opt_structure_uuid)
        parts.append('<img src="data:image/png;base64,%s" title="%s"></a></td>' % (thumbnail, description))
        parts.append('<td><a target="_blank" href="./show.ipynb?pk=%s">Show</a><br>' % node.pk)
        parts.append('<a target="_blank" href="./show_pdos.ipynb?pk=%s">PDOS</a></td>' % node.pk)
        parts.append('</tr>')
        num_rows += 1

    parts.append('</table>')
    # the number of rendered rows is reported; no second count query is needed
    parts.append('Found %d matching entries.<br>' % num_rows)
    parts.append('<input type="submit" value="Compare">')
    parts.append('</form>')

    results.value = ''.join(parts)

In [None]:
# Search criteria widgets. All of them share one width and description width
# so that they line up in the final layout.
style = {"description_width": "100px"}
layout = ipw.Layout(width="592px")

inp_pks = ipw.Text(
    description='PKs',
    placeholder='e.g. 4062 4753 (space separated)',
    layout=layout,
    style=style,
)
inp_formula = ipw.Text(
    description='Formulas:',
    placeholder='e.g. C44H16 C36H4',
    layout=layout,
    style=style,
)


def slider(desc, min, max):
    """Return a float range slider labelled ``desc`` that initially spans [min, max].

    The parameter names shadow the ``min``/``max`` builtins inside this
    function; they are kept as they are part of the existing signature.
    """
    full_range = [min, max]
    return ipw.FloatRangeSlider(
        description=desc,
        min=min,
        max=max,
        value=full_range,
        step=0.05,
        layout=layout,
        style=style,
    )


# One range slider per numeric property that search() filters on.
inp_gap = slider("Gap:", 0.0, 3.0)
inp_homo = slider("HOMO:", -6.0, -1.0)
inp_lumo = slider("LUMO:", -5.0, -1.0)
inp_efermi = slider("Fermi Energy:", -6.0, -1.0)
inp_tmagn = slider("Total Magn.:", -6.0, 6.0)
inp_amagn = slider("Abs. Magn.:", 0.0, 20.0)

# Order in which the criteria appear in the UI.
search_crit = [
    inp_pks,
    inp_formula,
    inp_gap,
    inp_homo,
    inp_lumo,
    inp_efermi,
    inp_tmagn,
    inp_amagn,
]

In [None]:
# Output areas: `results` shows the status text / HTML table written by
# search(), `info_out` captures whatever the search prints.
results = ipw.HTML()
info_out = ipw.Output()


def on_click(b):
    """Button callback: clear the captured output and run a new search.

    :param b: the clicked button (unused).
    """
    with info_out:
        clear_output()
        search()


button = ipw.Button(description="Search")
button.on_click(on_click)

# Criteria on top, then the button, the result table and the captured output.
app = ipw.VBox(children=search_crit + [button, results, info_out])
display(app)