In [5]:
import utils
import ipywidgets as ipw
import shutil
from IPython.display import display

from rdkit import Chem,DataStructs
from rdkit.Chem import AllChem
import csv
import pathlib as pl

In [6]:
# Placeholder global; none of the visible cells read or reassign it.
OPENBIS_MOLECULES = None
# connect_openbis() returns a pair; only the first element (the session) is kept.
# NOTE(review): the search-widgets cell calls utils.connect_openbis() again and
# uses its own `openbis_session`; OPENBIS_SESSION is not referenced in the visible cells.
OPENBIS_SESSION, _ = utils.connect_openbis()

In [None]:
class Molecule:
    """A named RDKit molecule with fingerprinting and similarity helpers.

    The structure is parsed from ``strIn``: it is first tried as a CDXML
    document and, if that yields nothing, as a SMILES string. ``self.mol`` is
    always a single RDKit molecule, or ``None`` when neither parser produced a
    structure.
    """

    def __init__(self, name, strIn):
        """Parse ``strIn`` and store the result in ``self.mol``.

        Args:
            name: Label for the molecule.
            strIn: Structure given as a CDXML document or a SMILES string.
        """
        self.name = name
        self.strIn = strIn
        try:
            # MolsFromCDXML returns a tuple of molecules (one per fragment),
            # not a single Mol, so it must be merged before it can be used.
            fragments = Chem.rdmolfiles.MolsFromCDXML(strIn)
        except RuntimeError:
            fragments = ()
        self.mol = self._merge_fragments(fragments)
        if self.mol is None:
            # Not CDXML (or CDXML without any structure): treat as SMILES.
            # MolFromSmiles returns None for strings it cannot parse.
            self.mol = Chem.MolFromSmiles(strIn)

    @staticmethod
    def _merge_fragments(fragments):
        """Combine parsed fragments into one molecule; ``None`` if there are none."""
        fragments = [frag for frag in fragments if frag is not None]
        if not fragments:
            return None
        combined = fragments[0]
        for frag in fragments[1:]:
            combined = Chem.CombineMols(combined, frag)
        return combined

    def fingerprint(self, fpgen, **kwargs):
        """Compute the chemical fingerprint and store it in ``self.fp``.

        Args:
            fpgen: Fingerprint generator exposing
                ``GetFingerprint(mol, customAtomInvariants=...)``.
            **kwargs: Forwarded to :meth:`get_invariant`.

        Raises:
            ValueError: If no structure could be parsed for this molecule.
        """
        if self.mol is None:
            raise ValueError(f"Molecule {self.name!r} has no parsed structure to fingerprint.")
        self.fp = fpgen.GetFingerprint(self.mol, customAtomInvariants=self.get_invariant(**kwargs))

    def get_invariant(self, BCN=False, BrI=False, Topology=False):
        """Return one integer invariant per atom for use in fingerprinting.

        Args:
            BCN: Do not differentiate between boron, carbon and nitrogen.
            BrI: Do not differentiate between iodine and bromine.
            Topology: Do not differentiate between any elements.

        Returns:
            A list of integers, one per atom, in atom order.
        """
        inv = [atom.GetAtomicNum() for atom in self.mol.GetAtoms()]
        if BCN:
            # Map B (5) and N (7) onto C (6).
            inv = [6 if number in (5, 7) else number for number in inv]
        if BrI:
            # Map I (53) onto Br (35).
            inv = [35 if number == 53 else number for number in inv]
        if Topology:
            inv = [1] * len(inv)
        return inv

    def tanimoto(self, refMol):
        """Return the Tanimoto similarity between this fingerprint and ``refMol``'s."""
        return DataStructs.TanimotoSimilarity(self.fp, refMol.fp)

    def dice(self, refMol):
        """Return the Dice similarity between this fingerprint and ``refMol``'s."""
        return DataStructs.DiceSimilarity(self.fp, refMol.fp)
    
def get_similar_molecules(openbis_session, max_results=5):
    """Build widgets for the substances most similar to the SMILES in ``smiles_textbox``.

    Every SUBSTANCE in openBIS whose linked molecule has a SMILES that RDKit
    can parse is fingerprinted and ranked by Dice similarity against the
    reference structure read from the global ``smiles_textbox``.

    Args:
        openbis_session: Session object used to query openBIS.
        max_results: Maximum number of similar molecules to return.

    Returns:
        A list of ``ipw.HBox`` widgets (description text area plus preview
        image), best match first. The list is empty when the reference SMILES
        is blank or cannot be parsed, and shorter than ``max_results`` when
        fewer candidates exist.
    """
    # Validate the reference first so an unusable query never triggers the
    # (slow) walk over all substances.
    refSmile = smiles_textbox.value
    if not refSmile or not refSmile.strip():
        return []
    refMol = Molecule("Ref", refSmile)
    if not refMol.mol:
        return []

    # Parse each substance's molecule once and remember the openBIS molecule
    # object next to it, so the result loop needs no further substance queries.
    mols = {}
    openbis_molecules = {}
    for substance in openbis_session.get_objects(type = "SUBSTANCE"):
        subst_props = substance.props.all()
        subst_empa_number = subst_props.get("empa_number")
        subst_molecule_permid = subst_props.get("has_molecule")
        if not subst_molecule_permid:
            continue  # substance without a linked molecule
        substance_molecule = openbis_session.get_object(subst_molecule_permid)
        mol_smiles = substance_molecule.props.all().get("smiles", None)
        if mol_smiles:
            mol_object = Molecule(subst_empa_number, mol_smiles)
            if mol_object.mol: # There are SMILES that cannot be converted using RDKit
                mols[subst_empa_number] = mol_object
                openbis_molecules[subst_empa_number] = substance_molecule

    # select how to look for similarities
    BCNinv = True
    BrIinv = True
    Topology = False
    useBondTypes = False

    # choose fingerprint generator. Morgan seems to work best
    # (alternative: AllChem.GetRDKitFPGenerator(minPath=1, maxPath=7))
    fpgen = AllChem.GetMorganGenerator(radius=3, useBondTypes=useBondTypes)

    # create fingerprints for the reference and for all candidates
    refMol.fingerprint(fpgen, BrI=BrIinv, BCN=BCNinv, Topology=Topology)
    for mol_object in mols.values():
        mol_object.fingerprint(fpgen, BrI=BrIinv, BCN=BCNinv, Topology=Topology)

    # rank candidates by dice similarity to the reference molecule
    searchList = sorted(
        ((mol_object, mol_object.dice(refMol)) for mol_object in mols.values()),
        key=lambda entry: entry[1],
        reverse=True,
    )

    # A leading hit with similarity 1.0 is the queried molecule itself (it is
    # shown separately), so skip it. If the best hit is below 1.0 the query is
    # not in the database and the best hit is a genuine result.
    first = 1 if searchList and searchList[0][1] == 1.0 else 0

    placeholder_image = utils.read_file("images/white_screen.jpg")

    molecule_widgets = []
    # Slicing (instead of indexing) copes with fewer than max_results candidates.
    for mol, similarity in searchList[first:first + max_results]:
        substance_molecule = openbis_molecules[mol.name]
        molecule_props = substance_molecule.props.all()
        selected_molecule_name = molecule_props.get("$name", None)
        selected_molecule_iupac_name = molecule_props.get("iupac_name", None)
        selected_molecule_sum_formula = molecule_props.get("sum_formula", None)
        selected_molecule_smiles = molecule_props.get("smiles", None)
        molecule_description_string = (
            f"Name: {selected_molecule_name}\n"
            f"Empa Number: {mol.name}\n"
            f"IUPAC Name: {selected_molecule_iupac_name}\n"
            f"Sum Formula: {selected_molecule_sum_formula}\n"
            f"SMILES: {selected_molecule_smiles}\n"
            f"Similarity: {similarity}\n"
        )

        fig = ipw.Image(value = placeholder_image, width = '200px', height = '300px', layout=ipw.Layout(border='solid 1px #cccccc'))

        # A molecule may have no preview dataset; keep the placeholder then.
        datasets = substance_molecule.get_datasets(type="ELN_PREVIEW")
        molecule_dataset = datasets[0] if len(datasets) > 0 else None
        if molecule_dataset is not None and molecule_dataset.file_list:
            download_dir = f"images/{molecule_dataset.permId}/"
            try:
                molecule_dataset.download(destination="images")
                material_image_filepath = molecule_dataset.file_list[0]
                fig.value = utils.read_file(f"images/{molecule_dataset.permId}/{material_image_filepath}")
            finally:
                # Always remove the temporary download, even if reading failed.
                shutil.rmtree(download_dir, ignore_errors=True)

        textarea = utils.Textarea(disabled = True, layout = ipw.Layout(width = '600px', height = '200px'), value = molecule_description_string)
        molecule_widgets.append(ipw.HBox([textarea, fig]))

    return molecule_widgets

def _canonical_smiles(smiles):
    """Return RDKit's canonical SMILES for ``smiles``; the stripped input if it cannot be parsed."""
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return smiles.strip()
    return Chem.MolToSmiles(mol)


def search_molecule_in_openbis(openbis_session):
    """Look up the SMILES typed into ``smiles_textbox`` and refresh the result widgets.

    An exact structural match among the openBIS MOLECULE objects fills
    ``molecule_properties_textarea`` and ``molecule_image_box``. When nothing
    matches, both widgets are reset so no stale result stays on screen.
    ``similar_molecules_box`` is refreshed in either case.

    Args:
        openbis_session: Session object used to query openBIS.
    """
    query = smiles_textbox.value.strip()

    # SMILES is case-sensitive ("c" is aromatic carbon, "C" aliphatic), so
    # lower-casing would make e.g. cyclohexane match benzene. Compare canonical
    # SMILES instead, which also matches different spellings of one structure.
    selected_molecule = None
    if query:
        openbis_molecules_smiles_dict = {}
        for molecule in openbis_session.get_objects(type = "MOLECULE"):
            molecule_smiles = molecule.props.all().get("smiles", None)
            if molecule_smiles:  # some molecules carry no SMILES
                openbis_molecules_smiles_dict[_canonical_smiles(molecule_smiles)] = molecule
        selected_molecule = openbis_molecules_smiles_dict.get(_canonical_smiles(query))

    placeholder_image = utils.read_file("images/white_screen.jpg")

    if selected_molecule is not None:
        molecule_props = selected_molecule.props.all()
        selected_molecule_name = molecule_props.get("$name", None)
        selected_molecule_iupac_name = molecule_props.get("iupac_name", None)
        selected_molecule_sum_formula = molecule_props.get("sum_formula", None)
        selected_molecule_smiles = molecule_props.get("smiles", None)
        molecule_properties_textarea.value = (
            f"Name: {selected_molecule_name}\n"
            f"IUPAC Name: {selected_molecule_iupac_name}\n"
            f"Sum Formula: {selected_molecule_sum_formula}\n"
            f"SMILES: {selected_molecule_smiles}\n"
        )

        # Indexing an empty result would raise, so check the length first.
        datasets = selected_molecule.get_datasets(type="ELN_PREVIEW")
        molecule_dataset = datasets[0] if len(datasets) > 0 else None
        if molecule_dataset is not None and molecule_dataset.file_list:
            download_dir = f"images/{molecule_dataset.permId}/"
            try:
                molecule_dataset.download(destination="images")
                material_image_filepath = molecule_dataset.file_list[0]
                molecule_image_box.value = utils.read_file(f"images/{molecule_dataset.permId}/{material_image_filepath}")
            finally:
                # Always remove the temporary download, even if reading failed.
                shutil.rmtree(download_dir, ignore_errors=True)
        else:
            molecule_image_box.value = placeholder_image
    else:
        # No match: clear whatever an earlier search left behind.
        molecule_properties_textarea.value = ""
        molecule_image_box.value = placeholder_image

    molecules_widgets = get_similar_molecules(openbis_session)
    similar_molecules_box.children = molecules_widgets

# Find similar molecules

In [None]:
# Free-text input for the SMILES string to search for.
smiles_textbox = utils.Text(
    description = "SMILES",
    disabled = False,
    layout = ipw.Layout(width = '950px'),
    placeholder = "Write SMILES here...",
    style = {'description_width': "48px"},
)

# Icon-only button that starts the search.
search_molecules_button = utils.Button(
    description = '',
    disabled = False,
    button_style = '',
    tooltip = 'Search',
    icon = 'search',
    layout = ipw.Layout(width = '50px', height = '25px'),
)

# Session handed to the search callback on every click.
openbis_session, _ = utils.connect_openbis()
search_molecules_button.on_click(
    lambda _clicked_button: search_molecule_in_openbis(openbis_session)
)

search_controls = [smiles_textbox, search_molecules_button]
display(ipw.VBox(search_controls))

## Molecule

In [None]:
# Read-only text area that search_molecule_in_openbis() fills with the properties of the matched molecule.
molecule_properties_textarea = utils.Textarea(disabled = True, layout = ipw.Layout(width = '400px', height = '200px'))
# Starts with the blank placeholder image. It is loaded through utils.read_file,
# the same helper search_molecule_in_openbis() uses for this file and widget,
# rather than a bare open(...).read() that never closes the file handle.
molecule_image_box = utils.Image(value = utils.read_file("images/white_screen.jpg"), format = 'jpg', width = '200px', height = '300px', layout=ipw.Layout(border='solid 1px #cccccc'))
molecule_metadata_boxes = ipw.HBox([molecule_properties_textarea, molecule_image_box])
display(molecule_metadata_boxes)

## Similar molecules

In [None]:
# Empty container; search_molecule_in_openbis() fills it by assigning `.children`.
similar_molecules_box = ipw.VBox()
display(similar_molecules_box)