<img style="float: right;" src="images/cell2mol_logo.png" width="500">

# cell2mol
Unit Cell to Molecule Interpretation

In [1]:
import numpy as np
from ipywidgets import interactive, widgets, HTML, Button
from typing import Callable, Any
import warnings
import os
import markdown
import codecs
import pickle
from IPython.display import display, FileLink
from contextlib import contextmanager
from io import StringIO 
import sys
import py3Dmol
sys.path.append('/usr/local/lib/python3.7/site-packages/')
import rdkit
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
# The two imports below are commented out because visualization is disabled until the nglview integration is fixed
#import ase.io
#import nglview

# Install an 'ignore' filter so warnings are not shown from here on.
warnings.simplefilter('ignore')
# Convert the introductory Markdown blurb to HTML and wrap it in an HTML widget.
html_description = markdown.markdown("""
cell2mol is able to analyze cif files containing crystallographic unit cell information and provide a chemical interpretation of the individual molecular species. Run the entire notebook and use the widget to proceed through the four tabs sequentially.""")
description=HTML(html_description)

# Single-section accordion holding the blurb, titled 'Description'.
pre_accordion = widgets.Accordion()
pre_accordion.children = [description]
pre_accordion.set_title(0, 'Description')
# No section is selected; presumably this renders the accordion collapsed.
pre_accordion.selected_index = None
# Bare expression as the last statement of the cell, so the notebook shows the widget.
pre_accordion

Accordion(children=(HTML(value='<p>cell2mol is able to analyze cif files containing crystallographic unit cell…

In [2]:
# Import the core functionalities of cell2mol
from cell2mol.cif2info import cif_2_info
from cell2mol.c2m_module import save_cell, cell2mol

# Define some basic functionalities

class Capturing(list):
    """List that collects everything written to ``sys.stdout`` inside a ``with`` block.

    Entering swaps ``sys.stdout`` for an in-memory buffer; leaving appends the
    buffered text to this list, one entry per line, and puts the previous
    ``sys.stdout`` back.
    """

    def __enter__(self):
        # Remember the stream that was active so it can be restored on exit.
        self._stdout = sys.stdout
        buffer = StringIO()
        sys.stdout = buffer
        self._stringio = buffer
        return self

    def __exit__(self, *args):
        captured_text = self._stringio.getvalue()
        for captured_line in captured_text.splitlines():
            self.append(captured_line)
        # The buffer is no longer needed once its lines are stored in the list.
        del self._stringio
        sys.stdout = self._stdout


class ExecutionButton(Button):
    """Button that runs ``action`` when clicked and prints its result into an output widget.

    Parameters
    ----------
    action:
        Callable invoked as ``action(run=True, out=WidgetOut)``. It should
        return an iterable of lines to print (``None`` is treated as "nothing
        to print").
    WidgetOut:
        Object used as a context manager around the ``print`` calls; it is also
        handed to ``action`` as ``out``.
    **kwargs:
        Forwarded unchanged to ``Button``.
    """

    # ``Callable[bool, Any]`` is not a valid form (the argument spec must be a
    # list or ``...``) and raises TypeError on older Python versions.
    def __init__(self, action: Callable[..., Any], WidgetOut, **kwargs):
        super().__init__(**kwargs)
        self.action = action
        self.tooltip = 'Click me to run'
        self.description = 'Run'
        self.icon = 'fa-check'
        self.output = WidgetOut
        self.on_click(self.__on_click)

    @contextmanager
    def show_loading(self):
        """Show a 'Running...' label while the ``with`` body executes."""
        self.description = 'Running...'
        self.icon = 'fa-stop'
        try:
            yield
        finally:
            # Reset the label even when the action raises, otherwise the
            # button would stay stuck on 'Running...'.
            self.description = 'Ready to run again'
            self.icon = 'fa-check'

    def __on_click(self, b):
        """Click handler: run the action, then print each returned line into the output."""
        with self.show_loading():
            toprint = self.action(run=True, out=self.output)
        with self.output:
            # An action may return None; treat that as no lines to print.
            for line in toprint or ():
                print(line)

def printing_text(cell, output):
    """Append a text summary of the distinct molecules in ``cell`` to ``output``.

    Molecules are grouped by ``mol.formula``. Only the first molecule of each
    formula is described, together with how many molecules share that formula.
    Molecules of type "Complex" are listed before those of type "Other".

    Parameters
    ----------
    cell:
        Object exposing ``moleclist``, a sequence of molecules with ``type``,
        ``formula`` and ``totcharge`` (plus ``hapticity``, ``metalist`` and
        ``ligandlist`` for complexes, ``smiles`` for others).
    output:
        List that the summary lines are appended to (modified in place).

    Returns
    -------
    The same ``output`` list.
    """
    counts = {}
    shown = []
    # Two passes over the same list so complexes come first in ``shown``;
    # the counter is shared between both passes.
    for wanted_type in ("Complex", "Other"):
        for idx, mol in enumerate(cell.moleclist):
            if mol.type != wanted_type:
                continue
            if mol.formula in counts:
                counts[mol.formula] += 1
            else:
                counts[mol.formula] = 1
                shown.append(idx)

    for i in shown:
        mol = cell.moleclist[i]
        if mol.type == "Complex":
            output.append(f"[Complex] Formula : {mol.formula}\t(occurence : {counts[mol.formula]})")
            output.append(f"   total charge : {mol.totcharge}")
            output.append("")

            if mol.hapticity == False:
                for met in mol.metalist:
                    output.append(f"   >> Metal : {met.label}")
                    output.append(f"   Metal oxidation state : {met.totcharge}")
                    output.append(f"   coordination number: {met.coordination_number}")
                    output.append(f"   metal-coordinating atoms: {met.coordinating_atoms}")
                    output.append(f"   coordination geometry: {met.geometry}\t(*deviation value : {met.deviation})")
                    output.append("   *deviation value: closer to 0, less distortion in a given geometry")
            else:
                # Previously ``met`` was used here without a loop, which raised
                # UnboundLocalError or reported a metal of an earlier molecule.
                for met in mol.metalist:
                    output.append(f"   >> Metal : {met.label}")
                    output.append(f"   Metal oxidation state : {met.totcharge}")
            output.append("")

            for lig in mol.ligandlist:
                output.append(f"   >> Ligand Formula : {lig.formula}")
                output.append(f"   charge : {lig.totcharge}")
                if lig.hapticity == True:
                    output.append(f"   hapticity: {lig.hapttype}")
                else:
                    # Same attribute as printing_structure_cell reports for
                    # denticity; the hapticity type was printed here before.
                    output.append(f"   denticity: {lig.totmconnec}")
                output.append(f"   smiles: {lig.smiles}")
                output.append("")

        elif mol.type == "Other":
            output.append(f"[Other] Formula : {mol.formula}\t(occurence : {counts[mol.formula]})")
            output.append(f"   charge: {mol.totcharge}")
            output.append(f"   smiles: {mol.smiles}")

    return output
 
def printing_structure (natoms, labels, coord):
    """Show a structure in a py3Dmol viewer when it has at least two atoms.

    An XYZ-format string is assembled from ``natoms`` and the paired
    ``labels`` / ``coord`` entries (first three components of each coordinate),
    loaded into a 400x400 viewer with stick style, and displayed. Nothing
    happens for fewer than two atoms. Returns ``None``.
    """
    if not natoms >= 2:
        return

    # Header is the atom count followed by an empty comment line.
    atom_rows = [
        f"{label} {position[0]} {position[1]} {position[2]}\n"
        for label, position in zip(labels, coord)
    ]
    xyz_text = f"{natoms}\n\n" + "".join(atom_rows)

    viewer = py3Dmol.view(width=400, height=400)
    viewer.addModel(xyz_text, 'xyz')
    viewer.setStyle({'stick': {}})
    viewer.setBackgroundColor('0xeeeeee')
    viewer.animate({'loop': 'backAndForth'})
    viewer.zoomTo()
    viewer.show()
    
def printing_structure_cell(cell, printing):
    """Print a summary of the distinct molecules in ``cell`` and show their 3D structures.

    Molecules are grouped by ``mol.formula``; only the first molecule of each
    formula is described, with the number of molecules sharing that formula.
    Molecules of type "Complex" come before those of type "Other". Each
    described molecule (and each ligand of a complex) is passed to
    ``printing_structure``.

    Parameters
    ----------
    cell:
        Object exposing ``moleclist``; see the attributes read below.
    printing:
        Unused; kept so existing callers keep working.
    """
    counts = {}
    shown = []
    # Two passes over the same list so complexes come first in ``shown``;
    # the counter is shared between both passes.
    for wanted_type in ("Complex", "Other"):
        for idx, mol in enumerate(cell.moleclist):
            if mol.type != wanted_type:
                continue
            if mol.formula in counts:
                counts[mol.formula] += 1
            else:
                counts[mol.formula] = 1
                shown.append(idx)

    for i in shown:
        mol = cell.moleclist[i]
        if mol.type == "Complex":
            print(f"[Complex] Formula : {mol.formula}\t(occurence : {counts[mol.formula]})")
            print(f"   total charge : {mol.totcharge}")
            printing_structure(mol.natoms, mol.labels, mol.coord)
            print("")

            if mol.hapticity == False:
                for met in mol.metalist:
                    print(f"   >> Metal : {met.label}")
                    print(f"   metal oxidation state : {met.totcharge}")
                    print(f"   coordination number: {met.coordination_number}")
                    print(f"   metal-coordinating atoms: {met.coordinating_atoms}")
                    print(f"   coordination geometry: {met.geometry}\t(*deviation value : {met.deviation})")
                    print("   *deviation value: closer to 0, less distortion in a given geometry")
                    print("")
            else:
                # Previously ``met`` was used here without a loop, which raised
                # UnboundLocalError or reported a metal of an earlier molecule.
                for met in mol.metalist:
                    print(f"   >> Metal : {met.label}")
                    print(f"   metal oxidation state : {met.totcharge}")
            print("")

            for lig in mol.ligandlist:
                print(f"   >> Ligand Formula : {lig.formula}")
                print(f"   charge : {lig.totcharge}")
                if lig.hapticity == True:
                    print(f"   hapticity: {lig.hapttype}")
                else:
                    print(f"   denticity: {lig.totmconnec}")
                print(f"   smiles: {lig.smiles}")
                print("")
                printing_structure(lig.natoms, lig.labels, lig.coord)

        elif mol.type == "Other":
            print(f"[Other] Formula : {mol.formula}\t(occurence : {counts[mol.formula]})")
            print(f"   charge: {mol.totcharge}")
            print(f"   smiles: {mol.smiles}")

            printing_structure(mol.natoms, mol.labels, mol.coord)
    
#############################
# STEP 1: Choose and upload, define widgets
#############################

# Upload control for a single .cif file.
Widget1UploadConfiguration = widgets.FileUpload(
 accept='.cif',  # Accepted file extension e.g. '.cif'
 multiple=False,  # True to accept multiple files upload else False
 wait=True
)

# The trailing comma makes this a one-element tuple holding a (label, value) pair,
# which is what the dropdown receives as its options.
examples = ('Transition metal complex', 'cell2mol/test/cif/AJEPIP.cif'),
widget_choose_filename = widgets.Dropdown(options=examples,description='Structure:')

# Output area placed under the dropdown/upload row of the first tab.
Widget1Out = widgets.Output()
def chooseAndPlotConfigurationAltOutput(filename):
    """Clear Widget1Out; ``filename`` is currently unused because plotting is commented out."""
    Widget1Out.clear_output()
    #with Widget1Out:
    #    plot_from_file(filename)
        
# Bind the callback to the dropdown through ipywidgets' ``interactive``.
Widget1ExampleConfiguration = interactive(chooseAndPlotConfigurationAltOutput, filename=widget_choose_filename)

# This function updates the dropdown list when a new file is uploaded
def updateWidget1ExampleConfiguration(*args):
    """Save the uploaded .cif under ./uploaded/ and refresh the dropdown options.

    Registered as an observer of the upload widget's ``value``; ``*args``
    (the change notification) is ignored. The first entry of the widget value
    is read through its ``'content'`` and ``'name'`` keys.
    """
    uploads = Widget1UploadConfiguration.value
    if not uploads:
        # The observer also fires when the value becomes empty; nothing to save.
        return
    uploaded = next(iter(uploads))
    cif_content = codecs.decode(uploaded['content'], encoding="utf-8")
    print(cif_content)
    # The name is supplied by the client: keep only its last path component so
    # the file cannot be written outside the upload directory.
    cif_name = os.path.basename(uploaded['name'])
    mypath = "./uploaded/"
    # The directory may not exist yet on a fresh checkout.
    os.makedirs(mypath, exist_ok=True)
    # Write with the same encoding the content was decoded with.
    with open(mypath + cif_name, 'w', encoding="utf-8") as f:
        f.write(cif_content)
    # Offer every file currently present in the upload directory.
    widget_choose_filename.options = os.listdir(mypath)
    

# Re-run the dropdown refresh whenever the upload widget's value changes.
Widget1UploadConfiguration.observe(updateWidget1ExampleConfiguration,names='value')
# First tab: explanatory text on top, dropdown and upload side by side, output below.
Widget1ExamplesAndUpload = widgets.HBox([Widget1ExampleConfiguration,Widget1UploadConfiguration])
Widget1Text = HTML(markdown.markdown("""
Choose a structure from the examples or upload your own in .cif format [https://en.wikipedia.org/wiki/Crystallographic_Information_File].
"""))
Widget1 = widgets.VBox([Widget1Text,Widget1ExamplesAndUpload,Widget1Out])

# Texts and bordered output area for the cell2info tab.
Widget2Text = HTML(markdown.markdown("""
Will run cell2info on the cif file selected in the drop down menu in the previous tab. Should run almost instantaneously. When finished, output will be printed below, then go to next tab!
"""))
Widget2BottomText = HTML(markdown.markdown("""
cell2info output:
"""))
Widget2Out = widgets.Output(layout={'border': '1px solid black'})
# Texts and bordered output area for the cell2mol tab.
Widget3Text = HTML(markdown.markdown("""
Will run cell2mol on the info file obtained in the previous tab from the cif file in the first tab. Takes some time to run. When finished, output will be printed below.
"""))
Widget3BottomText = HTML(markdown.markdown("""
cell2mol output:
"""))
Widget3Out = widgets.Output(layout={'border': '1px solid black'})

# Texts and bordered output area for the visualization tab (bottom text is empty).
Widget4Text = HTML(markdown.markdown("""
Will display the cell2mol interpreted molecules in 3D.
"""))
Widget4Out = widgets.Output(layout={'border': '1px solid black'})
Widget4BottomText = HTML(markdown.markdown("""
"""))


# Texts and bordered output area for the download tab (bottom text is empty).
Widget5Text = HTML(markdown.markdown("""
You can download the resulting cell file, from which you can run your own analysis.
"""))
Widget5BottomText = HTML(markdown.markdown("""
"""))
Widget5Out = widgets.Output(layout={'border': '1px solid black'})

#############################
# STEP 2: Run cell2info
#############################
def run_cell2info(run=False, out=None):
    """Run ``cif_2_info`` on the file selected in the dropdown and report the result.

    Parameters
    ----------
    run:
        When false nothing is executed and an empty list is returned.
    out:
        Unused here; accepted because the execution button passes it.

    Returns
    -------
    List of lines: whatever ``cif_2_info`` printed, followed by the lines of
    the error file containing "Error" and a final status message.
    """
    if not run:
        # Return an iterable rather than an implicit None so callers can loop over it.
        return []

    cif_name = widget_choose_filename.value
    if "/" in cif_name:
        # Example entries carry a directory prefix; files are read from ./uploaded/.
        cif_name = cif_name.split("/")[-1]
    info_name = cif_name.replace('.cif', '.info')
    err_name = cif_name.replace('.cif', '.err')
    input_path = "./uploaded/" + cif_name
    info_path = "./uploaded/" + info_name
    error_path = "./uploaded/" + err_name

    with Capturing() as output:
        cif_2_info(input_path, info_path, error_path)

    error = False
    with open(error_path, 'r') as err:
        for line in err:
            if "Error" in line:
                # Drop the line terminator: the lines are printed one by one
                # later, which would otherwise leave a blank line after each.
                output.append(line.rstrip("\n"))
                error = True

    if error:
        output.append(f"Parsing of .cif file {cif_name} failed due to the error above.")
    else:
        output.append(f"Infofile {info_name} generated from {cif_name} succesfully.")
    return output

#############################
# STEP 3: Run cell2mol on infofile
#############################
def run_cell2mol(run=False, out=None):
    """Run ``cell2mol`` on the info file of the selected structure and summarise the cell.

    Parameters
    ----------
    run:
        When false nothing is executed and an empty list is returned.
    out:
        Unused here; accepted because the execution button passes it.

    Returns
    -------
    List of lines: either the summary produced by ``printing_text`` (prefixed
    with the input name) or a message asking to run cell2info first when the
    info file does not exist.
    """
    if not run:
        # Return an iterable rather than an implicit None so callers can loop over it.
        return []

    cif_name = widget_choose_filename.value
    if "/" in cif_name:
        cif_name = cif_name.split("/")[-1]
    # Reference code is the second-to-last dot-separated piece of the file name.
    refcode = cif_name.split("/")[-1].split(".")[-2]
    info_name = cif_name.replace('.cif', '.info')
    info_path = "./uploaded/" + info_name

    if not os.path.exists(info_path):
        return [f"Please, wait until cell2info has finished for this input. Could not find {info_name}."]

    # NOTE: everything cell2mol prints is swallowed here and not shown; the
    # original author's note says it should eventually be kept and extended
    # with the summary instead of being replaced by it.
    with Capturing():
        cell = cell2mol(info_path, refcode, "./uploaded/", 3)
        save_cell(cell, 'gmol', "./uploaded/")

    return printing_text(cell, [f"For input {cif_name}"])

#############################
# STEP 4: Display structures
#############################    
    
def display_mol(run=False, out=None):
    """Load the saved cell of the selected structure and display its molecules in ``out``.

    Parameters
    ----------
    run:
        When false nothing is executed and an empty list is returned
        (previously this path failed because ``output`` was never assigned).
    out:
        Context manager (output widget) inside which the structures are printed.

    Returns
    -------
    List of lines printed while locating and loading the cell file, plus a
    message when no cell file exists yet.
    """
    output = []
    if not run:
        return output

    cell = None
    with Capturing() as output:
        path = find_gmol(run=run)
        if path is not None:
            # NOTE(security): pickle.load can execute arbitrary code; only
            # cell files written by this notebook should end up here.
            with open(path, "rb") as handle:
                cell = pickle.load(handle)

    if path is None:
        # find_gmol found nothing: report it instead of failing on open(None).
        output.append("No cell file found. Please run cell2mol first.")
        return output

    with out:
        printing_structure_cell(cell, True)
    return output

#############################
# STEP 5: Set up download of gmol files
#############################            
def find_gmol(run=False):
    """Return the path of the saved cell file for the selected structure, or ``None``.

    Nothing is looked up unless ``run`` is truthy. The reference code is taken
    from the file name chosen in the dropdown; when
    ``./uploaded/Cell_<refcode>.gmol`` exists, a message is printed and the
    path is returned.
    """
    if not run:
        return None

    selected = widget_choose_filename.value
    # Keep only the file name when the dropdown value carries a directory part.
    file_name = selected.split("/")[-1] if "/" in selected else selected
    refcode = file_name.split("/")[-1].split(".")[-2]
    candidate = "./uploaded/Cell_" + refcode + '.gmol'

    if not os.path.exists(candidate):
        return None
    print(f"Found cell file in {candidate}")
    return candidate

class DownloadFileLink(FileLink):
    """FileLink variant whose anchor carries a ``download`` attribute.

    Parameters
    ----------
    path:
        Path of the file to link to; forwarded to ``FileLink``.
    file_name:
        Name put in the ``download`` attribute. Defaults to the last
        component of ``path``.
    link_text:
        Visible text of the link. Defaults to ``file_name``. It is
        HTML-escaped, so markup passed here is shown literally.
    """

    # The download value is quoted so names containing spaces stay one attribute.
    html_link_str = "<a href='{link}' download='{file_name}'>{link_text}</a>"

    def __init__(self, path, file_name=None, link_text=None, *args, **kwargs):
        super().__init__(path, *args, **kwargs)

        self.file_name = file_name or os.path.split(path)[1]
        self.link_text = link_text or self.file_name

    def _format_path(self):
        """Build the HTML for the link, escaping every interpolated value."""
        from html import escape

        fp = "".join([self.url_prefix, escape(self.path)])
        anchor = self.html_link_str.format(
            link=fp,
            # Attribute value: escape quotes too so the name cannot close the attribute.
            file_name=escape(str(self.file_name)),
            # Element text: only &, < and > need escaping.
            link_text=escape(str(self.link_text), quote=False),
        )
        return "".join([self.result_html_prefix, anchor, self.result_html_suffix])

def display_fl(run=False, out=None):
    """Show a download link for the saved cell file of the selected structure.

    Parameters
    ----------
    run:
        When false nothing is executed and an empty list is returned
        (previously this path failed because ``output`` was never assigned).
    out:
        Context manager (output widget) inside which the link is displayed.

    Returns
    -------
    List of lines printed while looking for the file, ending with either
    "Ready to download!" or "Download not ready.".
    """
    output = []
    if not run:
        return output

    fl = None
    with Capturing() as output:
        path = find_gmol(run=run)
        if path is not None:
            # Split only after the None check; find_gmol returns None when no
            # cell file exists yet.
            filename = path.split("/")[-1]
            fl = DownloadFileLink(path, f"{filename}")
            print("Ready to download!")
        else:
            print("Download not ready.")

    # Display the link only when one was created.
    if fl is not None:
        with out:
            display(fl)
    return output



# Tabs 2-5 share one layout: explanatory text, a run button bound to the
# tab's action and output area, a caption, and the output area itself.
Widget2 = widgets.VBox([
    Widget2Text,
    ExecutionButton(run_cell2info, Widget2Out),
    Widget2BottomText,
    Widget2Out,
])
Widget3 = widgets.VBox([
    Widget3Text,
    ExecutionButton(run_cell2mol, Widget3Out),
    Widget3BottomText,
    Widget3Out,
])
Widget4 = widgets.VBox([
    Widget4Text,
    ExecutionButton(display_mol, Widget4Out),
    Widget4BottomText,
    Widget4Out,
])
Widget5 = widgets.VBox([
    Widget5Text,
    ExecutionButton(display_fl, Widget5Out),
    Widget5BottomText,
    Widget5Out,
])

#############################
# Put widget in tab
#############################

children = [Widget1, Widget2, Widget3, Widget4, Widget5]
tab = widgets.Tab()
tab.children = children
# Titles in the same order as the entries of ``children``.
for _position, _title in enumerate((
    'Upload file',
    'Run cell2info',
    'Run cell2mol',
    'Visualize molecules',
    'Download cell file',
)):
    tab.set_title(_position, _title)

In [3]:
# Render the tab container built in the previous cell.
display(tab)

Tab(children=(VBox(children=(HTML(value='<p>Choose a structure from the examples or upload your own in .cif fo…

In [4]:
# Footer accordion with two sections, each a VBox around one HTML widget
# rendered from Markdown.
html_acknowledgments = markdown.markdown("""
* Funding from the NCCR MARVEL funded by the SNSF.
""")
acknowledgments = widgets.VBox([HTML(html_acknowledgments)])

html_howtocite = markdown.markdown("""
Please cite the cell2mol paper~!
""")
howtocite = widgets.VBox([HTML(html_howtocite)])

post_children = [acknowledgments, howtocite]
post_accordion = widgets.Accordion(layout=widgets.Layout())
post_accordion.children = post_children
# Titles in the same order as the entries of ``post_children``.
for _position, _title in enumerate(('Acknowledgments', 'How to cite')):
    post_accordion.set_title(_position, _title)
post_accordion.selected_index = None
# Bare expression as the last statement of the cell, so the notebook shows the widget.
post_accordion

Accordion(children=(VBox(children=(HTML(value='<ul>\n<li>Funding from the NCCR MARVEL funded by the SNSF.</li>…

See the project on [GitHub](https://github.com/lcmd-epfl/cell2mol)

cell2mol, Copyright © 2022 LCMD-EPFL