In [1]:
import numpy as np
import pandas as pd
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual
from ase.visualize import view
from ase.visualize.ngl import view_ngl
from ase.io import read, write
from ase.geometry.analysis import Analysis
from pysmiles import write_smiles
from rdkit import Chem
from tqdm import tqdm
from rdkit.Chem import Draw
%matplotlib inline

In [2]:
# Read the tagged XYZ file as raw text, one list entry per line (each entry
# keeps its trailing newline).
with open("data/all_relaxed_molecules_tagged.xyz", "r") as f:
    lines = list(f)

In [3]:
# Split the raw file text into one string per molecule.
# A line whose first character is a decimal digit is treated as the start of
# a new molecule (the atom-count line); every following line is accumulated
# into the current block until the next such line.
blocks = []
if lines:
    block = lines[0]
    for line in lines[1:]:
        if line[0].isdecimal():
            # A new molecule starts here: store the finished one.
            blocks.append(block)
            block = ""
        block += line
    # Flush the final molecule. Nothing follows it, so no later count line
    # would ever trigger the append above and it would be silently dropped.
    blocks.append(block)

In [4]:
# Load all frames of the XYZ file with ASE (index=":" selects every frame).
# NOTE(review): this is "all_relaxed_molecules.xyz", not the "_tagged" file that
# is read as text above; the two are later paired by position, so they are
# assumed to hold the same molecules in the same order — TODO confirm.
ase_mols = read("data/all_relaxed_molecules.xyz", index=":")

In [5]:
class Molecule:
    """Metadata parsed from the text of a single molecule block.

    The block is expected to consist of an atom-count line, one header line
    holding ``Lattice="..."``, ``Properties=...``, ``dft_energy_ryd=``,
    ``molecule_idx=``, ``crystal_idx=``, ``motif_idx=``, ``motif_names=`` and
    ``pbc=`` (in that order), followed by the atom lines.

    Attributes (all values are kept as strings unless noted):
        lines: the block split on newlines.
        num_atoms: the first line of the block.
        lattice: tuple of the whitespace-separated entries inside ``Lattice="..."``.
        dft_energy_ryd, molecule_idx, crystal_idx: raw header substrings.
        motif_idx: list of lists of ints, one inner list per ``[`` group.
        motif_names: list of motif name strings.
        pbc: text after ``pbc="`` with trailing quotes removed.
        atoms: every line after the header line.
        xyz: ``num_atoms`` followed by the atom lines (the header line is omitted).

    Raises:
        ValueError: if one of the expected header keys is missing.
        IndexError: if the block has fewer than two lines.
    """

    @staticmethod
    def _parse_indices(segment):
        """Return every run of decimal digits in ``segment`` as one int."""
        # Blank out everything that is not a digit, then split, so that a
        # multi-digit index such as "10" stays one number instead of being
        # broken into its individual digits.
        digits_only = "".join(ch if ch.isdecimal() else " " for ch in segment)
        return [int(token) for token in digits_only.split()]

    def __init__(self, str):
        # The parameter name shadows the builtin `str`; it is kept only so
        # that existing keyword callers continue to work.
        text = str
        self.lines = text.split("\n")
        lines = self.lines
        header = lines[1]

        self.num_atoms = lines[0]
        self.lattice = tuple(header[:header.index(" Properties")].split('"')[1].split())

        # Everything after "Properties=" except the very last character.
        props = header[header.index("Properties") + 11:-1]

        # NOTE(review): "dft_energy_ryd=" is 15 characters long, so the +16
        # offset skips the first character of the value (possibly a leading
        # minus sign). Left unchanged; confirm against the data file.
        self.dft_energy_ryd = props[props.index("dft_energy_ryd=") + 16:props.index(" molecule_idx")]
        self.molecule_idx = props[props.index("molecule_idx") + 13:props.index(" crystal_idx")]
        self.crystal_idx = props[props.index("crystal_idx") + 12:props.index(" motif_idx")]

        # The +17 / +19 offsets presumably skip a `="_JSON ` prefix in front
        # of the bracketed lists — TODO confirm against the data file.
        raw_idx = props[props.index("motif_idx") + 17:props.index("motif_names")]
        self.motif_idx = [self._parse_indices(segment) for segment in raw_idx[2:].split("[")]

        raw_names = props[props.index("motif_names") + 19:props.index(" pbc")]
        self.motif_names = [name.strip("\\[,\"]") for name in raw_names.split()]

        self.pbc = header[header.index("pbc=") + 5:].rstrip('"')

        self.atoms = lines[2:]
        # NOTE(review): no comment/header line is placed between the count
        # and the atom lines; confirm that consumers of `xyz` expect this.
        self.xyz = f"{self.num_atoms}\n" + "\n".join(self.atoms)

In [6]:
# Parse every text block, then pair each parsed Molecule with the ASE object
# at the same position. zip() stops at the shorter of the two sequences.
mols = list(zip([Molecule(text) for text in blocks], ase_mols))

In [7]:
# Gather the motif names of every molecule, then reduce them to a sorted
# array of distinct names.
all_motifs = []
for (mol, ase_mol) in mols:
    all_motifs.extend(mol.motif_names)

all_motifs = np.unique(all_motifs)

In [8]:
# Motif names that are counted as aromatic when classifying molecules.
AROMATIC_MOTIFS = [
    'Aryl_NH',
    'benzene',
    'diazine',
    'furan',
    'imidazole',
    'isoxazole',
    'oxidiazole',
    'pentazole',
    'pyrazole',
    'pyridine',
    'pyrrole',
    'tetrazine',
    'tetrazole',
    'thiazole',
    'thiophene',
    'triazine',
    'triazole',
]

# Motif names whose classification is undecided.
# NOTE(review): 'tetrazole' is listed here and in AROMATIC_MOTIFS — confirm
# which list it belongs to.
UNKNOWN_MOTIFS = [
    'C=O_noCOO',
    'quatN',
    'sulfide_chain',
    'tetrazole',
    'thiocarboxamide',
    'thiodiazole',
]

In [9]:
# Pair every (molecule, ase_molecule) tuple with the number of *distinct*
# motif names it carries that appear in AROMATIC_MOTIFS.
mol_aromas = []
for (mol, ase_mol) in mols:
    aromatic_count = sum(
        1 for motif in np.unique(mol.motif_names) if motif in AROMATIC_MOTIFS
    )
    mol_aromas.append(((mol, ase_mol), aromatic_count))

# Molecules with at least one aromatic motif ...
armols = [pair for pair, aroma in mol_aromas if aroma > 0]
# ... and molecules with none.
non_armols = [pair for pair, aroma in mol_aromas if aroma == 0]

In [23]:
out = widgets.Output()


def picture_mols(x):
    """Return an NGL viewer for the ASE object of the x-th aromatic molecule.

    Args:
        x: position in ``armols``.

    Returns:
        The viewer object produced by ``view(..., viewer="ngl")``.

    Raises:
        IndexError: if ``x`` is outside ``armols``.
    """
    out.clear_output()
    display(out)
    v = view(armols[x][1], viewer="ngl")
    # presumably the third control is the atom-size slider — TODO confirm
    v.control_box.children[2].value = 0.8
    return v


# Derive the upper bound from the data instead of hard-coding it, so the
# selector can neither run past the end of `armols` nor hide its last entries.
# The inner max() keeps the bound valid (>= min) when `armols` is empty.
ibox = widgets.BoundedIntText(min=0, max=max(len(armols) - 1, 0))
interact(picture_mols, x=ibox)

interactive(children=(BoundedIntText(value=0, description='x', max=1487), Output()), _dom_classes=('widget-int…

<function __main__.picture_mols(x)>