In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import itertools
import copy
import os

# Cheminformatics stack
import rdkit
from rdkit import Chem
from rdkit.Chem import rdDistGeom
from rdkit.Chem import AllChem
from rdkit.Chem import FragmentMatcher
from rdkit.Chem import rdFMCS
from glob import glob

# For segfault
import faulthandler
import signal
from subprocess import Popen, PIPE
from rdkit.Chem import rdMMPA

In [5]:
import stk


def build_molecule_from_frag(
    right_block_smiles, photochromic_block_smiles, catalytic_block_smiles,
    inhibitor_block
):
        """
        This function takes the 3 fragments and builds a molecule using the STK library.
        Parameters:
        right_block: RDKit molecule, right block
        left_block: RDKit molecule, left block
        linker: RDKit molecule, linker

        """
        right_block_smiles = right_block_smiles.replace("[*:2]", "Br")
        try:
                bb_right = stk.BuildingBlock(
                        smiles=right_block_smiles, functional_groups=[stk.BromoFactory()]
                )
        except:
                print("Error with right block")
                print(right_block_smiles)
                return None
        try:
                bb_photo = stk.BuildingBlock(
                        smiles=photochromic_block_smiles, functional_groups=[stk.BromoFactory()]
                )
        except:
                print("Error with photo block")
                print(photochromic_block_smiles)
                return None
        try:
                catalytic_block_smiles = catalytic_block_smiles.replace("[*:1]", "Br")
                catalytic_block_smiles = catalytic_block_smiles.replace("[*:2]", "Br")
                bb_linker = stk.BuildingBlock(
                smiles=catalytic_block_smiles, functional_groups=[stk.BromoFactory()]
                )
        except:
                print("Error with linker block")
                print(catalytic_block_smiles)
                return None
        try: 
                inhibitor_block = inhibitor_block.replace("[*:2]", "Br")
                bb_inhibitor = stk.BuildingBlock(
                        smiles=inhibitor_block, functional_groups=[stk.BromoFactory()]
                )
        except:
                print("Error with inhibitor block")
                print(inhibitor_block)
                return None
        
        try:
                constructed_molecule = stk.ConstructedMolecule(
                        stk.polymer.Linear(
                        building_blocks=[bb_inhibitor,bb_photo, bb_linker, bb_right],
                        repeating_unit="ABCD",
                        num_repeating_units=1,
                        optimizer=stk.MCHammer(),
                        )
                )
        except:
                print("Error with constructed molecule")
                return None
        return constructed_molecule

In [6]:
df_catalysts_right_block = pd.read_csv('data/catalyst_right_blocks.csv')
df_catalysts_right_block['catalytic_block'] = df_catalysts_right_block['linkers_smiles'].apply(Chem.MolFromSmiles)
df_catalysts_right_block['right_block'] = df_catalysts_right_block['smiles'].apply(Chem.MolFromSmiles)
df_catalysts_right_block['catalytic_block_smiles'] = df_catalysts_right_block['linkers_smiles']
df_catalysts_right_block['right_block_smiles'] = df_catalysts_right_block['smiles']
df_catalysts_right_block['number_of_atoms_right_block'] = df_catalysts_right_block['right_block'].apply(lambda x: x.GetNumAtoms())
df_catalysts_right_block['number_of_atoms_catalytic_block'] = df_catalysts_right_block['catalytic_block'].apply(lambda x: x.GetNumAtoms())
df_catalysts_right_block_unique = df_catalysts_right_block.drop_duplicates(subset=['smiles'])
df_linkers = df_catalysts_right_block[['linkers_smiles','catalytic_block']]
df_linkers['smiles'] = df_linkers['linkers_smiles'] 
df_catalysts_linker_unique = df_linkers.drop_duplicates(subset=['smiles'])
df_catalysts_right_block_unique = df_catalysts_right_block_unique[['smiles']]
df_catalysts_right_block_unique.to_csv('data/catalyst_right_blocks_unique.csv', index=False)
df_catalysts_linker_unique.to_csv('data/catalyst_linkers_unique.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_linkers['smiles'] = df_linkers['linkers_smiles']


In [7]:
df_catalysts_right_block_unique

Unnamed: 0,smiles
0,C#CC[*:2]
2,C#C[*:2]
3,c1ccc([*:2])cc1
5,C[Si](C)(C)[*:2]
11,COC(=O)[C@H](C)[*:2]
...,...
10876,c1ccc2c(c1)cc([*:2])c1ccccc12
10896,C(=Nc1ccccc1[*:2])c1c2ccccc2cc2ccccc12
10899,Clc1ccc(C=Nc2ccccc2[*:2])cc1
10901,S=C(Nc1ccccc1)Nc1ccccc1[*:2]


In [8]:
df_linker_filtered = pd.read_csv('data/2_murcko_azo_photo_cores_Br_filtered.txt',delimiter='\t')
df_linker_filtered = df_linker_filtered.drop_duplicates()
df_linker_filtered['photochromic_block'] = df_linker_filtered['smiles'].apply(Chem.MolFromSmiles)
df_linker_filtered['photochromic_block_smiles'] = df_linker_filtered['smiles']
df_linker_filtered.to_csv('data/2_murcko_azo_photo_cores_Br_filtered.csv', index=False)
print('number of photochromic blocks:', len(df_linker_filtered))
print('number of block on the right of the catalyst:', len(set(df_catalysts_right_block_unique['smiles'])))
print('number of catalysts units:', len(set(df_catalysts_linker_unique['smiles'])))
print(f"size of the dataset: {len(df_linker_filtered)*len(set(df_catalysts_right_block_unique['smiles']))*len(set(df_catalysts_linker_unique['smiles'])):.2e}")

number of photochromic blocks: 411
number of block on the right of the catalyst: 1045
number of catalysts units: 7980
size of the dataset: 3.43e+09


In [9]:
df_inhibitor = pd.read_csv('data/inhibitor_right_blocks.csv')
df_inhibitor['inhibitor_block'] = df_inhibitor['smiles'].apply(Chem.MolFromSmiles)
df_inhibitor['inhibitor_block_smiles'] = df_inhibitor['smiles']
print('number of inhibitors:', len(df_inhibitor))



number of inhibitors: 21


In [10]:
constructed_molecule_list = []
number_of_molecules = 100
rb_list, pb_list, cb_list,ib_list= [], [], [], []
for i in range(number_of_molecules):
    rb = df_catalysts_right_block.sample(3,random_state= i)['right_block_smiles'].values[0]
    pb = df_linker_filtered.sample(3,random_state= i)['photochromic_block_smiles'].values[0]
    cb = df_catalysts_right_block.sample(3,random_state= i)['catalytic_block_smiles'].values[0]
    ib = df_inhibitor.sample(3,random_state= i)['inhibitor_block_smiles'].values[0]
    constructed_molecule = build_molecule_from_frag(rb, pb, cb,ib)
    if constructed_molecule is not None:
        rb_list.append(rb)
        pb_list.append(pb)
        cb_list.append(cb)
        ib_list.append(ib)
        constructed_molecule_list.append(constructed_molecule.to_rdkit_mol())
print('number of constructed molecules:', len(constructed_molecule_list))

  return np.divide(vector, np.linalg.norm(vector))


Error with constructed molecule
Error with constructed molecule
Error with constructed molecule
Error with constructed molecule
Error with constructed molecule
Error with constructed molecule
Error with constructed molecule
Error with constructed molecule
Error with constructed molecule
Error with linker block
COc1ccc2nccc([C@@H](NC(=S)Nc3cc(C(F)(F)F)cc(C(F)(F)Br)c3)[C@H]3C[C@H]4CC[N@@]3C[C@H]4Br)c2c1
Error with linker block
C=C[C@H]1C[N@@]2CC[C@H]1C[C@@H]2[C@H](NC(=S)Nc1cc(C(F)(F)Br)cc(Br)c1)c1ccnc2ccc(OC)cc12
Error with constructed molecule
Error with constructed molecule
Error with constructed molecule
Error with constructed molecule
Error with linker block
C=C[C@H]1C[N@@]2CC[C@H]1C[C@H]2[C@@H](NC(=O)NS(=O)(=O)Br)Br
Error with constructed molecule
Error with constructed molecule
Error with constructed molecule
number of constructed molecules: 81


In [11]:
constructed_molecule_list_smiles = [Chem.MolToSmiles(x) for x in constructed_molecule_list]
pd_constructed_molecules = pd.DataFrame((constructed_molecule_list_smiles,rb_list,pb_list,cb_list,ib_list)).T
pd_constructed_molecules.columns=['smiles','right_block','photo_block','catalytic_block','inhibitor_block']
pd_constructed_molecules.head()
pd_constructed_molecules.to_csv('constructed_organo_photocat.csv',index=False)