In [1]:
# Inputs for converting an NCAA into a "dipeptide": every record pairs a
# name (ncaa_entry) with the SMILES string of the free amino acid.

# Example entry
ncaa_entry, smiles = "BP5", "c1ccnc(c1)c2ccc(cn2)C[C@@H](C(=O)O)N"
ncaa_info = [[ncaa_entry, smiles]]

In [3]:
# add NME and ACE
import os

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw

# Collected results: one [capped_mol, ncaa_entry] record per accepted product.
modified_ncaa_mols = []

# Query for the fully capped backbone; used below to keep only the products
# in which both caps are present around the same backbone carbon.
dipep_bb_mol = Chem.MolFromSmarts("CC(=O)NCC(=O)NC")
# Cap fragments that replace the acid group and the amino group, respectively.
nme_mol = Chem.MolFromSmiles("C(=O)NC")
ace_mol = Chem.MolFromSmiles("NC(=O)C")
# Groups of the free amino acid that are swapped for the caps.
amino_mol = Chem.MolFromSmarts("N")
acid_mol = Chem.MolFromSmarts("C(=O)O")

for ncaa_entry, smiles in ncaa_info:
    ncaa_mol = Chem.MolFromSmiles(smiles)
    if ncaa_mol is None:
        # A SMILES that cannot be parsed yields None; report the entry by name
        # instead of failing later inside ReplaceSubstructs.
        print("WARNING: {}: could not parse SMILES {!r}; entry skipped".format(ncaa_entry, smiles))
        continue

    n_capped = 0
    # Each replacement step can return several candidates, so every
    # combination is tried and filtered with the backbone query.
    add_nme_mols = AllChem.ReplaceSubstructs(ncaa_mol, acid_mol, nme_mol, useChirality=True)
    for mol_1 in add_nme_mols:
        add_ace_mols = AllChem.ReplaceSubstructs(mol_1, amino_mol, ace_mol, useChirality=True)
        for mol_2 in add_ace_mols:
            if mol_2.HasSubstructMatch(dipep_bb_mol):
                Chem.SanitizeMol(mol_2)
                modified_ncaa_mols.append([mol_2, ncaa_entry])
                n_capped += 1

    if n_capped != 1:
        # Zero products means the entry would vanish silently; several products
        # share one name and would later be written to the same output file.
        print("WARNING: {}: expected exactly 1 capped product, got {}; check the structure".format(ncaa_entry, n_capped))

### Check the resulting structures visually: this script does not handle every NCAA correctly ###
# image = Draw.MolsToGridImage([x[0] for x in modified_ncaa_mols], molsPerRow=3, subImgSize=(300,300), legends=[x[1] for x in modified_ncaa_mols], returnPNG=False, maxMols=100)
# image

In [4]:
# 2D to 3D: add hydrogens, embed conformers, relax them with MMFF, and write
# one MOL file per entry into mol_path.
# NOTE(review): the embedding is not seeded, so coordinates can differ between
# runs; pass randomSeed=... to EmbedMultipleConfs if reproducibility matters.
mol_path = "../input/mol/"
os.makedirs(mol_path, exist_ok=True)  # the output directory may not exist yet

for mol, ncaa_entry in modified_ncaa_mols:
    mol = Chem.AddHs(mol)
    # EmbedMultipleConfs builds its own conformer set (it clears existing
    # conformers by default), so a separate EmbedMolecule call beforehand
    # would only be discarded.
    conf_ids = AllChem.EmbedMultipleConfs(mol, numConfs=50)
    if len(conf_ids) == 0:
        # Without conformers there are no 3D coordinates worth exporting.
        print("WARNING: {}: 3D embedding produced no conformer; no file written".format(ncaa_entry))
        continue
    AllChem.MMFFOptimizeMoleculeConfs(mol, numThreads=0)

    mol.SetProp("_Name", ncaa_entry)
    ncaa_file = os.path.join(mol_path, ncaa_entry + ".mol")
    # NOTE(review): only the default conformer is exported although 50 are
    # optimized - confirm whether the lowest-energy one was intended.
    # The context manager closes (and flushes) the file; print() keeps the
    # trailing newline after the mol block.
    with open(ncaa_file, "w") as mol_file:
        print(Chem.MolToMolBlock(mol), file=mol_file)