In [1]:
from utils import get_smirks, is_center_bond_single, smiles_to_image_grid_mod, get_matching_substituents

In [2]:
# 1. find uncovered torsions
#
# Reads the per-torsion cluster lists, classifies every torsion id by how many
# clusters it has (0 -> uncovered, 1-2 -> poorly covered), keeps the uncovered
# torsions whose central bond is single, prints a summary and pickles the
# resulting id list to 'single_uncovered.p'.
import pickle

from openff.toolkit.typing.engines.smirnoff import ForceField

# NOTE(security): pickle.load can execute arbitrary code; only use files
# produced by this workflow.
# Context manager so the handle is closed deterministically.
with open('tid_clusters_list.p', 'rb') as clusters_file:
    round1 = pickle.load(clusters_file)

forcefield = ForceField('result.offxml', allow_cosmetic_attributes=True)
ff_torsion_param_list = forcefield.get_parameter_handler('ProperTorsions').parameters

uncovered = []
poorly_covered = []
for tid, clusters in round1.items():
    if len(clusters) == 0:
        uncovered.append(tid)
    elif len(clusters) < 3:
        poorly_covered.append(tid)

print(f'# uncovered: {len(uncovered)}, # poorly covered: {len(poorly_covered)}')

# Look up each SMIRKS once and keep it next to its torsion id so the report
# below does not need a second lookup.
# NOTE(review): assumes get_smirks is a pure lookup - confirm in utils.
single_uncovered = []
single_uncovered_smirks = []
for tid in uncovered:
    smirks = get_smirks(ff_torsion_param_list, tid)
    if is_center_bond_single(smirks):
        single_uncovered.append(tid)
        single_uncovered_smirks.append(smirks)
print(f'# single uncovered: {len(single_uncovered)}')

for count, (uncovered_tid, smirks) in enumerate(zip(single_uncovered, single_uncovered_smirks)):
    print(f' {count}: {uncovered_tid}, {smirks}')

# Close (and therefore flush) the output file before later steps rely on it.
with open('single_uncovered.p', 'wb') as single_uncovered_file:
    pickle.dump(single_uncovered, single_uncovered_file)

# uncovered: 46, # poorly covered: 4
# single uncovered: 20
 0: t8, [#35:1]-[#6X4:2]-[#6X4:3]-[#35:4]
 1: t12, [#1:1]-[#6X4:2]-[#6X4:3]-[#35:4]
 2: t30, [#6X3:1]-[#6X4;r3:2]-[#6X3:3]-[#7X3:4]
 3: t31, [#6X3:1]-[#6X4;r3:2]-[#6X3:3]=[#8X1:4]
 4: t32, [#6X3:1]-[#6X4;r3:2]-[#6X3:3]~[#6X3:4]
 5: t33, [#7X3:1]-[#6X4;r3:2]-[#6X3:3]~[#6X3:4]
 6: t36, [#6X4;r3:1]-;@[#6X4;r3:2]-[#6X3;r5:3]-;@[#6X3;r5:4]
 7: t50, [*:1]-[#6X4:2]-[#7X4:3]-[*:4]
 8: t51b, [*:1]-[#6X4:2]-[#7X3:3]-[#7X2:4]=[#7X2,#8X1]
 9: t51bh, [#1:1]-[#6X4:2]-[#7X3:3]-[#7X2:4]=[#7X2,#8X1]
 10: t51c, [*:1]-[#6X4:2]-[#7X3$(*@1-[*]=,:[*][*]=,:[*]@1):3]-[*:4]
 11: t51ch, [#1:1]-[#6X4:2]-[#7X3$(*@1-[*]=,:[*][*]=,:[*]@1):3]-[*:4]
 12: t58, [*:1]-[#7X4:2]-[#6X3:3]~[*:4]
 13: t68, [*:1]~[#7X3,#7X2-1:2]-[#6X3:3]~[*:4]
 14: t104, [*:1]=[#8X2+1:2]-[#6:3]~[*:4]
 15: t117, [*:1]-[#8:2]-[#8H1:3]-[*:4]
 16: t136, [#6X3:1]-[#16X4,#16X3+0:2]-[#7X4,#7X3:3]-[#1:4]
 17: t138, [#6X3:1]-[#16X4,#16X3+0:2]-[#7X4,#7X3:3]-[#6X4:4]
 18: t141, [#6X3:1]-[#16X4,

In [3]:
# 2. Load the substituent list
# NOTE(security): pickle.load can execute arbitrary code; only use files
# produced by this workflow.
# The context manager closes the handle instead of leaving it to the GC.
with open('substituents_filtered.p', 'rb') as substituents_file:
    substituents_filtered = pickle.load(substituents_file)

In [4]:
# 3. Load additional substituent list (hand-picked)
# One entry per line of the .smi file; empty lines are dropped.
with open('supplemental_substituents.smi', 'r') as smi_file:
    raw_entries = smi_file.read().split("\n")
supplemental_substituents = [entry for entry in raw_entries if entry]

# Render the hand-picked entries to a PDF for visual inspection.
smiles_to_image_grid_mod(
    supplemental_substituents,
    output_path='supplemental_substituents.pdf',
)

In [5]:
# 4. combine the lists
# Set union of the filtered and the hand-picked entries; duplicates collapse,
# so the total may be smaller than the sum of the two counts.
substituents_filtered_new = set(substituents_filtered).union(supplemental_substituents)

# Report the size of each input and of the merged set.
print(f'# substituents: {len(substituents_filtered)}')
print(f'# additional substituents: {len(supplemental_substituents)}')
print(f'# tot: {len(substituents_filtered_new)}')

# substituents: 292
# additional substituents: 17
# tot: 309


In [6]:
# 5. gen dict[tid]={frag1:[sub1, sub2, ...], frag2: [suba, subb, ...]}
# For every single-bond uncovered torsion, resolve its SMIRKS and store
# whatever get_matching_substituents returns for it, keyed by torsion id.
selected_substituents_tot = {
    tid: get_matching_substituents(
        get_smirks(ff_torsion_param_list, tid),
        substituents_filtered_new,
    )
    for tid in single_uncovered
}

In [7]:
# Sanity check: list every (torsion id, fragment) pair whose substituent list
# is empty, in dictionary order.
empty_matches = [
    (tid, frag)
    for tid, frags in selected_substituents_tot.items()
    for frag, sublist in frags.items()
    if len(sublist) == 0
]
for tid, frag in empty_matches:
    print(tid, frag)

In [8]:
import pickle

# Persist the per-torsion substituent selection. The context manager closes
# (and therefore flushes) the file, so the pickle is complete on disk once
# this cell finishes.
with open('selected_substituent_dict.p', 'wb') as selection_file:
    pickle.dump(selected_substituents_tot, selection_file)