```bash
# Install the fragment-network merging code straight from GitHub, then the extra Python packages.
python -m pip install git+https://github.com/stephwills/fragment_network_merges.git
python -m pip install joblib neo4j im-data-manager-job-utilities pebble
# System packages installed through apt (needs sudo).
sudo apt-get install ffmpeg libsm6 libxext6  -y
# Kubernetes config location and Neo4j settings.
# NOTE(review): the password looks like a redacted placeholder — substitute the real value.
export KUBECONFIG=$HOME/config-fragnet
export NEO4J_USER=matteo
export NEO4J_PASS=👾👾👾
export USE_NEO4J_INSTEAD_API=true
# Forward ports 7474 and 7687 of graph-0 (namespace graph-b) to localhost, both in the background.
# Presumably the Neo4j HTTP and Bolt ports — confirm against the cluster setup.
kubectl port-forward -n graph-b graph-0 7474:7474 &
kubectl port-forward -n graph-b graph-0 7687:7687 &
```

In [6]:
import os

# Environment for the fragment-network search, set in one go.
# NOTE(review): the Neo4j user and password are hardcoded; the password looks like a
# redacted placeholder — prefer supplying real credentials from outside the notebook.
os.environ.update({
    'KUBECONFIG': os.environ['HOME'] + '/config-fragnet',
    'NEO4J_USER': 'matteo',
    'NEO4J_PASS': '👾👾👾',
    'USE_NEO4J_INSTEAD_API': 'true',
    # data folder located under the current working directory
    'FRAGALYSIS_DATA_DIR': os.path.join(os.getcwd(), 'fauxalysis'),
})

In [5]:
## Import hits

from rdkit import Chem

# SDMolSupplier yields None for records RDKit cannot parse; drop those up front so the
# property look-ups below do not fail with AttributeError on a None entry.
with Chem.SDMolSupplier('filtered_hits.sdf') as sds:
    loaded = list(sds)
hits = [hit for hit in loaded if hit is not None]
if len(hits) != len(loaded):
    print(f'{len(loaded) - len(hits)} unparsable entries skipped in filtered_hits.sdf')

# Parallel lists: the `_Name` property and the SMILES of every hit.
hit_names = [hit.GetProp('_Name') for hit in hits]
hit_smileses = [Chem.MolToSmiles(hit) for hit in hits]
# name -> molecule look-up (a repeated name keeps the last molecule)
hitdex = dict(zip(hit_names, hits))

In [7]:
## Fauxalysis
from typing import List

def make_fauxalysis(hits: List['Chem.Mol'], target_name: str, base_folder='.') -> None:
    """
    Write the hits into a folder tree and point ``FRAGALYSIS_DATA_DIR`` at its root.

    Each hit is saved as
    ``{base_folder}/{target_name}/aligned/{target_name}-{hit_name}/{target_name}-{hit_name}.mol``
    where ``hit_name`` is the molecule's ``_Name`` property.

    :param hits: molecules to write; each must carry a ``_Name`` property.
    :param target_name: label used in the folder and file names.
    :param base_folder: root of the tree; created, including missing parents, if absent.
    :return: None. Side effects: files written to disk and the environment variable set.
    """
    # makedirs instead of exists()+mkdir(): tolerates an already existing folder
    # and creates missing parent folders as well.
    os.makedirs(base_folder, exist_ok=True)
    os.environ['FRAGALYSIS_DATA_DIR'] = base_folder
    for hit in hits:
        hit_name: str = hit.GetProp('_Name')
        label = f'{target_name}-{hit_name}'
        hit_path = os.path.join(base_folder, f'{target_name}', 'aligned', label)
        os.makedirs(hit_path, exist_ok=True)
        Chem.MolToMolFile(hit, os.path.join(hit_path, f'{label}.mol'))
           
# Target label: used for the folder/file names written here and reused by the search cell.
target_name = 'EV-D68-protease'
make_fauxalysis(hits=hits,
                target_name=target_name,
                base_folder=os.path.join(os.getcwd(), 'fauxalysis'))

In [None]:
## Search

# gzip, pickle and Dict are used below; import them here so the cell runs on a fresh kernel.
import gzip
import pickle
from typing import Dict, List

from merge import query
from merge.find_merges import getFragmentNetworkSearcher
from merge.find_merges_generic import MergerFinder_generic  # solely for typehinting

searcher: MergerFinder_generic = getFragmentNetworkSearcher()

# Presumably drops hits that are absent from the network, then pairs up the remainder —
# confirm against the `merge` package.
valid_smileses, valid_names = searcher.filter_for_nodes(hit_smileses, hit_names)
smiles_pairs, name_pairs = searcher.get_combinations(valid_smileses, valid_names)

# One entry per pair: the expansions (keyed by synthon) plus the pair they came from.
all_mergers: List[Dict] = []
for smiles_pair, name_pair in zip(smiles_pairs, name_pairs):
    mergers: Dict[str, List[str]] = searcher.get_expansions(smiles_pair, name_pair, target_name, 'output')
    all_mergers.append(dict(mergers=mergers, smiles_pair=smiles_pair, name_pair=name_pair))

# Save the raw search results.
with gzip.open('fragnet_pre.pkl.gz', 'wb') as fh:
    pickle.dump(all_mergers, fh)

# Counts: pairs, synthons over all pairs, mergers over all synthons.
print(len(all_mergers),
      sum(len(m['mergers']) for m in all_mergers),
      sum(len(mm) for m in all_mergers for mm in m['mergers'].values()))

2 fragments removed from list. 21 fragments remaining.
Expanding fragment A: x1498L0AP1 with synthons of fragment B: x1498L0AP2
Found 2 synthons
1 synthons remaining after filtering
Running synthon 0


In [None]:
## Parse synthons

import operator
import pandas as pd
from fragmenstein import Victor, Laboratory, Igor
from rdkit.Chem import PandasTools

# One frame per hit pair, one row per synthon (the keys of `mergers`).
# 'Xe' is swapped for 'H' in the SMILES; the untouched string is kept as `xenonic`.
# Rows are attributed to the second hit of the pair.
dfs = [ pd.DataFrame([{'smiles': synthon.replace('Xe', 'H'),
                       'original_name': f'{merge_info["name_pair"][1]}-synthon{i}',
                       'xenonic': synthon,
                       'parent': merge_info['name_pair'][1],
                       'hits': [hitdex[merge_info['name_pair'][1]]]} for i, synthon in enumerate(merge_info['mergers'].keys())])
       for merge_info in all_mergers
      ]

# ignore_index: every per-pair frame starts at 0, so a plain concat would repeat index labels.
synthons = pd.concat(dfs, axis='index', ignore_index=True)

# fix duplicated
synthons['inchi'] = synthons.smiles.apply(Chem.MolFromSmiles).apply(Chem.RemoveAllHs).apply(Chem.MolToInchiKey)
# reset_index: keep a unique, gap-free 0..n-1 index, because the column assignments
# further down are aligned by pandas on index labels.
synthons = synthons.drop_duplicates(['parent', 'inchi']).reset_index(drop=True)
# sequential per-parent names: <parent>S1, <parent>S2, ...
synthons['name'] = synthons.parent +'S'+ (synthons.groupby(['parent']).cumcount()+1).astype(str)
Igor.init_pyrosetta()
# NOTE(review): `pdb_block` is not defined in the visible cells — it must be set before this runs.
placed_synthons = Laboratory(pdbblock=pdb_block, covalent_resi=None).place(synthons, n_cores=55)

def fix_name(row):
    """Rebuild the molecule from `unmin_binary` and stamp the row's `name` on it as `_Name`."""
    # error... min_mol has it. not unmin.
    mol = Chem.Mol(row.unmin_binary)
    mol.SetProp('_Name', row['name'])
    return mol

# Index-aligned copies from the placement results.
# Assumes place() returns one row per input row, in input order or with the input index
# — TODO confirm.
synthons['∆∆G'] = placed_synthons['∆∆G']
synthons['unmin_mol'] = placed_synthons.apply(fix_name, axis=1)
PandasTools.WriteSDF(df=synthons,
                     out='fragnet-synthons.sdf',
                     molColName='unmin_mol', 
                     idName='name',
                     properties=['parent', '∆∆G'])

In [None]:
# Prints a fixed marker string; presumably signals that the preceding long cell has finished.
print('DONE')

In [None]:
# Flatten the pairwise search results into one record per enumerated merger,
# labelling each record with the renamed synthon it derives from.
# NOTE(review): `parent`, `inchi` and `unmin_mol` are read from `placed_synthons`, while the
# visible code writes them to `synthons` — confirm the placement output carries them too.
data = []
combodex: dict
for combodex in all_mergers:
    # keys available: 'mergers', 'smiles_pair', 'name_pair'
    first_name, second_name = combodex['name_pair']
    first: Chem.Mol = hitdex[first_name]
    for synthon_smiles, merger_smileses in combodex['mergers'].items():
        clean_smiles = synthon_smiles.replace('Xe', 'H')
        synthon_mol = Chem.RemoveAllHs(Chem.MolFromSmiles(clean_smiles))
        inchi = Chem.MolToInchiKey(synthon_mol)
        same_parent = placed_synthons['parent'] == second_name
        same_inchi = placed_synthons.inchi == inchi
        matched = placed_synthons.loc[same_parent & same_inchi]
        if len(matched) == 0:
            # no placed synthon found: fall back to the parent hit itself, tagged with 'X'
            print(first_name, second_name, synthon_smiles, 'missing!')
            # Z2111637360
            second = hitdex[second_name]
            synthon_name = second_name+'X'
        else:
            best = matched.iloc[0]
            if best['∆∆G'] > -1.:
                # skip crap floater fragments
                continue
            second = best.unmin_mol
            synthon_name = best['name']
        for i, smiles in enumerate(merger_smileses):
            name = f'{first_name}-{synthon_name}-{i}'
            data.append({'name': name,
                         'hits': [first, second],
                         'primary_name': first_name,
                         'secondary_parent': second_name,
                         'secondary_name': synthon_name,
                         'smiles': smiles.replace('Xe', 'H')})
tabular_combinations = pd.DataFrame(data)

In [None]:
## Place enumerations

# gzip is used below; import it here so the cell does not depend on another cell's imports.
import gzip

# NOTE(review): `pdb_block` is not defined in the visible cells — it must be set before this runs.
# Same keyword form as the synthon-placement call earlier in the notebook.
lab = Laboratory(pdbblock=pdb_block, covalent_resi=None)
Victor.monster_throw_on_discard = True
placed = lab.place(tabular_combinations, n_cores=55, expand_isomers=True)
# Save the placement table as a gzipped pickle.
with gzip.open('fragnet.placed.pkl.gz', 'wb') as fh:
    placed.to_pickle(fh)
# Cell output: rows ordered by ascending ∆∆G.
placed.sort_values('∆∆G', ascending=True)