# HIPPO for 2A Protease (2/?)

## Imports

In [1]:
# Load IPython's autoreload extension so edited modules can be re-imported
# without restarting the kernel.
%load_ext autoreload
# %pprint  (disabled: IPython magic that toggles pretty-printing of outputs)

In [2]:
# Autoreload mode 2: reload all modules before each cell is executed.
%autoreload 2
import json
from pathlib import Path
from pprint import pprint

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from rdkit import Chem

import hippo2 as hippo
import molparse as mp
from hippo2.cset import CompoundSet

## Load the project

In [3]:
# Load the HIPPO project from the hits pickle; `animal` is the project object
# used by every cell below.
animal = hippo.HIPPO.from_pickle('pickles/2A_hits_v2p1.pickle')
# Alternative input pickle (disabled):
# animal = hippo.HIPPO.from_pickle('pickles/2A_comps_restart.pickle')

[36mpath[0m = [38;5;11mpickles/2A_hits_v2p1.pickle[0m[95m [0m[0m


In [4]:
# Print an overview of the loaded project (target, limits, compound sets, tags).
animal.summary()

[1mHIPPO(hippo2_test)[0m[0m                              
[36mtarget_name[0m = A71EV2A[0m[95m [0m[0m            
[36mmax_lead_time[0m = 20[0m[95m workdays[0m[0m       
[36mmax_bb_price[0m = 100[0m[95m $[0m[0m              
[36mmin_bb_quantity[0m = 20[0m[95m mg[0m[0m           
[36m#compound_sets[0m = 1[0m[95m [0m[0m               
[36m#compounds[0m = 38[0m[95m [0m[0m                  
[36m#poses[0m = 49[0m[95m [0m[0m                      
[36m#tags[0m = 4[0m[95m [0m[0m                        
[0m                                                        
[4mcompound sets:[0m                                      
CompoundSet("hits", #compounds=38, #poses=49)[0m           
[0m                                                        
[4mtags:[0m                                               
hits #compounds=37, #poses=49[0m                           
Catalytic Site #compounds=0, #poses=7[0m                   
P1 #compounds=0, #poses=

## Load Syndirella outputs

In [5]:
# Root directory of the Syndirella outputs; passed to add_elabs() as its first
# argument further down.
syndirella_root = Path('/data/xchem-fragalysis/kfieseler/A71EV2A')

In [11]:
# Import the Syndirella elaboration CSVs found under `syndirella_root` into the
# project, using hit x0310_0A as the reference.
# NOTE(review): the recorded run of this cell was interrupted
# (KeyboardInterrupt) while processing the first CSV (j=0/27).
# NOTE(review): the meaning of `test` is defined by add_elabs(), which is not
# visible here — confirm before changing the value.
test = 28
animal.add_elabs(
    syndirella_root,
    test=test,
    reference_hit='x0310_0A',
    overwrite=True,
    # Plain string literal: the original f-string had nothing to interpolate.
    pickle_dump='pickles/fstein_json_test.pickle',
    restart_j=0,  # presumably the index of the CSV to (re)start from — confirm
    debug=False,
    rmsd_cut=2,
    # Glob pattern for the elaboration CSVs; the logged match lies under the root.
    elabs_csv_pattern='elabs/*/*/steph*/*.csv',
)

[36mroot_path[0m = /data/xchem-fragalysis/kfieseler/A71EV2A[0m[95m [0m[0m
[36mtags[0m = ['Syndirella'][0m[95m [0m[0m            
[1mSyndirella Synthetic Routes CSV[0m[0m                 
[36mroutes_csv_pattern[0m = routes_data/*.csv[0m[95m [0m[0m
[36mpaths[0m[#=0] = [][0m[95m [0m[0m                  
[1mSyndirella Elaborations CSVs[0m[0m                    
[36melabs_csv_pattern[0m = elabs/*/*/steph*/*.csv[0m[95m [0m[0m
[36mminimised_mol_suffix[0m = .minimised.mol[0m[95m [0m[0m
[36melabs_skip_prefix[0m[#=2] = [., ~$][0m[95m [0m[0m 
[36melabs_skip_substr[0m[#=1] = [_batch_][0m[95m [0m[0m
[36melabs_skip_exact[0m[#=1] = [output.csv][0m[95m [0m[0m
[36m#elab CSVs[0m = 27[0m[95m [0m[0m                  
[1m[34m>>> [1mj=0/27[0m[0m                             
[36mcsv[0m = [38;5;11m/data/xchem-fragalysis/kfieseler/A71EV2A/elabs/1_step_dec7_batched/batch_1/steph_fragnetv2_Z1864315293/steph_fragnetv2_Z1864315293_1_of_1_ste

KeyboardInterrupt: 

## Run add_elabs as a shell script

In [6]:
# Run the elaboration import out-of-process through comps_shell.py.
# NOTE(review): the recorded run failed with "pthread_create() ... Resource
# temporarily unavailable", and the later load of the -o pickle raised
# FileNotFoundError. The flag meanings (-i/-o/-s/-f/-c) are defined in
# comps_shell.py, not here — confirm there.
!./comps_shell.py -i 'pickles/2A_hits_v2p1.pickle' -o 'pickles/2A_comps_shell_allfragnet_2.pickle' -s '/data/xchem-fragalysis/kfieseler/A71EV2A' -f 'steph' -c 2

ERROR; return code from pthread_create() is 11
	Error detail: Resource temporarily unavailable


In [7]:
# !./comps_shell.py -i 'pickles/hippo2_test_comps_shell_restart.pickle' -o 'pickles/2A_comps_shell_allfragnet.pickle' -s '/data/xchem-fragalysis/kfieseler/A71EV2A' -f 'steph' -r 19

In [8]:
# Rebind `animal` to the project stored at the path given as -o to
# comps_shell.py; this raises FileNotFoundError if that file was never written.
animal = hippo.HIPPO.from_pickle('pickles/2A_comps_shell_allfragnet_2.pickle')

[36mpath[0m = [38;5;11mpickles/2A_comps_shell_allfragnet_2.pickle[0m[95m [0m[0m


FileNotFoundError: [Errno 2] No such file or directory: 'pickles/2A_comps_shell_allfragnet_2.pickle'

In [None]:
# Quick look: total compound count, then the last compound as the cell output.
print(animal.num_compounds)
animal.compounds[-1]

In [None]:
def refresh_compound_set(project, set_name, tag):
    """Create or replace the compound set `set_name` from compounds tagged `tag`.

    Prints a progress line, then either overwrites the existing entry in
    ``project.compound_sets`` or appends a new ``CompoundSet``, and finally
    prints the attribute of the same name on ``project``.

    Args:
        project: the HIPPO project object (``animal`` in this notebook).
        set_name: key of the compound set, e.g. ``'bases'``.
        tag: tag passed to ``project.get_compounds``, e.g. ``'base'``.
    """
    print(f'getting {set_name}')
    if set_name in project.compound_sets:
        # NOTE(review): this branch stores the raw get_compounds() result while
        # the branch below wraps it in a CompoundSet — kept as in the original;
        # confirm that compound_sets item assignment accepts this.
        project.compound_sets[set_name] = project.get_compounds(tag)
    else:
        project.compound_sets.append(
            CompoundSet(set_name, project.get_compounds(tag))
        )
    print(getattr(project, set_name))


# Same procedure for both sets; previously two copy-pasted stanzas.
refresh_compound_set(animal, 'bases', 'base')
refresh_compound_set(animal, 'elabs', 'elab')

# Private HIPPO method, called with debug output enabled.
animal._update_bb_amounts(debug=True)

# Persist the updated project.
animal.write_pickle('pickles/2A_HIPPO_comps_allfragnet_2.pickle')

In [None]:
# Invariant: every compound is either a Syndirella-tagged compound or a hit.
total_compounds = animal.num_compounds
syndirella_compounds = animal.get_compounds('Syndirella')
assert total_compounds == len(syndirella_compounds) + len(animal.hits)
print(total_compounds)
print(animal.elabs)

## Visualisation

In [None]:
# animal.plot_tag_statistics(log_y=True)

In [None]:
# Plot the synthetic routes. The html/png arguments look like output path stems
# (no extension given) — confirm against plot_synthetic_routes().
animal.plot_synthetic_routes(html='graphs/elabs_routes_allfragnet', png='graphs/elabs_routes_allfragnet')

In [None]:
# Plot reactant amounts for the current project, with default arguments.
animal.plot_reactant_amounts()

In [None]:
#animal.plot_building_blocks(html='graphs/elabs_bbs', png='graphs/elabs_bbs')

In [None]:
# Call the project's plot_numbers() with default arguments.
animal.plot_numbers()

## Random

In [None]:
# Unconditional stop. NOTE(review): presumably a deliberate barrier so that
# "Run All" halts before the ad-hoc cells that follow — confirm.
raise Exception

In [None]:
# Load one elaboration CSV into `df`. Note that the following cell rebinds `df`
# to a different CSV, so this frame is only used if that cell is skipped.
df = pd.read_csv('/data/xchem-fragalysis/kfieseler/A71EV2A/elabs/1_step_dec6_batched/batch_1/matteo_ferla_rocs_cov_Z2737383535/matteo_ferla_rocs_cov_Z2737383535_1_of_1_step_10080.csv')

In [None]:
# Load one elaboration CSV into `df`; the cells below inspect this frame.
df = pd.read_csv('/data/xchem-fragalysis/kfieseler/A71EV2A/elabs/1_step_dec7_batched/batch_2/steph_fragnetv2_PV-002862252644/steph_fragnetv2_PV-002862252644_1_of_1_step_26729.csv')

In [None]:
# Show the metadata of the first row whose second reactant has this SMILES.
target_reactant_smiles = 'Cc1cccc2c1OC(C)(C)CNC2'
matches_reactant = df['smi_reactant2'] == target_reactant_smiles
pprint(df.loc[matches_reactant, 'metadata_reactant2'].values[0])

In [None]:
# Parse the first row's reactant-1 metadata string into a Python object.
# NOTE(review): eval() executes whatever code the CSV field contains. If the
# field only ever holds literals, ast.literal_eval would be a safer
# replacement — confirm against the data before switching.
metadata_reactant1 = eval(df['metadata_reactant1'].values[0])

In [None]:
# Display the parsed metadata object as the cell output.
metadata_reactant1

In [None]:
# Pretty-print the reactant-1 metadata as indented JSON.
# The raw CSV field is a string (it is fed to eval() in the earlier
# `metadata_reactant1 = ...` cell), and json.dumps() on a string emits a single
# quoted JSON string where `indent` has no effect. Dump the parsed object from
# that cell instead.
# default=str keeps the dump from failing on values JSON cannot encode natively.
print(json.dumps(metadata_reactant1, indent=2, default=str))

In [None]:
# Show the product and reactant columns for the row labelled 20.
route_columns = ['name', 'smiles', 'smi_reactant1', 'smi_reactant2']
df.loc[20, route_columns]

In [None]:
# First row of the `base_name` column, as a one-element array.
df[['base_name']].values[0]

In [None]:
# NOTE(review): `sub` is not defined anywhere in the visible notebook, so this
# cell raises NameError on a fresh kernel — define it or remove the cell.
sub.to_csv('test.csv')

In [None]:
# All values of the `name` column as an array.
df['name'].values

In [None]:
# Compounds carrying the 'Syndirella' tag.
animal.get_compounds('Syndirella')

In [None]:
# Display the project's full compound collection.
animal.compounds

In [None]:
# Take the first elaboration and print its summary; `elab` is reused by the
# next cell.
elab = animal.elabs[0]
elab.summary()

In [None]:
# Display the molecule of the currently selected elaboration.
elab.mol

In [None]:
# Find the first elaboration with more than one pose and summarise each of its
# poses. `elab` and `p` are intentionally left bound afterwards: the following
# cells read them.
for elab in animal.elabs:
    if elab.num_poses <= 1:
        continue
    for p in elab.poses:
        p.summary()
    break

In [None]:
# `p` is whatever pose the loop in the previous cell last bound.
p.longname

In [None]:
# Draw the molecules of all poses of the currently selected elaboration.
mp.rdkit.draw_mols([p.mol for p in elab.poses])

In [None]:
# Print the text representation of the project's elaborations.
print(animal.elabs)

In [None]:
# for i,c in enumerate(animal.elabs):
    # print(i, c.name, animal.elabs.names.count(c.name), animal.elabs.smiles.count(c.smiles))
