In [1]:
%load_ext autoreload
%autoreload 2
import hippo
from pathlib import Path
from mlog import setup_logger
import pandas as pd
import numpy as np
import molparse as mp
from tqdm import tqdm
# from hippo_plot import write_html
# logger=setup_logger('notebook')
import mrich
import plotly.express as px
from hippo.plotting import plot_interaction_punchcard_by_tags

In [2]:
# Open the HIPPO project named 'CHIKV_prod10c'; the second argument names the
# SQLite file it is stored in (given here without a directory component).
# NOTE(review): update_legacy=True presumably upgrades an older database
# layout when loading — confirm against the HIPPO documentation.
animal = hippo.HIPPO('CHIKV_prod10c', 'CHIKV_prod10c.sqlite', update_legacy=True)

In [3]:
# Select the compounds carrying the "Syndirella base" tag; these are the
# scaffolds used by the rest of the notebook.
scaffolds = animal.compounds(tag="Syndirella base")
# NOTE(review): `.elabs` presumably returns the elaborations derived from
# these scaffolds — confirm against the HIPPO documentation.
elabs = scaffolds.elabs

In [None]:
# Interaction punchcard for the scaffolds and their elaborations.
# Keys are the group names handed to the plotting helper, values are the
# tags they refer to. Two further groups are currently disabled:
#   "hits": "hits"
#   "inspirations": "Syndirella scaffold inspirations"
punchcard_tags = dict(
    scaffolds="Syndirella base",
    elaborations="Syndirella elab",
)
fig = plot_interaction_punchcard_by_tags(animal, tags=punchcard_tags)

# Write the figure to an HTML file and to a PDF of fixed size, then show it inline.
mp.write("CHIKV_interaction_punchcard_3.html", fig)
mp.write(
    "CHIKV_interaction_punchcard_3.pdf",
    fig,
    width=1200,
    height=600,
)
fig

In [4]:
%%time
# Ask the database layer for the scaffold similarity records of the selected
# scaffolds. The result is kept in `d` and turned into a DataFrame in a later cell.
d = animal.db.get_scaffold_similarity_dict(scaffolds=scaffolds)
# Number of records returned (displayed as the cell output).
len(d)

Output()

CPU times: user 1.14 s, sys: 624 ms, total: 1.77 s
Wall time: 6.12 s


76720

In [5]:
# Tabulate the similarity records (columns seen in the recorded output:
# base_id, superstructure_id, similarity).
df = pd.DataFrame(d)

# Resolve every distinct base ID to its compound label exactly once instead of
# performing one compound lookup per row: the label depends only on base_id,
# and the table has far more rows than distinct scaffolds.
# .tolist() converts NumPy scalars to built-in Python values, matching what
# Series.apply handed to the lookup previously.
base_labels = {
    base_id: str(animal.compounds[base_id])
    for base_id in df["base_id"].unique().tolist()
}
df["base"] = df["base_id"].map(base_labels)

# Keep the ID as text from here on.
df["base_id"] = df["base_id"].astype(str)

# The sort key is the label string, so ordering is lexicographic
# (e.g. "C100" sorts ahead of "C99").
df = df.sort_values(by="base")
df.head()

Unnamed: 0,base_id,superstructure_id,similarity,base
2291,100,67308,0.787018,C100
1876,100,66892,0.845316,C100
1877,100,66893,0.803313,C100
1878,100,66894,0.849015,C100
1879,100,66895,0.869955,C100


In [None]:
# One violin per scaffold showing the spread of similarity values in `df`,
# with an embedded box plot and no individual points.
fig = px.violin(
    df,
    x="base",
    y="similarity",
    box=True,
    points=False,
)

# Axis titles and a fixed y-range applied in a single layout update.
fig.update_layout(
    xaxis_title="Scaffold",
    yaxis_title="Tanimoto Pattern Fingerprint Similarity",
    yaxis_range=[0, 1.1],
)

# Write the figure to HTML and to a fixed-size PDF, then show it inline.
mp.write("scaffold_sim_violin.html", fig)
mp.write(
    "scaffold_sim_violin.pdf",
    fig,
    width=1200,
    height=800,
)
fig

In [None]:
# Histogram of all similarity values in `df`, pooled across scaffolds.
similarity_axis_title = "Tanimoto Pattern Fingerprint Similarity"
fig = px.histogram(df, x="similarity")
fig.update_layout(xaxis_title=similarity_axis_title)

# Write the figure to HTML and to a fixed-size PDF, then show it inline.
mp.write("scaffold_sim_histogram.html", fig)
mp.write(
    "scaffold_sim_histogram.pdf",
    fig,
    width=1200,
    height=800,
)
fig

In [None]:
%%time
fig = (scaffolds + elabs).plot_tsnee(
    opacity=1.0, 
    title="Elaboration clusters", 
    legend=True, color="type", symbol=None, logo=False)
mp.write("tsnee.html", fig)
mp.write("tsnee.pdf", fig, width=1200, height=800)
fig

In [None]:
# Superstructures having at least one row with similarity of 0.65 or below.
# The comparison is inclusive (<=) even though the names say "lt".
at_or_below_cut = df["similarity"] <= 0.65
lt65_ids = set(df.loc[at_or_below_cut, "superstructure_id"].values)

# Look the IDs up as a compound set, write it to a SMILES CSV without tags,
# and display the set.
lt65 = animal.compounds[lt65_ids]
lt65.write_smiles_csv("elabs_lt65_similarity.csv", tags=False)
lt65

In [None]:
# Superstructures having at least one row with similarity strictly above 0.65.
above_cut = df["similarity"] > 0.65
gt65_ids = set(df.loc[above_cut, "superstructure_id"].values)

# Look the IDs up as a compound set, write it to a SMILES CSV without tags,
# and display the set.
gt65 = animal.compounds[gt65_ids]
gt65.write_smiles_csv("elabs_gt65_similarity.csv", tags=False)
gt65

In [6]:
# Poses belonging to the scaffold compounds, and the interactions of those poses.
# `scaffold_interactions.df` is used in a later cell to build the set of
# interaction keys seen for scaffolds.
scaffold_poses = scaffolds.poses
scaffold_interactions = scaffold_poses.interactions

In [None]:
# Poses belonging to the elaboration compounds, and the interactions of those poses.
# `elab_interactions` is iterated in a later cell to find interactions that
# no scaffold pose has.
elab_poses = elabs.poses
elab_interactions = elab_poses.interactions

In [None]:
# Every distinct (type, residue_name, residue_number) combination found among
# the scaffold interactions, stored as tuples for fast membership tests.
interaction_key_columns = ["type", "residue_name", "residue_number"]
scaffold_tuples = {
    tuple(row)
    for row in scaffold_interactions.df[interaction_key_columns].values
}
scaffold_tuples

In [None]:
# IDs of elaboration poses that have at least one interaction whose
# (type, residue_name, residue_number) key is absent from the scaffold set.
# mrich.track wraps the iteration (progress reporting).
pose_ids = {
    interaction.pose_id
    for interaction in mrich.track(elab_interactions)
    if (
        interaction.type,
        interaction.residue_name,
        interaction.residue_number,
    ) not in scaffold_tuples
}

In [None]:
# Poses flagged as having an interaction not seen for any scaffold, and the
# compounds those poses belong to.
new_interaction_poses = animal.poses[pose_ids]
new_interaction_comps = new_interaction_poses.compounds
print(new_interaction_poses, new_interaction_comps)

# Write the elaborations to two SMILES CSVs (no tags): those with a new
# interaction, and the remainder of `elabs`.
new_interaction_comps.write_smiles_csv("elabs_new_interactions.csv", tags=False)
elabs_without_new_interactions = elabs - new_interaction_comps
elabs_without_new_interactions.write_smiles_csv("elabs_no_new_interactions.csv", tags=False)

In [None]:
# Build a HIPPO Recipe from the elaboration compounds, write it to
# elab_recipe.json, and display it.
# NOTE(review): what from_compounds resolves (routes, reactants, quotes) is
# not visible here — confirm against the HIPPO documentation.
elab_recipe = hippo.Recipe.from_compounds(elabs)
elab_recipe.write_json("elab_recipe.json")
elab_recipe

Output()

In [None]:
# Export the recipe via write_CAR_csv to elab_recipe.csv.
# NOTE(review): the "CAR" CSV layout is defined by HIPPO — confirm the
# expected consumer of this file.
elab_recipe.write_CAR_csv("elab_recipe.csv")