# Plot

> A collection of plot functions

## Setup

In [None]:
#| default_exp plot

In [None]:
#| export
import pandas as pd, seaborn as sns
from fastcore.meta import delegates

#for embeddings
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from umap.umap_ import UMAP

# kdock
from kdock.core import *

# for visualization
import py3Dmol

## 2D plot

In [None]:
#| export
def set_sns():
    "Configure seaborn for high-resolution (300 dpi) figures with the 'notebook' context and 'ticks' style."
    # Same dpi for on-screen figures and for files written by savefig.
    dpi_settings = {key: 300 for key in ("figure.dpi", 'savefig.dpi')}
    sns.set(rc=dpi_settings)
    # Context and style are applied after sns.set so they override its defaults.
    sns.set_context('notebook')
    sns.set_style("ticks")

In [None]:
#| export
def reduce_feature(df: pd.DataFrame, 
                   method='pca', # dimensionality reduction method, accept both capital and lower case
                   complexity=20, # unused for PCA; perplexity for TSNE, recommend: 30; n_neighbors for UMAP, recommend: 15
                   n=2, # n_components
                   seed: int=123, # seed for random_state
                   load=None, # optional path to a reducer saved with joblib; when given it is used instead of building a new one
                   **kwargs, # arguments from PCA, TSNE, or UMAP depends on which method to use
                  ):
    
    """Reduce the dimensionality given a dataframe of values.

    Builds a PCA, TSNE or UMAP reducer (or loads one from `load`), calls
    `fit_transform` on `df`, and returns a dataframe that keeps `df`'s index
    and has one column per component, named `<METHOD>1`, `<METHOD>2`, ...

    Raises `AssertionError` when `method` is not one of pca/tsne/umap
    (`ValueError` if assertions are disabled with `-O`).
    """
    
    method = method.lower()
    assert method in ['pca','tsne','umap'], "Please choose a method among PCA, TSNE, and UMAP"
    
    if load is not None:
        # Imported lazily so joblib is only required when a saved reducer is requested.
        # NOTE: joblib.load unpickles the file; only load files from trusted sources.
        import joblib
        reducer = joblib.load(load)
    elif method == 'pca':
        reducer = PCA(n_components=n, random_state=seed, **kwargs)
    elif method == 'tsne':
        reducer = TSNE(n_components=n,
                       random_state=seed,
                       perplexity=complexity, # default from official is 30
                       **kwargs)
    elif method == 'umap':
        reducer = UMAP(n_components=n,
                       random_state=seed,
                       n_neighbors=complexity, # default from official is 15, try 15-200
                       **kwargs)
    else:
        # Only reachable when the assert above is stripped (python -O).
        raise ValueError('Invalid method specified')

    # A loaded reducer is re-fitted on `df` as well: fit_transform is called in every case.
    proj = reducer.fit_transform(df)
    embedding_df = pd.DataFrame(proj).set_index(df.index)
    # Name columns from the actual projection width, so a loaded reducer whose
    # component count differs from `n` still gets a matching set of labels.
    embedding_df.columns = [f"{method.upper()}{i}" for i in range(1, proj.shape[1] + 1)]

    return embedding_df

In [None]:
#| export
@delegates(sns.scatterplot)
def plot_2d(X: pd.DataFrame, # a dataframe that has first column to be x, and second column to be y
            **kwargs, # arguments for sns.scatterplot
            ):
    "Make 2D plot from a dataframe that has first column to be x, and second column to be y"
    # 0.7 is only the default transparency: setdefault lets callers pass their own
    # `alpha` without triggering a duplicate-keyword TypeError.
    kwargs.setdefault('alpha', 0.7)
    # NOTE(review): `plt` is not imported in this notebook's import cell; presumably it
    # arrives through `from kdock.core import *` — confirm.
    plt.figure(figsize=(7,7))
    sns.scatterplot(data=X, x=X.columns[0], y=X.columns[1], **kwargs)

In [None]:
# Load the example dataset used by the demo cells below
df = Data.get_mirati_g12d()

In [None]:
# (rows, columns) of the loaded dataframe
df.shape

(660, 5)

In [None]:
# Build the feature table from df; with postprocess=True the call reports the
# zero-std and highly correlated (pearson>0.99) columns it removed (see printed output)
prop_std = get_rdkit_df(df,postprocess=True)


 35 Columns with zero std: ['NumRadicalElectrons', 'SMR_VSA8', 'SlogP_VSA9', 'fr_Ar_COO', 'fr_C_S', 'fr_SH', 'fr_amidine', 'fr_azide', 'fr_azo', 'fr_barbitur', 'fr_benzodiazepine', 'fr_diazo', 'fr_dihydropyridine', 'fr_epoxide', 'fr_furan', 'fr_hdrzine', 'fr_hdrzone', 'fr_imide', 'fr_isocyan', 'fr_isothiocyan', 'fr_ketone', 'fr_ketone_Topliss', 'fr_lactam', 'fr_nitro', 'fr_nitro_arom', 'fr_nitro_arom_nonortho', 'fr_nitroso', 'fr_oxazole', 'fr_oxime', 'fr_phos_acid', 'fr_phos_ester', 'fr_prisulfonamd', 'fr_tetrazole', 'fr_thiocyan', 'fr_thiophene']

 11 Columns removed due to high similarity (pearson>0.99): ['MaxEStateIndex', 'HeavyAtomMolWt', 'ExactMolWt', 'HeavyAtomCount', 'fr_COO', 'fr_C_O_noCOO', 'fr_Nhpyrrole', 'fr_amide', 'fr_benzene', 'fr_phenol', 'fr_phenol_noOrthoHbond']

 Total removed columns: 46


In [None]:
#| export
def plot_corr(x,#a column of df
              y,#a column of df
              xlabel=None,# x axis label
              ylabel=None,# y axis label
              order=3, # polynomial level, if straight, order=1 
             ):
    "Plot `y` against `x` with a polynomial regression line and annotate the Spearman correlation."
    # Imported locally so the function does not rely on `spearmanr` leaking in
    # through a star import.
    from scipy.stats import spearmanr

    # Only the correlation coefficient is shown; the p-value is discarded.
    correlation, _ = spearmanr(x, y)

    # regplot returns the Axes it drew on; using that object directly avoids
    # depending on pyplot's "current axes" global state.
    ax = sns.regplot(x=x,
                     y=y,
                     order=order,
                     line_kws={'color': 'gray'})

    if xlabel is not None:
        ax.set_xlabel(xlabel)

    if ylabel is not None:
        ax.set_ylabel(ylabel)

    # transAxes places the label in axes-fraction coordinates (lower-right area),
    # independent of the data range.
    ax.text(x=0.8, y=0.1, s=f'Spearman: {correlation:.2f}', transform=ax.transAxes, ha='center', va='center')

## PDB/SDF visualization

In [None]:
#| export
def view_mol(file, #sdf or pdb file
            ):
    "Show a single structure file as sticks in a py3Dmol viewer."
    # Read inside a context manager so the file handle is closed right away
    # instead of being left to the garbage collector.
    with open(file) as f:
        model_text = f.read()

    v = py3Dmol.view()
    v.addModel(model_text)
    v.setStyle({'stick':{}})
    v.zoomTo()
    v.show()

In [None]:
# Render a single structure file as sticks
view_mol('gnina_test/chain_A.pdb')

In [None]:
#| export
def _read_text(path):
    "Return the text content of `path`, closing the file handle afterwards."
    with open(path) as f:
        return f.read()


def view_complex(receptor,           # protein file
                 ligand,             # ligand (green), or docked ligand
                 ori_ligand=None,    # original ligand (yellow)
                 box=None            # optional box: [x, y, z, sizeX, sizeY, sizeZ]
                ):

    """Visualize the receptor, ligand, optional original ligand, and optional box via py3Dmol.

    Models are added in a fixed order (receptor = 0, ligand = 1, original
    ligand = 2); the per-model style selectors below rely on that order.
    The view is zoomed to the ligand (model 1).
    """
    v = py3Dmol.view()
    
    # Load receptor; it is the only model at this point, so the selector-less
    # setStyle affects just the receptor.
    v.addModel(_read_text(receptor))
    v.setStyle({'cartoon': {}, 'stick': {'radius': 0.15}})
    
    # Load docked ligand
    v.addModel(_read_text(ligand))
    v.setStyle({'model': 1}, {'stick': {'colorscheme': 'greenCarbon'}})

    # Load original ligand if provided
    if ori_ligand is not None:
        v.addModel(_read_text(ori_ligand))
        v.setStyle({'model': 2}, {'stick': {'colorscheme': 'yellowCarbon'}})

    # Add bounding box if specified; a box that does not have exactly six
    # values is ignored rather than raising.
    if box is not None and len(box) == 6:
        x, y, z, sizeX, sizeY, sizeZ = box
        v.addBox({
            'center': {'x': x, 'y': y, 'z': z},
            'dimensions': {'w': sizeX, 'h': sizeY, 'd': sizeZ},
            'color': 'red',
            'opacity': 1,
            'wireframe': True
        })

    v.zoomTo({'model': 1})
    v.show()

In [None]:
# Box computed from the ligand file, returned as a list; passed below as
# view_complex's `box`, which unpacks it as [x, y, z, sizeX, sizeY, sizeZ]
box_list = get_box('core_test/7OFF_lig.sdf',tolist=True)
box_list

[38.848, -26.77, 10.419, 14.652, 8.942, 12.509]

In [None]:
# Show the receptor with its ligand and draw the box as a red wireframe
view_complex('core_test/7OFF.pdb','core_test/7OFF_lig.sdf',box=box_list)

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()