# Plot

> A collection of plot functions

In [1]:
#| default_exp plot

In [3]:
#| export
import pandas as pd
import seaborn as sns

#for embeddings
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from umap.umap_ import UMAP

# kdock
from kdock.core import Data

In [4]:
#| export
def set_sns():
    "Apply the seaborn theme used by these plots: 300-dpi figures, 'notebook' context and 'ticks' style"
    dpi = 300
    # Same resolution for on-screen figures and for saved files
    high_res = {"figure.dpi": dpi, "savefig.dpi": dpi}
    sns.set(rc=high_res)
    sns.set_context("notebook")
    sns.set_style("ticks")

In [5]:
#| export
def reduce_feature(df: pd.DataFrame, 
                   method: str='pca', # dimensionality reduction method: 'pca', 'tsne' or 'umap'; accept both capital and lower case
                   complexity: int=20, # not used for PCA; perplexity for TSNE, recommend: 30; n_neighbors for UMAP, recommend: 15
                   n: int=2, # n_components
                   seed: int=123, # seed for random_state
                   load=None, # optional path of a reducer saved with joblib; when given, it is used instead of building a new one
                   **kwargs, # arguments from PCA, TSNE, or UMAP depends on which method to use
                  ):
    
    """Reduce the dimensionality given a dataframe of values

    Returns a dataframe indexed like `df` whose columns are named
    `<METHOD>1 ... <METHOD>k`, one per projected component.
    Raises AssertionError (or ValueError when asserts are disabled) if
    `method` is not one of 'pca', 'tsne', 'umap'."""
    
    method = method.lower()
    assert method in ['pca','tsne','umap'], "Please choose a method among PCA, TSNE, and UMAP"
    
    if load is not None:
        # Imported lazily so the default path does not need joblib in module scope
        import joblib
        # `n`, `complexity`, `seed` and `kwargs` are not applied to a loaded reducer
        reducer = joblib.load(load)
    else:
        if method == 'pca':
            reducer = PCA(n_components=n, random_state=seed,**kwargs)
        elif method == 'tsne':
            reducer = TSNE(n_components=n,
                           random_state=seed, 
                           perplexity = complexity, # default from official is 30 
                          **kwargs)
        elif method == 'umap':
            reducer = UMAP(n_components=n, 
                           random_state=seed, 
                           n_neighbors=complexity, # default from official is 15, try 15-200
                          **kwargs)
        else:
            # Backstop for when the assert above is stripped (python -O)
            raise ValueError('Invalid method specified')

    # NOTE: a loaded reducer is also passed through fit_transform, i.e. it is fit on `df`
    proj = reducer.fit_transform(df)
    embedding_df = pd.DataFrame(proj).set_index(df.index)
    # Name columns from the actual projection width; a loaded reducer may not have `n` components
    embedding_df.columns = [f"{method.upper()}{i}" for i in range(1, embedding_df.shape[1] + 1)]

    return embedding_df

In [None]:
#| export
@delegates(sns.scatterplot)
def plot_2d(X: pd.DataFrame, # a dataframe that has first column to be x, and second column to be y
            **kwargs, # arguments for sns.scatterplot; `alpha` defaults to 0.7 when not given
            ):
    "Make 2D plot from a dataframe that has first column to be x, and second column to be y"
    # Treat 0.7 as a default rather than a fixed value: passing alpha explicitly
    # used to fail with a duplicate-keyword TypeError
    kwargs.setdefault('alpha', 0.7)
    # Always draw on a new 7x7 inch figure
    plt.figure(figsize=(7,7))
    sns.scatterplot(data = X,x=X.columns[0],y=X.columns[1],**kwargs)

In [9]:
# Load the example dataframe provided by kdock's Data.get_mirati_g12d()
df = Data.get_mirati_g12d()

In [10]:
# Show the (rows, columns) size of the loaded dataframe
df.shape

(722, 10)

In [11]:
# NOTE(review): get_rdkit_df is not defined or imported in the cells shown here —
# presumably a kdock helper that builds a normalized feature table from df; confirm its import
prop_std = get_rdkit_df(df,normalize=True)

In [None]:
# NOTE(review): plot_cluster is not defined in the cells shown here — presumably it
# projects the features (UMAP here) and draws them colored by df.group; confirm where it lives
plot_cluster(prop_std,method = 'umap', hue = df.group,legend=True)

In [None]:
#| export
def plot_corr(x,#a column of df
              y,#a column of df
              xlabel=None,# x axis label
              ylabel=None,# y axis label
              order=3, # polynomial level, if straight, order=1 
             ):
    "Plot y against x with a polynomial regression fit and annotate the Spearman correlation"
    # Imported here so the function does not rely on a module-level scipy import
    from scipy.stats import spearmanr

    # Only the correlation coefficient is displayed; the p-value is not used
    correlation, _ = spearmanr(x, y)

    # regplot returns the Axes it drew on; use it directly for labels and text
    ax = sns.regplot(x=x,
                     y=y,
                     order=order,
                     line_kws={'color': 'gray'}
                    )
    
    if xlabel is not None:
        ax.set_xlabel(xlabel)
        
    if ylabel is not None:
        ax.set_ylabel(ylabel)

    # Position is in axes-fraction coordinates (transAxes), so it stays in the
    # lower-right area regardless of the data range
    ax.text(x=0.8, y=0.1, s=f'Spearman: {correlation:.2f}', transform=ax.transAxes, ha='center', va='center')

In [7]:
#| hide
# Run nbdev's export step (writes the `#| export` cells out to the library module)
import nbdev; nbdev.nbdev_export()