In [1]:
from sklearn.datasets import make_blobs
from sklearn.manifold import MDS

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

import sys

import umap

import distinctipy

from bokeh.plotting import figure, show, save, output_file
from bokeh.models import HoverTool, TabPanel, Tabs

from scipy.stats import gmean

sys.path.append('../../')
from utils import clustering_utils, rna_seq_utils, bokeh_ui_utils, microarray_utils

  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()


In [2]:
# X, y = make_blobs(n_samples=10000, centers=3, n_features=10, random_state=0)
# print(X.shape)

# X_dist = clustering_utils.compute_pairwise_distance_matrix(X, 'manhattan')

# mds_mapper = MDS(n_components=2, normalized_stress='auto', dissimilarity='precomputed', n_jobs=-1)
# embedding = mds_mapper.fit_transform(X_dist)

# umap_df = pd.DataFrame(np.array(embedding), columns=('x', 'y'))

# plt.scatter(umap_df['x'], umap_df['y'])

In [3]:
def rgb_to_hex(rgb):
    """
    Convert RGB tuple to hexadecimal color code.
    """
    return '#{:02x}{:02x}{:02x}'.format(int(rgb[0] * 255), int(rgb[1] * 255), int(rgb[2] * 255))

In [4]:
def generate_embedding_plot(embedding_df, colors_dict, title=''):
    bokeh_x = 'x'
    bokeh_y = 'y'
    color_column = 'label' 

    # axis_padding = 0.05
    # x_min, x_max = embedding_df[bokeh_x].min() - axis_padding * embedding_df[bokeh_x].min(), embedding_df[bokeh_x].max() + axis_padding * embedding_df[bokeh_x].max()
    # y_min, y_max = embedding_df[bokeh_y].min() - axis_padding * embedding_df[bokeh_y].min(), embedding_df[bokeh_y].max() + axis_padding * embedding_df[bokeh_y].max()

    p = figure(title=title, sizing_mode='stretch_both',
                # x_range=(x_min, x_max), 
                # y_range=(y_min, y_max),
                )

    for val in sorted(embedding_df[color_column].unique()):
        col_df=embedding_df.loc[(embedding_df[color_column]==val)]
        p.scatter(bokeh_x, bokeh_y, 
                    # size=bokeh_ui_utils.compute_2d_embedding_point_radius(embedding_df, const=3), 
                    size=3,
                    color=colors_dict[val], 
                    # legend_label=val, 
                source=col_df)

    tooltips = []
    for column in col_df.columns[col_df.shape[1] - 1:]:
        tooltip = (column.replace('_', ' ').title(), f'@{column}')
        tooltips.append(tooltip)

    hover = HoverTool(tooltips=tooltips)

    p.add_tools(hover)

    # p.legend.click_policy="hide"
    # p.legend.location = "bottom_left"

    p.xaxis.axis_label = bokeh_x
    p.yaxis.axis_label = bokeh_y

    p.min_border = 100

    return p

In [5]:
def ari_mean_nexpr_per_mod(full_filtered_norm_df: pd.DataFrame, leiden_label_df_round_1_arranged_sorted: pd.DataFrame):    
    avg_df = None

    for m in leiden_label_df_round_1_arranged_sorted['label'].unique():

        curr_df = (full_filtered_norm_df.loc[full_filtered_norm_df['TTHERM_ID'].isin(
                        (leiden_label_df_round_1_arranged_sorted.loc[leiden_label_df_round_1_arranged_sorted['label'] == m]['TTHERM_ID'].values)
                    )].iloc[:, 1:].mean()).to_frame().T
        curr_df['label'] = m

        if avg_df is None:
            avg_df = curr_df
            continue

        avg_df = pd.concat((avg_df, curr_df), ignore_index=True)

    avg_df = avg_df.loc[: , list(avg_df.columns)[avg_df.shape[1] - 1:] + list(avg_df.columns)[0: avg_df.shape[1] - 1]]

    return avg_df

In [6]:
def geo_mean_nexpr_per_mod(full_filtered_norm_df: pd.DataFrame, leiden_label_df_round_1_arranged_sorted: pd.DataFrame):    
    avg_df = None

    for m in leiden_label_df_round_1_arranged_sorted['label'].unique():

        curr_df = full_filtered_norm_df.loc[full_filtered_norm_df['TTHERM_ID'].isin(
                        (leiden_label_df_round_1_arranged_sorted.loc[leiden_label_df_round_1_arranged_sorted['label'] == m]['TTHERM_ID'].values)
                    )].iloc[:, 1:]

        print(curr_df.apply(gmean, axis=0)) # FIXME FINISH IMPLEMENTING

        curr_df['label'] = m

        if avg_df is None:
            avg_df = curr_df
            continue

        avg_df = pd.concat((avg_df, curr_df), ignore_index=True)

    avg_df = avg_df.loc[: , list(avg_df.columns)[avg_df.shape[1] - 1:] + list(avg_df.columns)[0: avg_df.shape[1] - 1]]

    return avg_df

In [7]:
# DATASET = 'microarray'
DATASET = 'rna_seq'

In [8]:
if DATASET == 'rna_seq':
    full_filtered_df = pd.read_csv('../../active_files/rna_seq.csv')
    full_filtered_norm_df = rna_seq_utils.normalize_expression_per_gene(full_filtered_df)
    full_filtered_norm_df = rna_seq_utils.ari_mean_df_of_duplicates(full_filtered_norm_df)

    leiden_label_df_round_1_arranged_sorted = pd.read_csv('./rna_seq_label_df_round_1.csv')

    mds_file = './rna_seq_mds.csv'
    nmds_file = './rna_seq_nmds.csv'

elif DATASET == 'microarray':
    full_filtered_df = pd.read_csv('../../active_files/allgood_filt_agg_tidy_2021aligned_qc_rma_expression_full.csv')
    
    full_filtered_norm_df = microarray_utils.normalize_expression_per_gene(full_filtered_df, z=True)
    full_filtered_norm_df = microarray_utils.normalize_expression_per_gene(full_filtered_norm_df)

    leiden_label_df_round_1_arranged_sorted = pd.read_csv('./test_nn3_leiden_label_df_round_1.csv')

    mds_file = './microarray_mds.csv'
    nmds_file = './microarray_nmds.csv'

In [9]:
geo_mean_nexpr_per_mod(full_filtered_norm_df, leiden_label_df_round_1_arranged_sorted)

  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)


000min         NaN
030min         NaN
060min    1.065809
090min         NaN
120min         NaN
150min         NaN
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min         NaN
030min         NaN
060min    0.729476
090min         NaN
120min         NaN
150min         NaN
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min         NaN
030min    0.653564
060min         NaN
090min         NaN
120min    1.097282
150min         NaN
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min         NaN
030min         NaN
060min         NaN
090min         NaN
120min    2.505375
150min         NaN
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min      NaN
030min      NaN
060min      NaN
090min      NaN
120min    2.137
150min      NaN
180min      NaN
210min      NaN
240min      NaN
dtype: float64
000min         NaN
030min         NaN
060min    1.049586
090min    0.938693
120min    0.721800
15

  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)


000min    0.616024
030min    0.610610
060min    1.019105
090min    1.131749
120min    1.087833
150min    1.284542
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min         NaN
030min    0.472175
060min    0.781799
090min    1.232594
120min    1.280935
150min    1.198592
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min    0.865107
030min    0.574302
060min    0.753920
090min    1.090408
120min    1.013717
150min         NaN
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min    1.159075
030min         NaN
060min    0.972600
090min    1.083461
120min    0.729076
150min    0.977973
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min         NaN
030min    0.713253
060min    0.666519
090min         NaN
120min         NaN
150min    1.253986
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min    0.922035
030min    0.903438
060min    0.834559
090min    0.9

  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)


000min         NaN
030min         NaN
060min         NaN
090min         NaN
120min         NaN
150min    0.511238
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min    0.880915
030min         NaN
060min         NaN
090min         NaN
120min    1.496836
150min         NaN
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min         NaN
030min         NaN
060min         NaN
090min         NaN
120min    1.626314
150min    1.406802
180min         NaN
210min    0.894522
240min         NaN
dtype: float64
000min         NaN
030min         NaN
060min         NaN
090min         NaN
120min    1.827639
150min    1.603016
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min         NaN
030min         NaN
060min         NaN
090min         NaN
120min    1.034074
150min         NaN
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min         NaN
030min         NaN
060min         NaN
090min       

  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)


000min    2.608592
030min         NaN
060min         NaN
090min         NaN
120min         NaN
150min         NaN
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min    1.208526
030min         NaN
060min         NaN
090min         NaN
120min         NaN
150min         NaN
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min    1.148338
030min         NaN
060min         NaN
090min         NaN
120min         NaN
150min         NaN
180min         NaN
210min         NaN
240min    0.714832
dtype: float64
000min    1.671467
030min         NaN
060min         NaN
090min         NaN
120min         NaN
150min         NaN
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min   NaN
030min   NaN
060min   NaN
090min   NaN
120min   NaN
150min   NaN
180min   NaN
210min   NaN
240min   NaN
dtype: float64
000min    1.836975
030min         NaN
060min         NaN
090min         NaN
120min         NaN
150min         NaN
180min    

  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)


000min    1.730772
030min    0.904958
060min         NaN
090min         NaN
120min         NaN
150min         NaN
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min    1.563991
030min    0.812713
060min         NaN
090min         NaN
120min         NaN
150min         NaN
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min    1.949777
030min         NaN
060min    1.029574
090min         NaN
120min         NaN
150min         NaN
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min    2.718159
030min         NaN
060min    0.972241
090min         NaN
120min         NaN
150min         NaN
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min    2.003916
030min         NaN
060min         NaN
090min    1.210177
120min         NaN
150min         NaN
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min    3.036121
030min    0.232041
060min         NaN
090min    0.6

  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)


000min         NaN
030min    1.782732
060min    1.244303
090min    0.928598
120min         NaN
150min         NaN
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min         NaN
030min    1.697849
060min         NaN
090min         NaN
120min         NaN
150min         NaN
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min         NaN
030min    1.175719
060min         NaN
090min         NaN
120min         NaN
150min         NaN
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min         NaN
030min    1.696154
060min    0.810244
090min         NaN
120min         NaN
150min         NaN
180min         NaN
210min         NaN
240min         NaN
dtype: float64
000min   NaN
030min   NaN
060min   NaN
090min   NaN
120min   NaN
150min   NaN
180min   NaN
210min   NaN
240min   NaN
dtype: float64
000min         NaN
030min         NaN
060min         NaN
090min         NaN
120min         NaN
150min         NaN
180min    

  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)


000min    1.098004
030min         NaN
060min         NaN
090min         NaN
120min         NaN
150min         NaN
180min    0.659634
210min    1.026394
240min         NaN
dtype: float64
000min    1.081356
030min         NaN
060min         NaN
090min         NaN
120min         NaN
150min         NaN
180min    0.625919
210min    0.787751
240min         NaN
dtype: float64
000min    0.716315
030min         NaN
060min         NaN
090min         NaN
120min         NaN
150min         NaN
180min         NaN
210min    0.621432
240min         NaN
dtype: float64
000min   NaN
030min   NaN
060min   NaN
090min   NaN
120min   NaN
150min   NaN
180min   NaN
210min   NaN
240min   NaN
dtype: float64
000min   NaN
030min   NaN
060min   NaN
090min   NaN
120min   NaN
150min   NaN
180min   NaN
210min   NaN
240min   NaN
dtype: float64
000min   NaN
030min   NaN
060min   NaN
090min   NaN
120min   NaN
150min   NaN
180min   NaN
210min   NaN
240min   NaN
dtype: float64
000min    1.530955
030min    0.895755
060min  

  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)


000min         NaN
030min         NaN
060min         NaN
090min         NaN
120min         NaN
150min    1.186731
180min    1.442001
210min    1.009648
240min         NaN
dtype: float64
000min         NaN
030min         NaN
060min         NaN
090min         NaN
120min         NaN
150min    1.372682
180min    1.595819
210min    1.209154
240min         NaN
dtype: float64
000min         NaN
030min         NaN
060min         NaN
090min         NaN
120min    0.465057
150min    1.120220
180min    1.559231
210min    1.166342
240min         NaN
dtype: float64
000min         NaN
030min         NaN
060min         NaN
090min         NaN
120min         NaN
150min    1.075373
180min    1.490572
210min    1.223707
240min         NaN
dtype: float64
000min         NaN
030min         NaN
060min         NaN
090min         NaN
120min         NaN
150min    0.729809
180min    1.381006
210min    1.354983
240min    0.523594
dtype: float64
000min         NaN
030min         NaN
060min         NaN
090min       

  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)


000min         NaN
030min         NaN
060min         NaN
090min         NaN
120min    1.193206
150min    0.735942
180min    0.464699
210min         NaN
240min         NaN
dtype: float64
000min         NaN
030min         NaN
060min         NaN
090min    0.494153
120min    0.927543
150min    1.400944
180min    0.941812
210min    0.654565
240min         NaN
dtype: float64
000min         NaN
030min         NaN
060min         NaN
090min    0.368238
120min    1.218789
150min         NaN
180min    0.751925
210min    0.535562
240min         NaN
dtype: float64
000min         NaN
030min         NaN
060min         NaN
090min         NaN
120min         NaN
150min         NaN
180min         NaN
210min    0.539113
240min         NaN
dtype: float64
000min   NaN
030min   NaN
060min   NaN
090min   NaN
120min   NaN
150min   NaN
180min   NaN
210min   NaN
240min   NaN
dtype: float64
000min         NaN
030min         NaN
060min         NaN
090min         NaN
120min    1.049564
150min    0.909810
180min    

  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)
  log_a = np.log(a)


Unnamed: 0,label,000min,030min,060min,090min,120min,150min,180min,210min,240min
0,711,-1.202200,0.555037,1.724574,-0.573183,-0.770504,-0.366093,1.241515,-0.380283,0.165937
1,711,-1.118308,-1.002579,1.782797,-0.179393,-0.612365,-0.697697,1.300174,-1.022751,-0.038346
2,711,0.043825,-1.017845,0.972688,0.091875,-1.422158,-0.681175,1.628980,-1.071387,-0.345870
3,711,-0.063829,-0.222619,1.318690,0.526479,-1.244657,-1.022552,1.892718,-1.323655,-1.743305
4,711,-0.749742,0.120390,1.594732,-0.520193,-0.463970,-0.657934,-0.162895,-0.212441,0.222130
...,...,...,...,...,...,...,...,...,...,...
23107,0,0.409155,0.161347,0.199929,0.397410,0.347994,0.059553,-0.321846,0.717392,0.016749
23108,0,-0.017718,0.169777,0.453792,0.099016,-0.518738,-0.031974,0.335282,0.576057,0.781257
23109,0,0.608048,0.406005,0.261789,0.394904,0.447788,0.654470,0.299044,0.686835,0.169009
23110,0,0.461101,-0.155288,0.658710,0.084326,-0.325173,-0.580931,0.063246,0.396974,0.842909


In [None]:
mds_embeddind_df = pd.read_csv(mds_file)
mds_embeddind_df['TTHERM_ID'] = full_filtered_norm_df['TTHERM_ID'].values
mds_embeddind_df = mds_embeddind_df.merge(leiden_label_df_round_1_arranged_sorted, on='TTHERM_ID', how='inner')

nmds_embeddind_df = pd.read_csv(nmds_file)
nmds_embeddind_df['TTHERM_ID'] = full_filtered_norm_df['TTHERM_ID'].values
nmds_embeddind_df = nmds_embeddind_df.merge(leiden_label_df_round_1_arranged_sorted, on='TTHERM_ID', how='inner')

In [None]:
leiden_label_df_round_1_arranged_sorted.sample(10)

In [None]:
full_filtered_norm_df.sample(10)

In [None]:
leiden_label_df_round_1_arranged_sorted.loc[leiden_label_df_round_1_arranged_sorted['label'] == 24]['TTHERM_ID'].values

In [None]:
avg_df = ari_mean_nexpr_per_mod(full_filtered_norm_df, leiden_label_df_round_1_arranged_sorted)

In [None]:
data = full_filtered_norm_df[list(full_filtered_norm_df.columns)[1:]].values

random_state = 42
n_components = 2
n_neighbors = 3
embedding_metric = 'manhattan'

umap_mapper = umap.UMAP(random_state=random_state, n_components=n_components, n_neighbors=n_neighbors, metric=embedding_metric).fit(data)
embedding = bokeh_ui_utils._get_umap_embedding(umap_mapper)

umap_df = pd.DataFrame(np.array(embedding), columns=('x', 'y'))

umap_df['TTHERM_ID'] = full_filtered_norm_df['TTHERM_ID'].values

umap_df = umap_df.merge(leiden_label_df_round_1_arranged_sorted, on='TTHERM_ID', how='inner')

In [None]:
plt.scatter(umap_df['x'], umap_df['y'])

In [None]:
data = avg_df[list(avg_df.columns)[1:]].values

random_state = 42
n_components = 2
n_neighbors = 3
embedding_metric = 'manhattan'

umap_mapper = umap.UMAP(random_state=random_state, n_components=n_components, n_neighbors=n_neighbors, metric=embedding_metric).fit(data)
embedding = bokeh_ui_utils._get_umap_embedding(umap_mapper)

avg_umap_df = pd.DataFrame(np.array(embedding), columns=('x', 'y'))

avg_umap_df['label'] = avg_df['label'].values

In [None]:
plt.scatter(avg_umap_df['x'], avg_umap_df['y'])

In [None]:
mds_mapper = MDS(n_components=2, normalized_stress='auto', dissimilarity='precomputed', random_state=42)
embedding = mds_mapper.fit_transform(clustering_utils.compute_pairwise_distance_matrix(avg_df, 'manhattan'))

avg_mds_df = pd.DataFrame(np.array(embedding), columns=('x', 'y'))

avg_mds_df['label'] = avg_df['label'].values

In [None]:
plt.scatter(avg_mds_df['x'], avg_mds_df['y'])

In [None]:
nmds_mapper = MDS(n_components=2, normalized_stress='auto', dissimilarity='precomputed', metric=False, random_state=42)
embedding = nmds_mapper.fit_transform(clustering_utils.compute_pairwise_distance_matrix(avg_df, 'manhattan'))

avg_nmds_df = pd.DataFrame(np.array(embedding), columns=('x', 'y'))

avg_nmds_df['label'] = avg_df['label'].values

In [None]:
plt.scatter(avg_nmds_df['x'], avg_nmds_df['y'])

In [None]:
num_colors = len(umap_df['label'].unique())
colors = distinctipy.get_colors(num_colors)
distinctipy.color_swatch(colors)
colors_dict = {m : rgb_to_hex(colors[idx]) for idx, m in enumerate(umap_df['label'].unique())}

In [None]:
plots = [
generate_embedding_plot(umap_df, colors_dict, title=f'{DATASET}_umap_df'),
generate_embedding_plot(avg_umap_df, colors_dict, title=f'{DATASET}_avg_umap_df'),
generate_embedding_plot(mds_embeddind_df, colors_dict, title=f'{DATASET}_mds_embeddind_df'),
generate_embedding_plot(nmds_embeddind_df, colors_dict, title=f'{DATASET}_nmds_embeddind_df'),
generate_embedding_plot(avg_mds_df, colors_dict, title=f'{DATASET}_avg_mds_df'),
generate_embedding_plot(avg_nmds_df, colors_dict, title=f'{DATASET}_avg_nmds_df'),
]

In [None]:
tabs = [TabPanel(child=p, title=p.title.text) for p in plots]

tabbed_plot = Tabs(tabs=tabs, sizing_mode='stretch_both')

output_file(f'./{DATASET}_embedding_comparison.html')

save(tabbed_plot)