In [None]:
# start with command "jupyter notebook --NotebookApp.iopub_data_rate_limit=1e10"

%matplotlib inline
#%matplotlib notebook
#%matplotlib

import os
import sys
import numpy as np
import matplotlib.pyplot as plt

# LIBRARY GLOBAL MODS
CELLTYPES = os.path.dirname(os.path.abspath(''))
sys.path.append(CELLTYPES)

from multicell.analysis_gamma import plot_bifurcation_candidates
from utils.file_io import RUNS_FOLDER, INPUT_FOLDER
manyruns = RUNS_FOLDER + os.sep + 'multicell_manyruns'

In [None]:
# Folder under RUNS_FOLDER used as the plot output directory of the single-gammascan analysis below;
# it is created here if it does not exist yet.
NOTEBOOK_OUTDIR = RUNS_FOLDER + os.sep + 'explore' + os.sep + 'nb_bifurcation'
os.makedirs(NOTEBOOK_OUTDIR, exist_ok=True)

# Dimension reduction and plotting settings

In [None]:
import umap
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

import seaborn as sns
import pandas as pd
import plotly.express as px

sns.set(style='white', context='notebook', rc={'figure.figsize':(7,5)})

# Shared plotly layout settings: fixed 1000x1000 canvas with explicit margins.
# Applied to figures through fig.update_layout(**plotly_figstyle).
plotly_figstyle = {
    'autosize': False,
    'width': 1000,
    'height': 1000,
    'margin': {'l': 50, 'r': 50, 'b': 100, 't': 100, 'pad': 0},
    'paper_bgcolor': None,
}

In [None]:
# Default settings for dimred(); callers layer their own overrides on top via mod_kwargs.
choice = 2                 # 0: umap, 1: tsne, 2: pca
REDUCER_COMPONENTS = 2     # number of embedding dimensions requested from each reducer
REDUCER_SEED = 2           # random_state passed to the stochastic reducers (UMAP, t-SNE)

UMAP_KWARGS = dict(
    random_state=REDUCER_SEED,
    n_components=REDUCER_COMPONENTS,
    metric='euclidean',
    init='spectral',
    unique=False,
    n_neighbors=15,
    min_dist=0.1,
    spread=1.0,
)
TSNE_KWARGS = dict(
    random_state=REDUCER_SEED,
    n_components=REDUCER_COMPONENTS,
    metric='euclidean',
    init='random',
    perplexity=30.0,
)
PCA_KWARGS = dict(
    n_components=REDUCER_COMPONENTS,
)


def dimred(choice, X, mod_kwargs={}):
    """
    Reduce the dimension of the dataset X with the selected algorithm.

    Args:
        choice:     0 -> UMAP, 1 -> t-SNE, 2 -> PCA
        X:          2D array; X.T is what gets passed to the reducer
        mod_kwargs: overrides applied on top of UMAP_KWARGS / TSNE_KWARGS / PCA_KWARGS
                    (only read here, never modified)
    Returns:
        algo:       display name of the algorithm ('UMAP', 't-SNE' or 'PCA')
        embedding:  output of the reducer for X.T
        reducer:    the fitted reducer object
    """
    assert choice in [0,1,2]

    def _with_overrides(defaults):
        # private copy of the defaults so the module-level dicts stay untouched
        merged = dict(defaults)
        merged.update(mod_kwargs)
        return merged

    if choice == 2:
        algo = 'PCA'
        reducer = PCA(**_with_overrides(PCA_KWARGS))
        embedding = reducer.fit_transform(X.T)
    elif choice == 1:
        algo = 't-SNE'
        reducer = TSNE(**_with_overrides(TSNE_KWARGS))
        embedding = reducer.fit_transform(X.T)
    else:
        algo = 'UMAP'
        reducer = umap.UMAP(**_with_overrides(UMAP_KWARGS))
        # UMAP is fitted first and then asked to transform the same data (two separate calls)
        reducer.fit(X.T)
        embedding = reducer.transform(X.T)

    return algo, embedding, reducer

def plot_embedding_jupyter(embedding, seed_arr, gamma_arr, color_label, algo, outdir, outname, clip_low=None, clip_high=None, pts_subset=None):
    r"""
    Scatter plot of a 2D or 3D embedding with plotly express; saved as PNG and then shown.

    Args:
        embedding:   2D array, one row per point; columns 0, 1 (and 2 when 3D) are plotted
        seed_arr:    1D array - seed label of each point
        gamma_arr:   1D array - gamma value of each point
        color_label: r'$\gamma$' (continuous colour scale) or 'seed' (discrete colours)
        algo:        name of the dim. reduction method, used in the plot title
        outdir:      folder for the image file (created if missing)
        outname:     image file name without extension; '.png' is appended
        clip_low:    lower bound applied to the colour values (default: their minimum)
        clip_high:   upper bound applied to the colour values (default: their maximum)
        pts_subset:  optional index / mask selecting the points to plot; the 'index' hover
                     field keeps referring to row positions of the full embedding
    """
    num_runs = embedding.shape[0]
    pts_index = np.arange(num_runs)
    if pts_subset is not None:
        embedding = embedding[pts_subset, :]
        gamma_arr = gamma_arr[pts_subset]
        seed_arr = seed_arr[pts_subset]
        pts_index = pts_index[pts_subset]

    assert color_label in [r'$\gamma$', 'seed']
    if color_label == 'seed':
        color_arr = seed_arr.astype(int)
        # alternatives: px.colors.sequential.Plasma_r or px.colors.qualitative.Light24
        plotly_kw = {'color_discrete_sequence': px.colors.qualitative.Alphabet}
    else:
        color_arr = gamma_arr
        plotly_kw = {'color_continuous_scale': 'spectral_r'}
    color_label_clipped = color_label + '_clip'

    # clip the colour values to a smaller range to better visualize changes at low gamma
    if clip_low is None:
        clip_low = np.min(color_arr)
    if clip_high is None:
        clip_high = np.max(color_arr)
    c = np.clip(color_arr, clip_low, clip_high)  # np.clip returns a new array

    plot_title = '%s of gammascan manyseeds dataset' % algo

    df = pd.DataFrame({'index': pts_index,
                       'seed': seed_arr,
                       r'$\gamma$': gamma_arr,
                       color_label_clipped: c,
                       'x': embedding[:, 0],
                       'y': embedding[:, 1]})
    if color_label == 'seed':
        # string-valued colour column -> plotly treats the seeds as discrete categories
        df[color_label_clipped] = df[color_label_clipped].astype(str)

    scatter_kw = dict(color=color_label_clipped,
                      title=plot_title,
                      hover_name='index',
                      hover_data=["index", "seed", r"$\gamma$"],
                      **plotly_kw)

    # Choose 2D vs 3D from the embedding itself: the global REDUCER_COMPONENTS can disagree
    # with the data (e.g. n_components overridden through dimred's mod_kwargs).
    if embedding.shape[1] == 2:
        fig = px.scatter(df, x='x', y='y', **scatter_kw)
    else:
        df['z'] = embedding[:, 2]
        fig = px.scatter_3d(df, x='x', y='y', z='z', **scatter_kw)

    fig.update_layout(**plotly_figstyle)
    os.makedirs(outdir, exist_ok=True)  # write_image does not create missing folders
    fig.write_image(outdir + os.sep + outname + '.png')
    fig.show()
    return

In [None]:
from multicell.multicell_replot import \
    replot_graph_lattice_reference_overlap_plotter, replot_modern, replot_scatter_dots

def plot_tissue_local(X, agg_index, num_genes, num_cells, sidelength, fmod, outdir, state_int=False):
    """
    Plot one aggregated state (column `agg_index` of X) with replot_scatter_dots.

    Args:
        X:          2D array; column agg_index must hold num_cells * num_genes entries
        agg_index:  column of X to plot; also used in the output name 'agg<agg_index>_scatter'
        num_genes:  number of genes per cell
        num_cells:  number of cells; must be a perfect square
        sidelength: ignored - the value is always recomputed as sqrt(num_cells)
        fmod:       forwarded to replot_scatter_dots
        outdir:     folder that prefixes the output path
        state_int:  forwarded to replot_scatter_dots
    """
    # lattice side is derived from num_cells, overriding the argument
    sidelength = int(np.sqrt(num_cells))
    assert sidelength ** 2 == num_cells

    # column -> (num_cells, num_genes); the plotter receives the transpose
    state_by_cell = X[:, agg_index].reshape(num_cells, num_genes)

    outpath = outdir + os.sep + 'agg%d_scatter' % agg_index
    replot_scatter_dots(state_by_cell.T, sidelength, outpath, fmod=fmod, state_int=state_int)
    return

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def exptl_plot_embedding_jupyter(embedding, seed_arr, gamma_arr, dist_arr, vel_arr, color_label, algo, outdir, outname, clip_low=None, clip_high=None, pts_subset=None, seed_arrows=[], seed_endpts=[], show=True):
    r"""
    Plot a 2D embedding with plotly graph objects, optionally overlaying per-seed trajectories.

    Args:
        embedding:   2D array, one row per point (only 2 columns are supported, see below)
        seed_arr:    1D array - seed label of each point
        gamma_arr:   1D array - gamma value of each point
        dist_arr:    1D array - 'FP_distance' value of each point
        vel_arr:     1D array - 'FP_velocity' value of each point
        color_label: one of 'seed', r'$\gamma$', 'FP_distance', 'FP_velocity'
        algo:        name of the dim. reduction method, used in the plot title
        outdir:      folder for the image file (created if missing)
        outname:     image file name without extension; '.png' is appended
        clip_low:    lower bound applied to the colour values (default: their minimum)
        clip_high:   upper bound applied to the colour values (default: their maximum)
        pts_subset:  optional index / mask selecting the points to plot; the 'index' hover
                     field keeps referring to row positions of the full embedding
        seed_arrows: seeds whose points are joined by a line, in row order (read only)
        seed_endpts: seeds whose first and last point get start / end markers (read only)
        show:        call fig.show() after saving

    Overlay traces are drawn on a secondary y axis which is forced to the same range as
    the primary one, so they line up with the scatter points.
    The 3D branch is intentionally disabled by an assert (it still relies on plotly express).
    """
    num_runs = embedding.shape[0]
    pts_index = np.arange(num_runs)
    if pts_subset is not None:
        embedding = embedding[pts_subset, :]
        gamma_arr = gamma_arr[pts_subset]
        seed_arr = seed_arr[pts_subset]
        dist_arr = dist_arr[pts_subset]
        vel_arr = vel_arr[pts_subset]
        pts_index = pts_index[pts_subset]

    # per colour mode: values to colour by, plotly express kwargs (3D branch), go marker settings
    color_dict = {
        'seed': {
            'color_arr': seed_arr.astype(int),
            'color_sequence': px.colors.qualitative.Alphabet,  # px.colors.qualitative.Light24
            'plotly_kw': {'color_discrete_sequence': px.colors.qualitative.Alphabet},
            'marker_dict': {}
        },
        r'$\gamma$': {
            'color_arr': gamma_arr,
            'plotly_kw': {'color_continuous_scale': 'spectral_r'},
            'marker_dict': {'colorscale': 'spectral_r',
                           'colorbar': dict(thickness=10, ticktext=['Low', 'High'], outlinewidth=0, title=r'$\gamma$')}
        },
        'FP_distance': {
            'color_arr': dist_arr,
            'plotly_kw': {'color_continuous_scale': 'spectral_r'},
            'marker_dict': {'colorscale': 'spectral_r',
                           'colorbar': dict(thickness=10, ticktext=['Low', 'High'], outlinewidth=0, title='FP_distance')}
        },
        'FP_velocity': {
            'color_arr': vel_arr,
            'plotly_kw': {'color_continuous_scale': 'spectral_r'},
            'marker_dict': {'colorscale': 'spectral_r',
                            'colorbar': dict(thickness=10, ticktext=['Low', 'High'], outlinewidth=0, title='FP_velocity')}
        }
    }

    cdict = color_dict[color_label]
    color_arr = cdict['color_arr']
    plotly_kw = cdict['plotly_kw']
    marker_dict = cdict['marker_dict']
    color_label_clipped = color_label + '_clip'

    # clip the colour values to a smaller range to better visualize changes at low values
    if clip_low is None:
        clip_low = np.min(color_arr)
    if clip_high is None:
        clip_high = np.max(color_arr)
    c = np.clip(color_arr, clip_low, clip_high)  # np.clip returns a new array

    plot_title = '%s of gammascan manyseeds dataset' % (algo)

    has_arrows = len(seed_arrows) > 0
    has_endpts = len(seed_endpts) > 0
    showlegend = has_arrows
    opacity = 1.0

    fig = make_subplots(specs=[[{"secondary_y": True}]])

    df = pd.DataFrame({'index': pts_index,
                       'seed': seed_arr,
                       r'$\gamma$': gamma_arr,
                       'FP_distance': dist_arr,
                       'FP_velocity': vel_arr,
                       color_label_clipped: c,
                       'x': embedding[:, 0],
                       'y': embedding[:, 1]})

    # seed based colouring, used for the seed plot and for the trajectory lines
    seed_color_list = color_dict['seed']['color_sequence']

    def seed_color_map(s):
        # cycle through the palette; int() keeps list indexing valid for float-typed seeds
        return seed_color_list[int(s) % len(seed_color_list)]

    seed_colors = [seed_color_map(s) for s in seed_arr]  # colour code of each point

    if color_label == 'seed':
        showlegend = True
        marker_dict['color'] = seed_colors
    else:
        marker_dict['color'] = c

    if embedding.shape[1] == 2:

        # one row of customdata per point, referenced by position in the hover template
        custom_hover_data = np.zeros((8, len(pts_index)))
        custom_hover_data[0,:] = embedding[:, 0]
        custom_hover_data[1,:] = embedding[:, 1]
        custom_hover_data[2,:] = pts_index
        custom_hover_data[3,:] = seed_arr
        custom_hover_data[4,:] = gamma_arr
        custom_hover_data[5,:] = dist_arr
        custom_hover_data[6,:] = vel_arr
        custom_hover_data[7,:] = c

        trace_main = go.Scatter(x=df['x'], y=df['y'],
                                mode='markers',
                                hovertemplate =
                                '<br><b>x</b>: %{customdata[0]:.2f}'+
                                '<br><b>y</b>: %{customdata[1]:.2f}'+
                                '<br><b>index</b>: %{customdata[2]}'+
                                '<br><b>seed</b>: %{customdata[3]}'+
                                '<br><b>gamma</b>: %{customdata[4]:.5f}'+
                                '<br><b>dist</b>: %{customdata[5]:.2f}'+
                                '<br><b>vel</b>: %{customdata[6]:.2f}'+
                                '<br><b>c</b>: %{customdata[7]:.5f}',
                                customdata=custom_hover_data.T,
                                opacity=opacity,
                                marker=marker_dict,
                                name='Embedding')
        fig.add_trace(trace_main, secondary_y=False)

        # one line per requested seed; points of other seeds become NaN so that the line
        # only passes through this seed's points (in row order)
        for seed in seed_arrows:
            on_seed = seed_arr == seed
            x_mask = np.where(on_seed, embedding[:, 0], np.nan)
            y_mask = np.where(on_seed, embedding[:, 1], np.nan)
            fig.add_trace(
                go.Scatter(x=x_mask, y=y_mask, mode="lines", name='Seed %d' % seed,
                           line_color=seed_color_map(seed)),
                secondary_y=True)

        if has_endpts:
            # Collect first / last point of each requested trajectory. Positions are
            # appended (not indexed by the seed value), so any seed label works.
            start_x, start_y, end_x, end_y = [], [], [], []
            for seed in seed_endpts:
                seed_indices = np.where(seed_arr == seed)[0]
                if len(seed_indices) == 0:
                    continue  # seed has no points (e.g. removed by pts_subset)
                low, high = np.min(seed_indices), np.max(seed_indices)
                start_x.append(embedding[low, 0])
                start_y.append(embedding[low, 1])
                end_x.append(embedding[high, 0])
                end_y.append(embedding[high, 1])

            fig.add_trace(
                go.Scatter(x=start_x, y=start_y, mode="markers", name='Trajectory start',
                          marker=dict(color='white', size=13,
                                      line=dict(width=2, color='DarkSlateGrey'))),
                secondary_y=True)
            fig.add_trace(
                go.Scatter(x=end_x, y=end_y, mode="markers", name='Trajectory end',
                          marker=dict(color='DarkSlateGrey', size=13,
                                      line=dict(width=3, color='Black'))),
                secondary_y=True)

        if has_arrows or has_endpts:
            # Overlays live on the secondary y axis: pin both axes to the same padded range,
            # otherwise each axis autoscales on its own and the overlays are misplaced.
            range_scaler = 0.05
            ry1, ry2 = np.min(embedding[:, 1]), np.max(embedding[:, 1])
            margin = (ry2 - ry1) * range_scaler
            ry1 = ry1 - margin
            ry2 = ry2 + margin

            fig.update_yaxes(
                range=(ry1,ry2),
                secondary_y=False)

            fig.update_yaxes(
                title_text="",
                range=(ry1,ry2),
                showticklabels=False,
                secondary_y=True)

    else:
        assert embedding.shape[1] == 3
        # deliberately always fails: 3D plotting is disabled until it is ported away from px
        assert embedding.shape[1] == 2  # need to switch to full plotly "go" for 3D from plotly express

        df['z'] = embedding[:, 2]
        fig = px.scatter_3d(df, x='x', y='y', z='z',
                         color=color_label_clipped,
                         title=plot_title,
                         hover_name='index',
                         hover_data=["index", "seed", r"$\gamma$"],
                         **plotly_kw)

    title_dict = dict(text=plot_title, xanchor='left', yanchor='top' )
    fig.update_layout(title=title_dict, title_text=plot_title, showlegend=showlegend, **plotly_figstyle)

    os.makedirs(outdir, exist_ok=True)  # write_image does not create missing folders
    fig.write_image(outdir + os.sep + outname + '.png')

    if show:
        fig.show()

    return

# Miscellaneous functions

In [None]:
import natsort
from multicell.graph_helper import state_load


def gather_fp_data(runs_subdir, midmod='fpshift', expand=False):
    """
    Load the saved states of one gammascan run into a single array.

    Files are taken from <runs_subdir>/states. A file is used when its name, split on '_',
    has `midmod` as second part; its gamma value is parsed from the third part.

    Args:
        runs_subdir: run folder containing 'simsetup' (matrix_J.txt, matrix_A.txt) and 'states'
        midmod:      'fpshift' or 'all' - which family of state files to load
        expand:      not implemented, must be False
    Returns:
        agg_states:   2D int array (num_cells * num_genes) x num_points, one state per column,
                      columns in natural sort order of the file paths
        gamma_values: 1D array num_points - gamma value parsed from each file name
    Raises:
        AssertionError if expand is set, midmod is unknown or no matching file exists
    """
    assert not expand  # TODO implement

    assert midmod in ['fpshift', 'all']
    statedir = 'states'

    # only the matrix shapes are needed here
    num_genes = np.loadtxt(runs_subdir + os.sep + 'simsetup' + os.sep + 'matrix_J.txt').shape[0]
    num_cells = np.loadtxt(runs_subdir + os.sep + 'simsetup' + os.sep + 'matrix_A.txt').shape[0]

    statedir_path = runs_subdir + os.sep + statedir
    X_files = []
    for fname in os.listdir(statedir_path):
        fpath = statedir_path + os.sep + fname
        if os.path.isdir(fpath):
            continue
        # need at least three '_' separated parts (the third holds gamma); unrelated files
        # with fewer parts are skipped instead of raising IndexError
        name_parts = fname.split('_')
        if len(name_parts) > 2 and name_parts[1] == midmod:
            X_files.append(fpath)
    X_files = natsort.natsorted(X_files)
    num_points = len(X_files)
    assert num_points > 0, 'no state files with midmod=%s in %s' % (midmod, statedir_path)

    agg_states = np.zeros((num_cells * num_genes, num_points), dtype=int)
    gamma_values = np.zeros(num_points)

    for i, X_path in enumerate(X_files):
        X_label = os.path.basename(X_path)
        # third part minus its first character and last four characters
        # (presumably a 'g' prefix and a 4-character file extension - confirm against the writer)
        X_label_gammastr = X_label.split('_')[2][1:-4]
        gamma_values[i] = float(X_label_gammastr)

        X_loaded = state_load(X_path, cells_as_cols=False, num_genes=num_genes, num_cells=num_cells, txt=False)
        agg_states[:, i] = X_loaded

    return agg_states, gamma_values

In [None]:
def aggregate_manyruns_gammascan(seed_range, run_names):
    """
    Collect the fixed point shift data of several gammascan runs (one run per seed).

    Args:
      seed_range:               sequence of seeds; seed_range[k] belongs to run_names[k]
      run_names:                run folders, each with 'simsetup', 'data' and 'states' subfolders
    Returns
      X:                        2D ndim x nfpshifts - contains all fp data
      gamma_space:              1D ng - space at which gamma were sampled (taken from the first run)
      fpshifts_events:          1D ng - number of runs with a shift occurring at gamma value g
      ids_seed:                 1D nfpshifts - for each col of X, what is the seed value?
      ids_gamma:                1D nfpshifts - for each col of X, what is the gamma value?
    """
    # get data dimension
    num_genes = np.loadtxt(run_names[0] + os.sep + 'simsetup' + os.sep + 'matrix_J.txt').shape[0]
    num_cells = np.loadtxt(run_names[0] + os.sep + 'simsetup' + os.sep + 'matrix_A.txt').shape[0]
    ndim = num_genes * num_cells
    # get gamma space from first run
    gamma_space = np.loadtxt(run_names[0] + os.sep + 'data' + os.sep + 'gamma_space.txt')
    ng = len(gamma_space)
    dg = gamma_space[1] - gamma_space[0]
    assert gamma_space[0] == 0  # makes counting events easier (lattice index = gamma / dg)

    # 1) first pass over all runs to get nfpshifts and fpshifts_events (distribution of events)
    fpshifts_events = np.zeros(ng, dtype=int)
    fpshifts_per_run = []  # kept so that each file is read only once
    for k, _ in enumerate(seed_range):
        # ndmin=1: a file with a single value still gives a 1D array, so len() works
        fpshifts = np.loadtxt(run_names[k] + os.sep + 'data' + os.sep + 'bifurcation_candidates.txt', ndmin=1)
        fpshifts_per_run.append(fpshifts)
        # Convert to indices on the gamma_space lattice. Round to nearest: plain truncation
        # puts a value in the bin below whenever gamma / dg lands just under an integer
        # because of floating point round-off.
        fpshifts_normed = np.rint(fpshifts / dg).astype(int)
        events = np.zeros(ng, dtype=int)
        events[fpshifts_normed] += 1
        fpshifts_events += events
    nfpshifts = sum(len(fpshifts) for fpshifts in fpshifts_per_run)

    # 2) now fill remaining arrays
    X = np.zeros((ndim, nfpshifts), dtype=int)
    ids_seed = np.zeros(nfpshifts, dtype=int)
    ids_gamma = np.zeros(nfpshifts)
    run_index = 0
    for k, s in enumerate(seed_range):
        fpshifts = fpshifts_per_run[k]
        X_run, gamma_values = gather_fp_data(run_names[k], midmod='fpshift', expand=False)
        # the state files on disk must match the recorded candidates one to one
        assert np.all(fpshifts == gamma_values)
        # fill arrays: this run occupies columns run_index..run_index_next-1
        run_index_next = run_index + len(fpshifts)
        X[:, run_index:run_index_next] = X_run
        ids_seed[run_index:run_index_next] = s
        ids_gamma[run_index:run_index_next] = fpshifts
        # update run_index
        run_index = run_index_next

    return X, gamma_space, fpshifts_events, ids_seed, ids_gamma
    

In [None]:
from multicell.multicell_lattice import reconstruct_random_state_from_seed


def exptl_aggregate_manyruns_gammascan(seed_range, run_names):
    """
    Collect the fixed point shift data of several gammascan runs (one run per seed), plus
    experimental per-point distance and velocity measures along each run's trajectory.

    Args:
      seed_range:               sequence of seeds; seed_range[k] belongs to run_names[k]
      run_names:                run folders, each with 'simsetup', 'data' and 'states' subfolders
    Returns
      X:                        2D ndim x nfpshifts - contains all fp data
      gamma_space:              1D ng - space at which gamma were sampled (taken from the first run)
      fpshifts_events:          1D ng - number of runs with a shift occurring at gamma value g
      ids_seed:                 1D nfpshifts - for each col of X, what is the seed value?
      ids_gamma:                1D nfpshifts - for each col of X, what is the gamma value?
    EXPERIMENTAL:
      ids_distance:             1D nfpshifts - for each col of X, what is distance between it and the previous state
          Distances: let D be the length of the binary (+-1) states x, y
                hamming(x,y): 0.5*(D - np.dot(x,y))
              euclidean(x,y): sqrt( 2*(D - np.dot(x,y)) ) = 2*sqrt( hamming(x,y) )
          Notes:
              for the initial fpshift of each run, distance is measured against the initial condition
              else, distance is measured against the previous fpshift of the same run
      ids_velocity:             1D nfpshifts - for each col of X, what is scaled distance between it and the previous state
          divide the distance by the change in gamma i.e. v(x_k) = d(x_k, x_k-1) / (g_k - g_k-1)
          (a first fpshift located exactly at gamma_space[0] has a zero gamma gap)
    """
    # TODO: try setting distance to 0 or some fixed value for the initial points in each traj
    # TODO: try different distance measure (current = hamming)

    # get data dimension
    num_genes = np.loadtxt(run_names[0] + os.sep + 'simsetup' + os.sep + 'matrix_J.txt').shape[0]
    num_cells = np.loadtxt(run_names[0] + os.sep + 'simsetup' + os.sep + 'matrix_A.txt').shape[0]
    ndim = num_genes * num_cells
    # get gamma space from first run
    gamma_space = np.loadtxt(run_names[0] + os.sep + 'data' + os.sep + 'gamma_space.txt')
    ng = len(gamma_space)
    dg = gamma_space[1] - gamma_space[0]
    init_gamma = gamma_space[0]
    assert gamma_space[0] == 0  # makes counting events easier (lattice index = gamma / dg)

    # 1) first pass over all runs to get nfpshifts and fpshifts_events (distribution of events)
    fpshifts_events = np.zeros(ng, dtype=int)
    fpshifts_per_run = []  # kept so that each file is read only once
    for k, _ in enumerate(seed_range):
        # ndmin=1: a file with a single value still gives a 1D array, so len() works
        fpshifts = np.loadtxt(run_names[k] + os.sep + 'data' + os.sep + 'bifurcation_candidates.txt', ndmin=1)
        fpshifts_per_run.append(fpshifts)
        # Convert to indices on the gamma_space lattice. Round to nearest: plain truncation
        # puts a value in the bin below whenever gamma / dg lands just under an integer
        # because of floating point round-off.
        fpshifts_normed = np.rint(fpshifts / dg).astype(int)
        events = np.zeros(ng, dtype=int)
        events[fpshifts_normed] += 1
        fpshifts_events += events
    nfpshifts = sum(len(fpshifts) for fpshifts in fpshifts_per_run)

    # 2) now fill remaining arrays
    X = np.zeros((ndim, nfpshifts), dtype=int)
    ids_seed = np.zeros(nfpshifts, dtype=int)
    ids_gamma = np.zeros(nfpshifts)
    ids_distance = np.zeros(nfpshifts)
    ids_velocity = np.zeros(nfpshifts)
    run_index = 0
    for k, s in enumerate(seed_range):
        fpshifts = fpshifts_per_run[k]
        X_run, gamma_values = gather_fp_data(run_names[k], midmod='fpshift', expand=False)
        # the state files on disk must match the recorded candidates one to one
        assert np.all(fpshifts == gamma_values)
        # fill arrays: this run occupies columns run_index..run_index_next-1
        run_index_next = run_index + len(fpshifts)
        X[:, run_index:run_index_next] = X_run
        ids_seed[run_index:run_index_next] = s
        ids_gamma[run_index:run_index_next] = fpshifts
        # fill experimental arrays
        assert len(fpshifts) >= 2
        # idx = position inside this run, r = column in the aggregated arrays.
        # All aggregated arrays are indexed with r; idx only decides whether this is the
        # first fpshift of the run (which is compared with the run's initial condition).
        for idx, r in enumerate(range(run_index, run_index_next)):
            if idx == 0:
                init_cond = reconstruct_random_state_from_seed(ndim, s)
                adotb = np.dot(X[:, r], init_cond)
                gamma_gap = ids_gamma[r] - init_gamma
            else:
                adotb = np.dot(X[:, r], X[:, r-1])
                gamma_gap = ids_gamma[r] - ids_gamma[r-1]
            # hamming distance for +-1 binary vectors follows directly from the dot product
            dist = 0.5*(ndim - adotb)
            ids_distance[r] = dist
            ids_velocity[r] = dist / gamma_gap

        # update run_index
        run_index = run_index_next

    return X, gamma_space, fpshifts_events, ids_seed, ids_gamma, ids_distance, ids_velocity

# Analysis: single gammascan directory (1 init cond)

In [None]:
# Select the single gammascan run to analyse (alternative dataset kept commented out).
#gammascan_label = 'gscan_anchor1_gLow0_gHigh2.0_gStep5e-4_W_9_W15maze_R1_init_dual_s0_M400'
gammascan_label = 'gscan_anchor0_gLow0_gHigh4.0_gStep5e-4_W_9_W15maze_R1_init_dual_s0_M400'

# The run folder lives under the manyruns directory; only the shapes of J and A are used.
gammascan_subdir = manyruns + os.sep + gammascan_label
num_genes = np.loadtxt(gammascan_subdir + os.sep + 'simsetup' + os.sep + 'matrix_J.txt').shape[0]
num_cells = np.loadtxt(gammascan_subdir + os.sep + 'simsetup' + os.sep + 'matrix_A.txt').shape[0]
# gamma values recorded for the scan and the candidate gamma values stored with the run
gammaspace = np.loadtxt(gammascan_subdir + os.sep + 'data' + os.sep + 'gamma_space.txt')
fpshifts = np.loadtxt(gammascan_subdir + os.sep + 'data' + os.sep + 'bifurcation_candidates.txt')
print('num_genes, num_cells', num_genes, num_cells, 'with nfpshifts', len(fpshifts))

**Aggregate fixed point state files**

In [None]:
"""X_fpshifts, gamma_fpshifts = gather_fp_data(gammascan_subdir, midmod='fpshift', expand=False)
   
# Explicit midmod = 'all' or use expand flag with midmod='fpshift'
#A_X_fpshifts, A_gamma_fpshifts = gather_fp_data(A_subdir, midmod='all', expand=False)
#A_X_fpshifts_expanded, A_gamma_fpshifts_expanded = gather_fp_data(A_subdir, midmod='fpshift', expand=True)

truncate = None
if truncate is not None:
    print('Warning: Truncating dataset')
    X_fpshifts = X_fpshifts[:, :truncate]
    gamma_fpshifts = gamma_fpshifts[:truncate]

print(gamma_fpshifts[0:5], '...')
print(gamma_fpshifts[-5:])"""

In [None]:
# Treat the single gammascan as a "manyruns" dataset with one run (seed 0) so that the
# same aggregation routine can be reused; also yields the distance / velocity arrays.
seed_range = [0]
run_names = [gammascan_subdir]
X, gamma_space, fpshifts_events, ids_seed, ids_gamma, ids_distance, ids_velocity = exptl_aggregate_manyruns_gammascan(seed_range, run_names)
print('Done')

**Plot all states**

In [None]:
# Optional (off by default): draw the lattice state of every column of X.
plot_all_shifts = False
sidelength = 20  # passed to plot_tissue_local, which recomputes it from num_cells

if plot_all_shifts:   
    outdir = gammascan_subdir + os.sep + 'plot_lattice'
    for agg_index, gamma in enumerate(ids_gamma):
        # file name modifier carries the gamma value of this state
        fmod = 'g%.5f' % gamma
        if agg_index % 50 == 0:
            print(agg_index, 'plotting...')
        plot_tissue_local(X, agg_index, num_genes, num_cells, sidelength, fmod, outdir, state_int=False)

**UMAP (or other dim. reduce) for initial condition trajectory**

In [None]:
# Reducer overrides for the single-run trajectory, keyed by the dimred() choice code
# (0: UMAP, 1: t-SNE, 2: PCA); passed to dimred() as mod_kwargs.
alt_kwargs = {
    0: {
        'random_state': 40,
        'n_components': REDUCER_COMPONENTS,
        'metric': 'euclidean',
        'init': 'spectral',
        'unique': False,
        'n_neighbors': 5,
        'min_dist': 0.1,
        'spread': 1},
    1: {'perplexity': 100,
        'early_exaggeration': 12},  # default is 12
    2: {}
}

In [None]:
# Embed the aggregated states with the chosen method (0: UMAP) and its overrides from alt_kwargs.
dimred_choice = 0
algo, embedding, reducer = dimred(dimred_choice, X, mod_kwargs=alt_kwargs[dimred_choice])
#algo, embedding, reducer = dimred(dimred_choice, X_fpshifts)
    
# dimred works on X.T, hence the transposed shape in the report
print('Dim. reduction:', X.T.shape, 'to', embedding.shape)

**Visualization**

In [None]:
# Per colour mode: 'fname' is the tag used in output file names,
# 'hclips' is a list of candidate upper clip values for the colour scale.
color_plots = {
    r'$\gamma$': {'fname': 'Gamma', 
                  'hclips': [1.5, 1.0, 0.5]},
    'FP_distance': {'fname': 'FPdist', 
                    'hclips': [450, 200.0, 80, 40]},
    'FP_velocity': {'fname': 'FPvel', 
                    'hclips': [1e6, 5e5, 1e5, 5e4]},
}

# figures of this section go to the notebook output folder
outdir = NOTEBOOK_OUTDIR

In [None]:
show=True
#key_choices = [r'$\gamma$', 'FP_distance', 'FP_velocity']
key_choices = [r'$\gamma$']

plot_seed_traj = seed_range  # list of seed paths to plot (overlay)
endpts = seed_range          # list of traj endpts to overlay

# Positional arguments of exptl_plot_embedding_jupyter; the two '' entries are placeholders
# for color_label (position 5) and outname (position 8), filled in before each call.
plot_pos_args = [embedding, ids_seed, ids_gamma, ids_distance, ids_velocity, '', algo, outdir, '']
# keyword arguments shared by all calls
plot_base_kw = dict(
    show=show,
    seed_arrows=plot_seed_traj,
    seed_endpts=endpts,
    pts_subset=None, 
    clip_low=None
)

In [None]:
# Plot the embedding coloured by gamma; the output file is named '<algo>_c<fname>'.
cdict_key = r'$\gamma$'

color_hclips = color_plots[cdict_key]['hclips']
fname = color_plots[cdict_key]['fname']
outname = '%s_c%s' % (algo, fname)

plot_pos_args[5] = cdict_key  # set colour_label argument at pos 5
plot_pos_args[8] = outname    # set plot outname
exptl_plot_embedding_jupyter(*plot_pos_args, **plot_base_kw)

In [None]:
# old below

In [None]:
# Legacy plotly-express view of the current embedding, coloured by gamma.
# Uses ids_gamma / gammascan_label from the cells above: the names this cell used before
# (gamma_fpshifts, A_label) are no longer defined anywhere in the notebook.
c = ids_gamma
num_runs = len(c)

clabel = r'$\gamma$'
plotly_kw = {'color_continuous_scale': 'spectral_r'}
plot_title = '%s of %s dataset' % (algo, gammascan_label)

df = pd.DataFrame({'index': range(num_runs),
                   clabel: c,
                   'x': embedding[:, 0],
                   'y': embedding[:, 1]})

# 2D vs 3D is decided by the embedding itself rather than by the global setting
if embedding.shape[1] == 2:
    fig = px.scatter(df, x='x', y='y',
                     color=clabel,
                     title=plot_title,
                     hover_name='index',
                     **plotly_kw)
else:
    df['z'] = embedding[:, 2]
    fig = px.scatter_3d(df, x='x', y='y', z='z',
                        color=clabel,
                        title=plot_title,
                        hover_name='index',
                        **plotly_kw)

fig.update_layout(**plotly_figstyle)
fig.show()

In [None]:
# "Landscape" view: the first two embedding coordinates with gamma on the vertical axis.
# The title is built in a new variable so that re-running the cell does not keep
# appending ' landscape' to plot_title.
landscape_title = plot_title + ' landscape'
df = pd.DataFrame({'index': range(num_runs),
                   clabel: c,
                   'x': embedding[:, 0],
                   'y': embedding[:, 1],
                   'z': c})  # gamma values (the former A_gamma_fpshifts is not defined)

fig = px.scatter_3d(df, x='x', y='y', z='z',
                    color=clabel,
                    title=landscape_title,
                    hover_name='index',
                    **plotly_kw,
                    labels=dict(x='x', y='y', z=u'\u03B3'))

fig.update_layout(**plotly_figstyle)
fig.show()

# Analysis: "Manyruns" for fixed gammascan but varying init cond seed

In [None]:
# Output folder for this section and the parent folder of the per-seed gammascan runs.
outdir = RUNS_FOLDER + os.sep + 'explore' + os.sep + 'bifurcation'
gammascan_manyruns = manyruns + os.sep + 'gscan_manyseeds'

# Active dataset: one run folder per seed 0..200; run_names[k] belongs to seed_range[k].
seed_range = np.arange(0,201)
run_names = [gammascan_manyruns + os.sep + 'gscan_anchor1_gLow0_gHigh2.0_gStep5e-4_W_9_maze_R1_init_random_s%d_M100' % s for s in seed_range]

# Alternative datasets (uncomment one pair to switch):
#seed_range = np.arange(0,501)
#run_names = [gammascan_manyruns + os.sep + 'gscan_anchor0_gLow0_gHigh2.0_gStep5e-4_W_9_maze_R1_init_random_s%d_M100' % s for s in seed_range]

#seed_range = np.arange(0,10)
#run_names = [gammascan_manyruns + os.sep + 'gscan_anchor1_gLow0_gHigh2.0_gStep5e-4_W_9_W15maze_R1_init_random_s%d_M400' % s for s in seed_range]

#seed_range = np.arange(0,26)
#run_names = [gammascan_manyruns + os.sep + 'gscan_anchor0_gLow0_gHigh4.0_gStep5e-4_W_9_W15maze_R1_init_random_s%d_M400' % s for s in seed_range]

In [None]:
# For every run: plot gamma of each FP shift against its event index and track the
# largest number of events seen so far.
maxlen = 0
# Pair each seed with its run folder by position; indexing run_names with the seed value
# only works when seed_range happens to be 0, 1, 2, ...
for s, run_name in zip(seed_range, run_names):
    agg_states, gamma_values = gather_fp_data(run_name, midmod='fpshift', expand=False)
    plt.plot(np.arange(len(gamma_values)), gamma_values, '--o')
    print(s, len(gamma_values))
    if len(gamma_values) > maxlen:
        maxlen = len(gamma_values)
        print('newmax:', s, maxlen)

plt.xlabel(r'$n$, FP shift event index')
plt.ylabel(r'$\gamma$')
plt.show()

In [None]:
focus = False  # focus on one run instead of manyruns
if focus:
    # restrict both lists to the single seed of interest
    seed_range = [76]
    run_names = [gammascan_manyruns + os.sep + 'gscan_anchor1_gLow0_gHigh2.0_gStep5e-4_W_9_maze_R1_init_random_s%d_M100' % s for s in seed_range]

# aggregate all selected runs; this overwrites X, ids_seed, ids_gamma of the single-run section
X, gamma_space, fpshifts_events, ids_seed, ids_gamma = aggregate_manyruns_gammascan(seed_range, run_names)

Statistics of fpshifts

In [None]:
# spacing of the gamma lattice, also used as bar width in the plots below
dg = gamma_space[1] - gamma_space[0]
nseed = len(list(seed_range))
# event counts divided by the number of seeds (runs)
fpshifts_events_normed = fpshifts_events / nseed

In [None]:
#plt.bar(gamma_space, fpshifts_events_normed, width=dg, ec='k', linewidth=1, alpha=0.8)
#plt.xlabel(r'$\gamma$')
#plt.ylabel('FP Shift Events (fraction of init. cond.)')

# 2x2 grid showing the same FP shift histogram on four axis scalings:
#   top-left: linear / linear      top-right: linear x, log y
#   bottom-left: log x, linear y   bottom-right: log / log
fig, ax = plt.subplots(2,2)
for panel in ax.flat:
    panel.bar(gamma_space, fpshifts_events_normed, width=dg, ec='k', linewidth=1, alpha=0.8)

# log y-scale for the right column
for panel in ax[:, 1]:
    panel.set_yscale('log')
# log x-scale for the bottom row
for panel in ax[1, :]:
    panel.set_xscale('log')

plt.suptitle('FP Shift Events (fraction of init. cond.)')
for panel in ax.flat:
    panel.set_xlabel(r'$\gamma$')

plt.show()

In [None]:
# filter now only the ones above 10%
# Keep only the gamma bins whose event fraction is about 10% or more; all other bins are zeroed.
# The cutoff is 0.099 rather than 0.1 (presumably to tolerate float round-off at exactly 10% - confirm).
distro_clipped = np.copy(fpshifts_events_normed)
distro_clipped[distro_clipped < 0.099] = 0

In [None]:
# Clipped distribution on log-log axes, zoomed to the low gamma window 0.02 - 0.1.
plt.bar(gamma_space, distro_clipped, width=dg, ec='k', linewidth=1, alpha=0.8)
plt.xlabel(r'$\gamma$')
plt.ylabel('FP Shift Events (fraction of init. cond.)')
plt.title('Clipped distribution of FP shifts')
# visualization checks
# ----------------------
#plt.bar(gamma_space, np.ones(len(gamma_space))*400, width=0.5*dg, ec='k', linewidth=0.5*dg, alpha=0.3, color='orange')
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')
plt.xlim(0.02, 0.1)
#plt.ylim(0, 100)
# ----------------------
plt.show()

In [None]:
# Same clipped distribution on log-log axes, zoomed to the gamma window 0.1 - 2.0.
plt.bar(gamma_space, distro_clipped, width=dg, ec='k', linewidth=1, alpha=0.8)
plt.xlabel(r'$\gamma$')
plt.ylabel('FP Shift Events (fraction of init. cond.)')
plt.title('Clipped distribution of FP shifts')
# visualization checks
# ----------------------
#plt.bar(gamma_space, np.ones(len(gamma_space))*400, width=0.5*dg, ec='k', linewidth=0.5*dg, alpha=0.3, color='orange')
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')
plt.xlim(0.1, 2.0)
#plt.ylim(0, 100)
# ----------------------
plt.show()

**Rough umap for the manyruns manyseeds style data**

In [None]:
# Optional subsampling: uncomment to drop the first `ss` points before reduction
#ss = 400
#X = X[:, ss:]
#ids_gamma = ids_gamma[ss:]
#ids_seed = ids_seed[ss:]

# Overrides passed to dimred() as mod_kwargs, one entry per reducer choice
# (0: UMAP, 1: t-SNE, 2: PCA).
umap_overrides = {
    'random_state': 40,
    'n_components': REDUCER_COMPONENTS,
    'metric': 'euclidean',
    'init': 'spectral',
    'unique': False,
    'n_neighbors': 15,
    'min_dist': 0.1,
    'spread': 1.0,
}
tsne_overrides = {
    'perplexity': 30,
    'early_exaggeration': 12,  # 12 is the default
}
alt_kwargs = {0: umap_overrides, 1: tsne_overrides, 2: {}}

**Single set of dimension reduction parameters, multiple plots**

In [None]:
dimred_choice = 1  # 0: UMAP, 1: t-SNE, 2: PCA

print('Reducing X.shape:', X.shape)
algo, embedding, reducer = dimred(
    dimred_choice,
    X,
    mod_kwargs=alt_kwargs[dimred_choice],  # drop mod_kwargs to fall back on the defaults
)
print('Done, %s embedding:' % algo, embedding.shape)

# Gamma-colored plots: the unclipped version first, then one per upper clip value.
gamma_label = r'$\gamma$'
for hclip in (None, 1.5, 1.0, 0.5):
    if hclip is None:
        outname = '%s_cGamma' % algo
    else:
        outname = '%s_cGamma_clip%.2f' % (algo, hclip)
    plot_embedding_jupyter(embedding, ids_seed, ids_gamma, gamma_label, algo, outdir, outname,
                           clip_low=None, clip_high=hclip, pts_subset=None)

# Seed-colored plot of the same embedding.
outname = '%s_cSeed_a' % algo
plot_embedding_jupyter(embedding, ids_seed, ids_gamma, 'seed', algo, outdir, outname,
                       clip_low=None, clip_high=None, pts_subset=None)

**Sweep of dimension reduction parameters, multiple plots for each**

In [None]:
h_clip_loop = [0.5, 1.0, 1.5]  # use [0.5] for anchor 1, W15, M400 dataset

dimred_choice = 0

# One-parameter sweep per reducer choice: (display name, sweep definition).
sweep_table = {
    0: ('UMAP', {'plist': [5, 10, 15, 30, 50, 100, 200, 400, 900],
                 'pvar': 'n_neighbors',
                 'pstr': 'nnbr'}),
    1: ('t-SNE', {'plist': [2, 5, 10, 40, 50, 100, 200, 400, 1000],
                  'pvar': 'perplexity',
                  'pstr': 'perplex'}),
}
assert dimred_choice in sweep_table  # only UMAP (0) and t-SNE (1) have a sweep defined
algo_choice, sweep1D = sweep_table[dimred_choice]

gamma_label = r'$\gamma$'
for p in sweep1D['plist']:
    fprefix = '%s_%s%d' % (algo_choice, sweep1D['pstr'], p)

    # Start from the reducer's base overrides, then substitute the swept value.
    alt_kwargs_loop = dict(alt_kwargs[dimred_choice])
    alt_kwargs_loop.update({sweep1D['pvar']: p})

    print('Reducing X.shape:', X.shape)
    print(alt_kwargs_loop)
    algo, embedding, reducer = dimred(dimred_choice, X, mod_kwargs=alt_kwargs_loop)
    print('Done, %s embedding:' % algo_choice, embedding.shape)

    # Gamma-colored plots: the unclipped version first, then one per upper clip value.
    for hclip in [None] + h_clip_loop:
        if hclip is None:
            outname = '%s_cGamma' % (fprefix)
        else:
            outname = '%s_cGamma_clip%.2f' % (fprefix, hclip)
        plot_embedding_jupyter(embedding, ids_seed, ids_gamma, gamma_label, algo_choice, outdir, outname,
                               clip_low=None, clip_high=hclip, pts_subset=None)

    # Seed-colored plot for this sweep value.
    outname = '%s_cSeed' % (fprefix)
    plot_embedding_jupyter(embedding, ids_seed, ids_gamma, 'seed', algo_choice, outdir, outname,
                           clip_low=None, clip_high=None, pts_subset=None)

Second: plot only connected or disconnected vertices (UNUSED)

In [None]:
# UMAP only: fetch the mask returned by umap.utils.disconnected_vertices for the last
# fitted reducer, print it with its type and the number of flagged entries, and build
# the complementary mask. The plotting calls that would consume it stay disabled.
if dimred_choice == 0:
    disconnected_points = umap.utils.disconnected_vertices(reducer)
    n_disconnected = disconnected_points.astype(int).sum()
    for report_item in (disconnected_points, type(disconnected_points), n_disconnected):
        print(report_item)
    pts_subset = np.invert(disconnected_points)
    #umap.plot.points(reducer, subset_points=~disconnected_points)
    #plot_embedding_jupyter(embedding, ids_seed, ids_gamma, r'$\gamma$', algo, clip_low=None, clip_high=0.5, pts_subset=pts_subset)

#plt.show()

# EXPERIMENTAL - add additional features (arrowpath, "distance", and "velocity")

In [None]:
# Output folder for the experimental plots and the folder holding the many-seed gamma scans.
outdir = os.sep.join([RUNS_FOLDER, 'explore', 'bifurcation'])
gammascan_manyruns = os.sep.join([manyruns, 'gscan_manyseeds'])

# NOTE(review): every dataset selection below is commented out, so `seed_range` and
# `run_names` must already exist from an earlier cell before the aggregation runs - confirm.
# Dataset options (uncomment one seed_range / run_names pair):

#seed_range = np.arange(0,201)
#run_names = [gammascan_manyruns + os.sep + 'gscan_anchor1_gLow0_gHigh2.0_gStep5e-4_W_9_maze_R1_init_random_s%d_M100' % s for s in seed_range]

#seed_range = np.arange(0,501)
#run_names = [gammascan_manyruns + os.sep + 'gscan_anchor0_gLow0_gHigh2.0_gStep5e-4_W_9_maze_R1_init_random_s%d_M100' % s for s in seed_range]

#seed_range = np.arange(0,10)
#run_names = [gammascan_manyruns + os.sep + 'gscan_anchor1_gLow0_gHigh2.0_gStep5e-4_W_9_W15maze_R1_init_random_s%d_M400' % s for s in seed_range]

#seed_range = np.arange(0,26)
#run_names = [gammascan_manyruns + os.sep + 'gscan_anchor0_gLow0_gHigh4.0_gStep5e-4_W_9_W15maze_R1_init_random_s%d_M400' % s for s in seed_range]

In [None]:
# Unpack the seven values returned by the experimental aggregation over the selected runs.
(
    X,
    gamma_space,
    fpshifts_events,
    ids_seed,
    ids_gamma,
    ids_distance,
    ids_velocity,
) = exptl_aggregate_manyruns_gammascan(seed_range, run_names)

In [None]:
# Overrides passed to dimred() as mod_kwargs for the experimental section,
# one entry per reducer choice (0: UMAP, 1: t-SNE, 2: PCA).
umap_overrides = {
    'random_state': 40,
    'n_components': REDUCER_COMPONENTS,
    'metric': 'euclidean',
    'init': 'spectral',
    'unique': False,
    'n_neighbors': 900,
    'min_dist': 2,
    'spread': 2,
}
tsne_overrides = {
    'perplexity': 100,
    'early_exaggeration': 12,  # 12 is the default
}
alt_kwargs = {0: umap_overrides, 1: tsne_overrides, 2: {}}

In [None]:
dimred_choice = 1  # 0: UMAP, 1: t-SNE, 2: PCA

print('Reducing X.shape:', X.shape)
algo, embedding, reducer = dimred(
    dimred_choice,
    X,
    mod_kwargs=alt_kwargs[dimred_choice],  # drop mod_kwargs to fall back on the defaults
)
print('Done, %s embedding:' % algo, embedding.shape)

In [None]:
show = True
# Color keys rendered when mass_plots is enabled (each must be a key of color_plots).
#key_choices = [r'$\gamma$', 'FP_distance', 'FP_velocity']
key_choices = [r'$\gamma$']

# Seeds whose paths are overlaid (forwarded as seed_arrows).
#plot_seed_traj = []
#plot_seed_traj = seed_range
plot_seed_traj = np.arange(20)  #[0, 1, 2, 75, 76, 77]

# Seeds whose trajectory endpoints are overlaid (forwarded as seed_endpts).
#endpts = plot_seed_traj
endpts = seed_range

# Per color key: the tag used in output file names and the upper clip values to sweep.
color_plots = {
    r'$\gamma$': {'fname': 'Gamma',
                  'hclips': [1.5, 1.0, 0.5]},
    'FP_distance': {'fname': 'FPdist',
                    'hclips': [450, 200.0, 80, 40]},
    'FP_velocity': {'fname': 'FPvel',
                    'hclips': [1e6, 5e5, 1e5, 5e4]},
}

# Positional arguments for exptl_plot_embedding_jupyter. Slot 5 (color key) and
# slot 8 (output name) are placeholders that are overwritten before every call.
plot_pos_args = [embedding, ids_seed, ids_gamma, ids_distance, ids_velocity, '', algo, outdir, '']
# Keyword arguments shared by every plot; clip_high is supplied per call when needed.
plot_base_kw = dict(
    show=show,
    seed_arrows=plot_seed_traj,
    seed_endpts=endpts,
    pts_subset=None,
    clip_low=None
)

mass_plots = False
if mass_plots:
    for cdict_key in key_choices:
        plot_pos_args[5] = cdict_key

        fname = color_plots[cdict_key]['fname']
        color_hclips = color_plots[cdict_key]['hclips']

        # unclipped plot for this color key
        outname = '%s_c%s' % (algo, fname)
        plot_pos_args[8] = outname
        exptl_plot_embedding_jupyter(*plot_pos_args, **plot_base_kw)

        # one plot per upper clip value
        for hclip in color_hclips:
            outname = '%s_c%s_clip%.2f' % (algo, fname, hclip)
            plot_pos_args[8] = outname
            exptl_plot_embedding_jupyter(*plot_pos_args, clip_high=hclip, **plot_base_kw)

    # Seed-colored plot: switch the color key to 'seed' first. Without this the slot
    # still holds the last key from the loop above, so the '_cSeed' file would be
    # colored by that key instead of by seed.
    # NOTE(review): assumes exptl_plot_embedding_jupyter accepts 'seed' as a color key,
    # as plot_embedding_jupyter does - confirm.
    plot_pos_args[5] = 'seed'
    outname = '%s_cSeed' % algo
    plot_pos_args[8] = outname
    exptl_plot_embedding_jupyter(*plot_pos_args, **plot_base_kw)

In [None]:
# Single gamma-colored plot of the current embedding: fill the color-key slot (5)
# and the output-name slot (8) of the shared positional arguments, then plot.
key = r'$\gamma$'
cdict = color_plots[key]
fname = cdict['fname']
plot_pos_args[5], plot_pos_args[8] = key, '%s_c%s' % (algo, fname)
exptl_plot_embedding_jupyter(*plot_pos_args, **plot_base_kw)

In [None]:
# Seed-colored plot of the current embedding. The color-key slot (5) must be switched
# to 'seed' here: it otherwise keeps whatever key the previous plotting cell stored,
# and the '_cSeed' output would not actually be colored by seed.
# NOTE(review): assumes exptl_plot_embedding_jupyter accepts 'seed' as a color key,
# as plot_embedding_jupyter does - confirm.
plot_pos_args[5] = 'seed'
plot_pos_args[8] = '%s_cSeed' % algo
exptl_plot_embedding_jupyter(*plot_pos_args, **plot_base_kw)

In [None]:
#plot_embedding_jupyter(embedding, ids_seed, ids_gamma, 'seed', algo, outdir, outname, clip_low=None, clip_high=None, pts_subset=None)

In [None]:
# Print and histogram ids_velocity, then ids_distance, each with 500 bins and its own
# x-window. A y-limit (e.g. plt.ylim(0, 2500) / plt.ylim(0, 500)) can be added per plot.
for hist_values, x_window in ((ids_velocity, (-0.1, 4e4)), (ids_distance, (0, 90))):
    print(hist_values)
    plt.hist(hist_values, bins=500)
    plt.xlim(*x_window)
    plt.show()