# Run times of main experiments

In [3]:
import numpy as np
import pandas as pd
import os
import umap
import umapns
from umapns.my_utils import get_ring, compute_low_dim_psims
from scipy.sparse import coo_matrix
%env CUDA_VISIBLE_DEVICES=3

env: CUDA_VISIBLE_DEVICES=3


## C. elegans experiments

In [4]:
# Load the C. elegans table: tab-separated text file without a header row.
data_path = "../data/packer_c-elegans"
pca100 = pd.read_csv(
    os.path.join(data_path, "c-elegans_qc_final.txt"),
    sep="\t",
    header=None,
)
# Show (rows, columns) as the cell output.
pca100.shape

(86024, 100)

### Run times without loss logging

In [5]:
def run_umap_no_loss_c_elegans():
    """Fit ``umap.UMAP`` on the C. elegans table and discard the embedding.

    Reads ``pca100`` from the notebook's global namespace. Nothing is
    returned; the function only wraps the fit so that it can be timed.
    """
    settings = dict(
        metric="cosine",
        n_neighbors=30,
        n_epochs=750,
        random_state=0,
        verbose=False,
    )
    reducer = umap.UMAP(**settings)
    reducer.fit_transform(pca100)

In [6]:
# Time the run without loss logging (typically about 8 minutes per run).
# -o makes %timeit return a TimeitResult as the cell output and -q suppresses
# the printed summary; the following cell reads that result through `_`.
%timeit -oq  _ = run_umap_no_loss_c_elegans()

<TimeitResult : 8min 40s ± 13.9 s per loop (mean ± std. dev. of 7 runs, 1 loop each)>

In [7]:
# `_` is IPython's "last output" variable: here it is expected to hold the
# TimeitResult of the preceding %timeit cell, so run this cell right after it.
time_no_loss_logging = str(_)
with open(os.path.join(data_path, "c_elegans_time_no_loss_logging"), "w") as file:
    # Two lines: a title, then the textual timing summary.
    file.writelines([
        "C elegans no loss logging profiling\n",
        time_no_loss_logging + "\n",
    ])

### With loss logging after full epoch

In [8]:
def run_umap_c_elegans():
    """Fit ``umapns.UMAP`` with ``log_losses="after"`` on the C. elegans table.

    Uses the same metric, neighbour count, epoch count and random state as
    ``run_umap_no_loss_c_elegans``. Reads ``pca100`` from the notebook's
    global namespace and returns nothing; the function only wraps the fit so
    that it can be timed.
    """
    settings = dict(
        metric="cosine",
        n_neighbors=30,
        n_epochs=750,
        log_losses="after",
        random_state=0,
        verbose=False,
    )
    reducer_after = umapns.UMAP(**settings)
    reducer_after.fit_transform(pca100)


In [None]:
# Time the run with loss logging after each epoch (typically about 30 minutes
# per run).
# -o makes %timeit return a TimeitResult as the cell output and -q suppresses
# the printed summary; the following cell reads that result through `_`.
%timeit -oq _ = run_umap_c_elegans()

In [None]:
# `_` is IPython's "last output" variable: here it is expected to hold the
# TimeitResult of the preceding %timeit cell, so run this cell right after it.
time_loss_logging_after = str(_)
# Write the result into the data directory, like the other timing files of
# this notebook, instead of the current working directory.
with open(os.path.join(data_path, "c_elegans_time_loss_logging_after"), "w") as file:
    file.write("C elegans loss logging 'after' profiling\n")
    file.write(time_loss_logging_after + "\n")

## Toy ring experiments

In [None]:
# Get the toy ring data: reuse the cached .npy file when it exists, otherwise
# generate the ring and cache it for later runs.
data_path = "../data/toy_ring"
seed = 3
ring_file = os.path.join(data_path,
                         f"toy_ring_1000_4_0_5_original_seed_{seed}.npy")
try:
    data = np.load(ring_file)
except FileNotFoundError:
    # Original note: n=7000, r=4, var=0.5 yields a crisp circle for
    # n_epochs=10000.
    # NOTE(review): the first argument below is 1000, not 7000, and `seed`
    # only appears in the file name, not in the get_ring call -- confirm that
    # both are intended.
    data = get_ring(1000, 4, 0.5, noise="uniform")
    # The data directory may be missing on a fresh checkout; without it
    # np.save would itself fail with FileNotFoundError.
    os.makedirs(data_path, exist_ok=True)
    np.save(ring_file, data)


### Initialization at input data and optimization for 10000 epochs

In [None]:
def run_umap_init_10000(data):
    """Fit ``umapns.UMAP`` on ``data`` for 10000 epochs, initialised at ``data``.

    Parameters
    ----------
    data
        Input passed both as the ``init`` argument and as the data to embed.

    Nothing is returned; the function only wraps the fit so that it can be
    timed.
    """
    settings = dict(
        random_state=0,
        init=data,
        n_epochs=10000,
        verbose=False,
    )
    reducer = umapns.UMAP(**settings)
    reducer.fit_transform(data)

In [None]:
# Time the toy ring run initialised at the input data.
# -o makes %timeit return a TimeitResult as the cell output and -q suppresses
# the printed summary; the following cell reads that result through `_`.
%timeit -oq  _ = run_umap_init_10000(data)

In [None]:
# `_` is IPython's "last output" variable: here it is expected to hold the
# TimeitResult of the preceding %timeit cell, so run this cell right after it.
time_ring_init_10000 = str(_)
with open(os.path.join(data_path, "toy_ring_init_10000_profiling"), "w") as file:
    # Two lines: a title, then the textual timing summary.
    file.writelines([
        "toy ring init 10000 profiling\n",
        time_ring_init_10000 + "\n",
    ])

### Initialization at input data, dense input similarities and optimization for 10000 epochs

In [None]:
# Settings from which find_ab_params derives the (a, b) pair.
spread = 1.0
min_dist = 0.1
a, b = umapns.umap_.find_ab_params(min_dist=min_dist, spread=spread)

# Similarities computed from `data` itself with (a, b); presumably pairwise
# low-dimensional similarities -- see umapns.my_utils.compute_low_dim_psims.
low_sim = compute_low_dim_psims(data, a, b)
# Stored in scipy's sparse COO format so it can be passed as `graph` below.
low_sim_sparse = coo_matrix(low_sim)


In [None]:
def run_umap_init_graph_10000(data, low_sim_sparse):
    """Fit ``umapns.UMAP`` for 10000 epochs with a supplied similarity graph.

    Parameters
    ----------
    data
        Input passed both as the ``init`` argument and as the data to embed.
    low_sim_sparse
        Matrix handed to ``umapns.UMAP`` through its ``graph`` argument.

    Nothing is returned; the function only wraps the fit so that it can be
    timed.
    """
    settings = dict(
        random_state=0,
        init=data,
        graph=low_sim_sparse,
        n_epochs=10000,
        verbose=False,
    )
    reducer = umapns.UMAP(**settings)
    reducer.fit_transform(data)

In [None]:
# Time the toy ring run that uses the precomputed similarity graph.
# -o makes %timeit return a TimeitResult as the cell output and -q suppresses
# the printed summary; the following cell reads that result through `_`.
%timeit -oq  _ = run_umap_init_graph_10000(data, low_sim_sparse)

In [None]:
# `_` is IPython's "last output" variable: here it is expected to hold the
# TimeitResult of the preceding %timeit cell, so run this cell right after it.
time_ring_init_graph_10000 = str(_)
with open(os.path.join(data_path, "toy_ring_init_graph_10000_profiling"), "w") as file:
    # Two lines: a title, then the textual timing summary.
    file.writelines([
        "toy ring init graph 10000 profiling\n",
        time_ring_init_graph_10000 + "\n",
    ])


