# Run times of main experiments

In [None]:
# Set CUDA_VISIBLE_DEVICES=3 for this kernel before the imports below
# (presumably to pin CUDA-aware libraries to GPU index 3 — adjust per machine).
%env CUDA_VISIBLE_DEVICES=3

import numpy as np
import pandas as pd
import os
import umap
from vis_utils.utils import get_ring, compute_low_dim_psims
from scipy.sparse import coo_matrix

## C. elegans experiments

In [None]:
# Read the C. elegans table: tab-separated text without a header row.
data_path = "../data/packer_c-elegans"
pca100 = pd.read_csv(
    os.path.join(data_path, "c-elegans_qc_final.txt"),
    sep="\t",
    header=None,
)
# Last expression of the cell: shows the (rows, columns) shape of the table.
pca100.shape

### Run times without loss logging

In [None]:
def run_umap_no_loss_c_elegans():
    """Fit UMAP on the C. elegans data without loss logging.

    Reads the module-level ``pca100`` table. The embedding produced by
    ``fit_transform`` is discarded; the function exists to be timed and
    returns ``None``.
    """
    settings = dict(
        metric="cosine",
        n_neighbors=30,
        n_epochs=750,
        random_state=0,
        verbose=False,
    )
    umap.UMAP(**settings).fit_transform(pca100)

In [None]:
# Time the C. elegans run without loss logging (typically 8 mins per run).
# -o makes %timeit return its TimeitResult (the next cell reads it back via `_`),
# -q suppresses the printed summary.
%timeit -oq  _ = run_umap_no_loss_c_elegans()

In [None]:
# `_` is expected to still hold the TimeitResult of the preceding %timeit cell;
# keep its string form and store it under data_path.
time_no_loss_logging = str(_)
with open(os.path.join(data_path, "c_elegans_time_no_loss_logging"), "w") as file:
    file.writelines([
        "C elegans no loss logging profiling\n",
        f"{time_no_loss_logging}\n",
    ])

### With loss logging after full epoch

In [None]:
def run_umap_c_elegans():
    """Fit UMAP on the C. elegans data with ``log_losses="after"``.

    Same settings as the run without loss logging, plus the ``log_losses``
    option (per the section heading: losses are logged after each full
    epoch). Reads the module-level ``pca100`` table, discards the embedding
    and returns ``None``.
    """
    settings = dict(
        metric="cosine",
        n_neighbors=30,
        n_epochs=750,
        log_losses="after",
        random_state=0,
        verbose=False,
    )
    umap.UMAP(**settings).fit_transform(pca100)


In [None]:
# Time the C. elegans run with loss logging (typically 30 mins per run).
# -o makes %timeit return its TimeitResult (the next cell reads it back via `_`),
# -q suppresses the printed summary.
%timeit -oq _ = run_umap_c_elegans()

In [None]:
# `_` is expected to still hold the TimeitResult of the preceding %timeit cell.
time_loss_logging_after = str(_)
# Store the result under data_path, like every other profiling result in this
# notebook; a bare file name would land in the current working directory.
with open(os.path.join(data_path, "c_elegans_time_loss_logging_after"), "w") as file:
    file.write("C elegans loss logging 'after' profiling\n")
    file.write(time_loss_logging_after + "\n")

## Toy ring experiments

In [None]:
# Get the single-ring toy dataset: load the cached array if present,
# otherwise generate it and cache it for later runs.
data_path = "../data/toy_ring"
seed = 3
try:
    data = np.load(os.path.join(data_path, f"toy_ring_1000_4_0_5_original_seed_{seed}.npy"))
except FileNotFoundError:
    # Seed NumPy's global RNG so the regenerated ring matches the seed encoded
    # in the file name, as the other ring-generating cells in this notebook do.
    np.random.seed(seed)
    data = get_ring(1000, 4, 0.5, noise="uniform")  # n=7000, r = 4, var = 0.5 yields crisp circle for n_epochs=10000
    np.save(os.path.join(data_path,
                         f"toy_ring_1000_4_0_5_original_seed_{seed}.npy"),
            data)


### Initialization at input data and optimization for 10000 epochs

In [None]:
def run_umap_init_10000(data):
    """Fit UMAP for 10000 epochs, initialised at the input points.

    ``data`` is passed both as the ``init`` argument and as the data to
    embed. The embedding is discarded and ``None`` is returned.
    """
    settings = dict(
        random_state=0,
        init=data,
        n_epochs=10000,
        verbose=False,
    )
    umap.UMAP(**settings).fit_transform(data)

In [None]:
# Time the single-ring run initialised at the input data, with %timeit's
# default loop/repeat counts. -o returns the TimeitResult (the next cell reads
# it back via `_`), -q suppresses the printed summary.
%timeit -oq  _ = run_umap_init_10000(data)

In [None]:
# `_` is expected to still hold the TimeitResult of the preceding %timeit cell;
# keep its string form and store it under data_path.
time_ring_init_10000 = str(_)
with open(os.path.join(data_path, "toy_ring_init_10000_profiling"), "w") as file:
    file.writelines([
        "toy ring init 10000 profiling\n",
        f"{time_ring_init_10000}\n",
    ])

### Initialization at input data, dense input similarities and optimization for 10000 epochs

In [None]:
# Fit the (a, b) curve parameters from spread and min_dist.
min_dist, spread = 0.1, 1.0
a, b = umap.umap_.find_ab_params(spread=spread, min_dist=min_dist)

# Similarities computed directly on the ring data with these (a, b) values,
# then converted to a scipy COO sparse matrix.
low_sim = compute_low_dim_psims(data, a, b)
low_sim_sparse = coo_matrix(low_sim)


In [None]:
def run_umap_init_graph_10000(data, low_sim_sparse):
    """Fit UMAP for 10000 epochs with a supplied similarity graph.

    ``data`` is used both as initialisation and as input;
    ``low_sim_sparse`` is handed to UMAP through its ``graph`` argument
    (presumably replacing the usual input similarities — confirm against the
    UMAP version in use). The embedding is discarded; returns ``None``.
    """
    settings = dict(
        random_state=0,
        init=data,
        graph=low_sim_sparse,
        n_epochs=10000,
        verbose=False,
    )
    umap.UMAP(**settings).fit_transform(data)

In [None]:
# Time the single-ring run that uses the precomputed dense similarities.
# -o returns the TimeitResult (the next cell reads it back via `_`),
# -q suppresses the printed summary.
%timeit -oq  _ = run_umap_init_graph_10000(data, low_sim_sparse)

In [None]:
# `_` is expected to still hold the TimeitResult of the preceding %timeit cell;
# keep its string form and store it under data_path.
time_ring_init_graph_10000 = str(_)
with open(os.path.join(data_path, "toy_ring_init_graph_10000_profiling"), "w") as file:
    file.writelines([
        "toy ring init graph 10000 profiling\n",
        f"{time_ring_init_graph_10000}\n",
    ])




# CIFAR 10 data

In [None]:
# Load the stored CIFAR-10 feature array
# (the file name suggests ResNet-50 features).
data_path_cifar = "../data/cifar10"
cifar10_resnet50 = np.load(
    os.path.join(data_path_cifar, "cifar10_resnet50_features.npy")
)

In [None]:
def run_umap_cifar10(data):
    """Fit UMAP on ``data`` with ``log_losses="after"`` and a fixed seed.

    All other UMAP settings are left at their defaults. The embedding is
    discarded; returns ``None``.
    """
    settings = dict(random_state=0, log_losses="after")
    umap.UMAP(**settings).fit_transform(data)

In [None]:
# Time the CIFAR-10 run. -o returns the TimeitResult (the next cell reads it
# back via `_`), -q suppresses the printed summary.
%timeit -oq  _ = run_umap_cifar10(cifar10_resnet50)

In [None]:
# `_` is expected to still hold the TimeitResult of the preceding %timeit cell;
# keep its string form and store it under data_path_cifar.
time_cifar10 = str(_)
with open(os.path.join(data_path_cifar, "cifar10_profiling"), "w") as file:
    file.writelines([
        "cifar 10 profiling\n",
        f"{time_cifar10}\n",
    ])


# PBMC data

In [None]:
# Read the PBMC table: tab-separated text without a header row.
data_path_pbmc = "../data/zheng_pbmc"
pca50 = pd.read_csv(
    os.path.join(data_path_pbmc, "pbmc_qc_final.txt"),
    sep="\t",
    header=None,
)

In [None]:
def run_umap_pbmc(data):
    """Fit UMAP on ``data`` with the PBMC settings and loss logging.

    Uses cosine metric, 30 neighbours, 750 epochs, ``log_losses="after"``
    and a fixed random state. The embedding is discarded; returns ``None``.
    """
    settings = dict(
        metric="cosine",
        n_neighbors=30,
        n_epochs=750,
        log_losses="after",
        random_state=0,
    )
    umap.UMAP(**settings).fit_transform(data)

In [None]:
# Time the PBMC run. -o returns the TimeitResult (the next cell reads it back
# via `_`), -q suppresses the printed summary.
%timeit -oq  _ = run_umap_pbmc(pca50)

In [None]:
# `_` is expected to still hold the TimeitResult of the preceding %timeit cell;
# keep its string form and store it under data_path_pbmc.
time_pbmc = str(_)
with open(os.path.join(data_path_pbmc, "pbmc_profiling"), "w") as file:
    file.writelines([
        "pbmc profiling\n",
        f"{time_pbmc}\n",
    ])



# Lung cancer data

In [None]:
# Read the lung cancer table: tab-separated text without a header row.
data_path_lung_cancer = "../data/zilionis_cancer"
pca306 = pd.read_csv(
    os.path.join(data_path_lung_cancer, "cancer_qc_final.txt"),
    sep="\t",
    header=None,
)

In [None]:
def run_umap_lung_cancer(data):
    """Fit UMAP on ``data`` with the lung cancer settings and loss logging.

    Uses cosine metric, 30 neighbours, 750 epochs, ``log_losses="after"``
    and a fixed random state. The embedding is discarded; returns ``None``.
    """
    settings = dict(
        metric="cosine",
        n_neighbors=30,
        n_epochs=750,
        log_losses="after",
        random_state=0,
    )
    umap.UMAP(**settings).fit_transform(data)

In [None]:
# Time the lung cancer run. -o returns the TimeitResult (the next cell reads
# it back via `_`), -q suppresses the printed summary.
%timeit -oq  _ = run_umap_lung_cancer(pca306)

In [None]:
# `_` is expected to still hold the TimeitResult of the preceding %timeit cell;
# keep its string form and store it under data_path_lung_cancer.
time_lung_cancer = str(_)
with open(os.path.join(data_path_lung_cancer, "lung_cancer_profiling"), "w") as file:
    file.writelines([
        "lung cancer profiling\n",
        f"{time_lung_cancer}\n",
    ])



# Multi rings

In [None]:
# Build one dataset per entry of n_rings: k rings placed side by side.
# Cached arrays are loaded when present; otherwise they are generated and saved.
data_path_ring = "../data/toy_ring"
n_rings = [2, 5, 10, 20]  # time for one ring is measured above

datasets_multi_ring = []
for k in n_rings:
    multi_ring_file = os.path.join(
        data_path_ring,
        f"toy_ring_multi_{k}_1000_4_0_5_original_seed_3.npy",
    )
    try:
        data_rings = np.load(multi_ring_file)
    except FileNotFoundError:
        np.random.seed(3)
        data_rings = []
        for i in range(k):
            # The i-th ring is shifted by i * 4 * 3 along the first coordinate.
            data = get_ring(1000, 4, 0.5, noise="uniform") + np.array([1, 0]) * i * 4 * 3
            data_rings.append(data)
        data_rings = np.concatenate(data_rings)
        np.save(multi_ring_file, data_rings)
    datasets_multi_ring.append(data_rings)



In [None]:
def run_umap_multi_ring(data):
    """Fit UMAP for 10000 epochs on each dataset in ``data``.

    Every dataset is used as its own initialisation. A fresh UMAP instance
    with ``random_state=0`` is created per dataset; the fitted models are
    discarded and ``None`` is returned.
    """
    for points in data:
        umap.UMAP(random_state=0, n_epochs=10000, init=points).fit(points)

In [None]:
# Time one pass over all multi-ring datasets (a single combined measurement,
# not one per dataset). -o returns the TimeitResult (the next cell reads it
# back via `_`), -q suppresses the printed summary.
%timeit -oq  _ = run_umap_multi_ring(datasets_multi_ring)

In [None]:
# `_` is expected to still hold the TimeitResult of the preceding %timeit cell;
# keep its string form and store it under data_path_ring.
time_multi_ring = str(_)
with open(os.path.join(data_path_ring, "toy_multi_ring_profiling"), "w") as file:
    file.writelines([
        "toy multi ring profiling\n",
        f"{time_multi_ring}\n",
    ])


# Single ring vary n

In [None]:
data_path_ring = "../data/toy_ring"

# One single-ring dataset per size; cached arrays are loaded when present,
# otherwise the ring is generated with a fixed seed and saved.
sizes = [50, 100, 200, 300, 400, 500, 600, 700, 800, 900, 2000, 5000, 7000, 8000, 10000]  # 1000 is measured above
datasets = []
for n in sizes:
    ring_file = os.path.join(data_path_ring, f"toy_ring_{n}_4_0_5_original_seed_3.npy")
    try:
        data = np.load(ring_file)
    except FileNotFoundError:
        np.random.seed(3)
        data = get_ring(n, 4, 0.5, noise="uniform")
        np.save(ring_file, data)
    datasets.append(data)



In [None]:
def run_umap_vary_n(data):
    """Fit UMAP for 10000 epochs on each dataset in ``data``.

    Every dataset is used as its own initialisation. A fresh UMAP instance
    with ``random_state=0`` is created per dataset; the fitted models are
    discarded and ``None`` is returned.
    """
    for points in data:
        umap.UMAP(random_state=0, n_epochs=10000, init=points).fit(points)

In [None]:
# Time one pass over all ring sizes (a single combined measurement, not one
# per size). -o returns the TimeitResult (the next cell reads it back via `_`),
# -q suppresses the printed summary.
%timeit -oq  _ = run_umap_vary_n(datasets)

In [None]:
# `_` is expected to still hold the TimeitResult of the preceding %timeit cell;
# keep its string form and store it under data_path_ring.
time_vary_n = str(_)
with open(os.path.join(data_path_ring, "toy_ring_vary_n_profiling"), "w") as file:
    file.writelines([
        "toy ring vary n profiling\n",
        f"{time_vary_n}\n",
    ])

## Dense input similarities

In [None]:
# Curve parameters (a, b) used for the dense input similarities below,
# fitted from spread and min_dist.
min_dist, spread = 0.1, 1.0
a, b = umap.umap_.find_ab_params(spread=spread, min_dist=min_dist)

In [None]:
# Single-ring datasets for the dense-similarity timing; cached arrays are
# loaded when present, otherwise generated with a fixed seed and saved.
sizes_graph = [100, 200, 500, 2000, 5000]  # size 1000 is already measured
datasets_graph = []
for n in sizes_graph:
    ring_file = os.path.join(data_path_ring, f"toy_ring_{n}_4_0_5_original_seed_3.npy")
    try:
        data = np.load(ring_file)
    except FileNotFoundError:
        np.random.seed(3)
        data = get_ring(n, 4, 0.5, noise="uniform")
        np.save(ring_file, data)
    datasets_graph.append(data)

In [None]:
# One COO similarity matrix per dataset, in the same order as datasets_graph.
low_sims_sparse = [
    coo_matrix(compute_low_dim_psims(ring, a, b))
    for ring in datasets_graph
]

In [None]:
def run_umap_vary_n_graph(data):
    """Fit UMAP for 10000 epochs on each dataset with a precomputed graph.

    The graph for the dataset at position ``i`` is taken from the
    module-level ``low_sims_sparse[i]``, so ``data`` must be ordered like
    that list. Each dataset also serves as its own initialisation. The
    fitted models are discarded; returns ``None``.
    """
    for position, points in enumerate(data):
        settings = dict(
            init=points,
            graph=low_sims_sparse[position],
            n_epochs=10000,
            random_state=0,
        )
        umap.UMAP(**settings).fit(points)

In [None]:
# Time one pass over all dense-graph datasets. Unlike the other cells, this
# one fixes the measurement to 2 loops in a single repeat (-n 2 -r 1).
# -o returns the TimeitResult (the next cell reads it back via `_`),
# -q suppresses the printed summary.
%timeit -oq -n 2 -r 1  _ = run_umap_vary_n_graph(datasets_graph)

In [None]:
# `_` is expected to still hold the TimeitResult of the preceding %timeit cell;
# keep its string form and store it under data_path_ring.
time_vary_n_graph = str(_)
with open(os.path.join(data_path_ring, "toy_ring_vary_n_graph_profiling"), "w") as file:
    file.writelines([
        "toy ring vary n graph profiling\n",
        f"{time_vary_n_graph}\n",
    ])




