In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys, os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.metrics import roc_auc_score
from new_experiments import run_simulation
from concurrent.futures import ProcessPoolExecutor
from IPython.display import display, clear_output
from consolidated_runs import run_simulations

# Extend the module search path with the working directory and the project
# sub-directories so their modules can be imported by bare name.
# NOTE(review): these appends execute after the `new_experiments` and
# `consolidated_runs` imports above, so they cannot help resolve those two
# imports themselves on a fresh kernel -- confirm this ordering is intended.
sys.path.append(os.getcwd())
sys.path.append(os.path.join(os.getcwd(), 'baselines'))
sys.path.append(os.path.join(os.getcwd(), 'metrics'))
sys.path.append(os.path.join(os.getcwd(), 'prev_methods', 'clustering'))
sys.path.append(os.path.join(os.getcwd(), 'prev_methods', 'reconstruct_grn'))

2024-09-09 01:31:56.844559: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-09 01:31:57.658324: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-09-09 01:31:59.003925: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Instructions for updating:
non-resource variables are not supported in the long term


In [3]:
def run_experiment(datasets=(3,), n_iterations=30, max_workers=3,
                   results_path="zero_imputation_experiments/imputation_results.csv"):
    """Run the imputation benchmark and checkpoint every result to a CSV file.

    For each dataset, iteration 0 is executed synchronously with
    ``sergio=True``; iterations ``1 .. n_iterations - 1`` are then submitted to
    a process pool with ``sergio=False``. After every finished run the full
    result table is rewritten to ``results_path``, so partial results survive
    an interrupted run.

    Args:
        datasets: Iterable of dataset ids passed to ``run_simulation``.
        n_iterations: Total number of iterations per dataset, including the
            synchronous iteration 0.
        max_workers: Number of worker processes for the parallel iterations.
        results_path: CSV file that receives the accumulated results.

    Returns:
        None. Results are only written to ``results_path``.

    Note:
        Assumes ``run_simulation`` returns one flat mapping of column name to
        value per call -- TODO confirm against ``new_experiments``.
    """
    # Switches shared by every run; only `sergio` and `iteration` vary.
    common_kwargs = dict(
        saucie=True,
        scScope=True,
        deepImpute=True,
        magic=True,
        genie=True,
        arboreto=False,
        pearson=False,
        roc=True,
        precision_recall_k=False,
        run_with_regs=False,
    )
    records = []

    def checkpoint(result):
        # Collect plain records and build the frame once per write instead of
        # concatenating onto an initially empty DataFrame row by row.
        records.append(result)
        pd.DataFrame(records).to_csv(results_path, index=False)

    for dataset in datasets:
        os.makedirs(f"./zero_imputation_experiments/DS{dataset}/", exist_ok=True)

        # Run the first iteration on its own before any worker starts (the
        # original code notes this is done to prevent a race condition).
        first_result = run_simulation(
            dataset=dataset, sergio=True, iteration=0, **common_kwargs
        )
        clear_output()
        checkpoint(first_result)

        with ProcessPoolExecutor(max_workers=max_workers) as executor:
            futures = [
                executor.submit(
                    run_simulation,
                    dataset=dataset,
                    sergio=False,  # only iteration 0 runs with sergio=True
                    iteration=i,
                    **common_kwargs,
                )
                for i in range(1, n_iterations)
            ]
            clear_output()
            # Consume in submission order so rows stay sorted by iteration.
            for future in tqdm(futures):
                result = future.result()
                clear_output(wait=True)
                checkpoint(result)
    return

In [4]:
# Launch the benchmark; results are checkpointed to
# zero_imputation_experiments/imputation_results.csv as runs finish.
run_experiment()

---> Running SERGIO on DS3
---> Running SAUCIE on DS3
loading data
reset graph
Initialize saucie


  h1 = tf.layers.dense(self.x, self.layers[0], activation=lrelu, name='encoder_0')
  h2 = tf.layers.dense(h1, self.layers[1], activation=tf.nn.sigmoid, name='encoder_1')
  h3 = tf.layers.dense(h2, self.layers[2], activation=lrelu, name='encoder_2')
  self.embedded = tf.layers.dense(h3, self.layers[3], activation=tf.identity, name='embedding')
  h5 = tf.layers.dense(self.embedded, self.layers[2], activation=lrelu, name='decoder_0')
  h6 = tf.layers.dense(h5, self.layers[1], activation=lrelu, name='decoder_1')
  h7 = tf.layers.dense(h6, self.layers[0], activation=lrelu, name='decoder_2')
  self.reconstructed = tf.layers.dense(h7, self.input_dim, activation=tf.identity, name='recon')
2024-09-09 02:53:16.064730: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and 

Load saucie
Train saucie
---> Running scScope on DS3 
Building Computational Graph on GPU-0
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


2024-09-09 02:53:32.282931: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Finisheded epoch: 100
Current reconstruction error is: 0.32588303183604234
Finisheded epoch: 200
Current reconstruction error is: 0.30992707779149564
Finisheded epoch: 300
Current reconstruction error is: 0.3002605436454365
Finisheded epoch: 400
Current reconstruction error is: 0.2959674696915626
Finisheded epoch: 500
Current reconstruction error is: 0.2939330786506229
Finisheded epoch: 600
Current reconstruction error is: 0.29293061878116616
Finisheded epoch: 700
Current reconstruction error is: 0.29230642822609393
Finisheded epoch: 800
Current reconstruction error is: 0.2919622645163149
Finisheded epoch: 900
Current reconstruction error is: 0.29165815418648294
Finisheded epoch: 1000
Current reconstruction error is: 0.2914686136506491
Finish training 2700 samples after 1000 epochs. The total training time is 104.29334592819214 seconds.
---> Running DeepImpute on DS3 
Using all the cores (48)
Input dataset is 2700 cells (rows) and 1200 genes (columns)
First 3 rows and columns:
   0  1 

  super().__init__(name, **kwargs)


Fitting with 2700 cells
Train on 2565 samples, validate on 135 samples
Epoch 1/500


2024-09-09 02:55:18.109303: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2024-09-09 02:55:18.166777: W tensorflow/c/c_api.cc:304] Operation '{name:'training/Adam/dense_2/bias/m/Assign' id:1173 op device:{requested: '', assigned: ''} def:{{{node training/Adam/dense_2/bias/m/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](training/Adam/dense_2/bias/m, training/Adam/dense_2/bias/m/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


Epoch 2/500

  updates = self.state_updates
2024-09-09 02:55:18.510719: W tensorflow/c/c_api.cc:304] Operation '{name:'loss_1/AddN' id:900 op device:{requested: '', assigned: ''} def:{{{node loss_1/AddN}} = AddN[N=2, T=DT_FLOAT, _has_manual_control_dependencies=true](loss_1/mul, loss_1/mul_1)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Stopped fitting after 66 epochs
Saved model to disk in /state/partition1/job-50789262/tmpwvvplwwe
Instructions for updating:
Call initializer instance with the dtype argument 

  updates=self.state_updates,
2024-09-09 02:55:27.532290: W tensorflow/c/c_api.cc:304] Operation '{name:'dense_2/Softplus' id:782 op device:{requested: '', assigned: ''} def:{{{node dense_2/Softplus}} = Softplus[T=DT_FLOAT, _has_manual_control_dependencies=true](dense_2/BiasAdd)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2024-09-09 02:55:27.793586: W tensorflow/c/c_api.cc:304] Operation '{name:'dense_1_1/bias/Assign' id:1381 op device:{requested: '', assigned: ''} def:{{{node dense_1_1/bias/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](dense_1_1/bias, dense_1_1/bias/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after runn

Filling zeros
---> Running MAGIC on DS3 
./zero_imputation_experiments/DS3/DS6_clean.npy ./zero_imputation_experiments/DS3/DS6_noisy.npy
Calculating MAGIC...
  Running MAGIC on 2700 cells and 1200 genes.
  Calculating graph and diffusion operator...
    Calculating PCA...
    Calculated PCA in 0.33 seconds.
    Calculating KNN search...
    Calculated KNN search in 0.26 seconds.
    Calculating affinities...
    Calculated affinities in 0.25 seconds.
  Calculated graph and diffusion operator in 0.85 seconds.
  Calculating imputation...
  Calculated imputation in 0.32 seconds.
Calculated MAGIC in 1.18 seconds.
---> Running GENIE3 on Clean Data for DS3 
Tree method: RF
K: sqrt
Number of trees: 100


running jobs on 12 threads


: 

### Other Imputation Methods Experimentation

In [None]:
import os
# For each dataset, call run_simulation with only `sergio` enabled, unless the
# dataset's DS6_noisy.npy file is already on disk.
for dataset in [1, 2, 3]:
    dataset_dir = f"./zero_imputation_experiments/DS{dataset}/"
    noisy_file = f"./zero_imputation_experiments/DS{dataset}/DS6_noisy.npy"

    if not os.path.exists(dataset_dir):
        os.makedirs(dataset_dir)
    if os.path.exists(noisy_file):
        continue  # nothing to do for this dataset

    # Every imputation method, network-inference method and metric is off.
    sergio_only = dict(
        sergio=True,
        saucie=False,
        scScope=False,
        deepImpute=False,
        magic=False,
        genie=False,
        arboreto=False,
        pearson=False,
        roc=False,
        precision_recall_k=False,
        run_with_regs=False,
    )
    res = run_simulation(dataset=dataset, iteration=0, **sergio_only)
    clear_output()

scVI

In [None]:
from experiment_utils import run_scvi
import numpy as np
import pandas as pd

def fetch_target_regs(dataset):
    """Return the SERGIO interaction and regulator file paths for a dataset.

    Args:
        dataset: Dataset id. ``1`` and ``2`` select DS1 and DS2; any other
            value falls through to the DS3 files.

    Returns:
        A ``(target_file, regs_path)`` tuple of relative path strings.
    """
    if dataset == 1:
        return (
            './SERGIO/data_sets/De-noised_100G_9T_300cPerT_4_DS1/Interaction_cID_4.txt',
            './SERGIO/data_sets/De-noised_100G_9T_300cPerT_4_DS1/Regs_cID_4.txt',
        )
    if dataset == 2:
        return (
            './SERGIO/data_sets/De-noised_400G_9T_300cPerT_5_DS2/Interaction_cID_5.txt',
            './SERGIO/data_sets/De-noised_400G_9T_300cPerT_5_DS2/Regs_cID_5.txt',
        )
    # Every remaining id is treated as DS3.
    return (
        './SERGIO/data_sets/De-noised_1200G_9T_300cPerT_6_DS3/Interaction_cID_6.txt',
        './SERGIO/data_sets/De-noised_1200G_9T_300cPerT_6_DS3/Regs_cID_6.txt',
    )

def scvi_impute(datasets=(1, 2, 3), n_iterations=8, max_workers=4,
                results_path="zero_imputation_experiments/scvi_imputation_results.csv"):
    """Run ``run_scvi`` repeatedly on each dataset and checkpoint the scores.

    For every dataset the file ``DS6_noisy.npy`` is loaded from the dataset
    directory and ``n_iterations`` calls to ``run_scvi`` are submitted to a
    process pool. Each finished run adds one row (dataset, method, roc,
    iteration) and the full table is rewritten to ``results_path``.

    Args:
        datasets: Iterable of dataset ids.
        n_iterations: Number of ``run_scvi`` runs per dataset.
        max_workers: Number of worker processes in the pool.
        results_path: CSV file that receives the accumulated results.

    Returns:
        None. Results are only written to ``results_path``.

    Note:
        Assumes ``run_scvi`` returns a ``(score, iteration)`` pair, as the
        unpacking below requires -- TODO confirm against ``experiment_utils``.
    """
    records = []
    for dataset in datasets:
        save_path = f"./zero_imputation_experiments/DS{dataset}/"
        y = np.load(save_path + "/DS6_noisy.npy")
        # Only the interaction file is forwarded; the regulator path is unused.
        target_file, _ = fetch_target_regs(dataset)
        with ProcessPoolExecutor(max_workers=max_workers) as executor:
            futures = [
                executor.submit(
                    run_scvi,
                    data=y,
                    save_path=save_path,
                    it=i,
                    file_extension=f"_iter{i}",
                    target_file=target_file,
                )
                for i in range(n_iterations)
            ]
            clear_output()
            # Consume in submission order; rewrite the CSV after every result
            # so partial progress is kept if a later run fails.
            for future in tqdm(futures):
                vim, it = future.result()
                records.append({
                    "dataset": dataset,
                    "method": "scvi",
                    "roc": vim,
                    "iteration": it,
                })
                # Build the frame from plain records instead of concatenating
                # onto an initially empty DataFrame row by row.
                pd.DataFrame(records).to_csv(results_path, index=False)

In [None]:
# Needs ./zero_imputation_experiments/DS{1,2,3}/DS6_noisy.npy on disk;
# scvi_impute loads that file for each dataset before submitting any run.
scvi_impute()

knn-smoothing

In [None]:
from experiment_utils import run_knn

def run_smoothing(datasets=(1, 2, 3), n_iterations=8, max_workers=4, k=32,
                  results_path="zero_imputation_experiments/knn_imputation_results.csv"):
    """Run ``run_knn`` repeatedly on each dataset and checkpoint the scores.

    For every dataset the file ``DS6_noisy.npy`` is loaded from the dataset
    directory and ``n_iterations`` calls to ``run_knn`` are submitted to a
    process pool. Each finished run adds one row (dataset, method, roc,
    iteration) and the full table is rewritten to ``results_path``.

    Args:
        datasets: Iterable of dataset ids.
        n_iterations: Number of ``run_knn`` runs per dataset.
        max_workers: Number of worker processes in the pool.
        k: Value forwarded to ``run_knn`` as its ``k`` argument.
        results_path: CSV file that receives the accumulated results.

    Returns:
        None. Results are only written to ``results_path``.

    Note:
        Assumes ``run_knn`` returns a ``(score, iteration)`` pair, as the
        unpacking below requires -- TODO confirm against ``experiment_utils``.
    """
    records = []
    for dataset in datasets:
        save_path = f"./zero_imputation_experiments/DS{dataset}/"
        y = np.load(save_path + "/DS6_noisy.npy")
        # Only the interaction file is forwarded; the regulator path is unused.
        target_file, _ = fetch_target_regs(dataset)
        with ProcessPoolExecutor(max_workers=max_workers) as executor:
            futures = [
                executor.submit(
                    run_knn,
                    data=y,
                    k=k,
                    save_path=save_path,
                    it=i,
                    file_extension=f"_iter{i}",
                    target_file=target_file,
                )
                for i in range(n_iterations)
            ]
            clear_output()
            # Consume in submission order; rewrite the CSV after every result
            # so partial progress is kept if a later run fails.
            for future in tqdm(futures):
                vim, it = future.result()
                records.append({
                    "dataset": dataset,
                    "method": "knn",
                    "roc": vim,
                    "iteration": it,
                })
                # Build the frame from plain records instead of concatenating
                # onto an initially empty DataFrame row by row.
                pd.DataFrame(records).to_csv(results_path, index=False)

In [None]:
# The cell that defines run_smoothing must be executed in this kernel first;
# the saved output of this cell is a NameError from running it without that.
run_smoothing()

NameError: name 'run_smoothing' is not defined