In [1]:
# Install required packages
%%capture
!pip install anndata
!pip install hdf5plugin

In [2]:
# Import packages
import os
import h5py
import hdf5plugin
import shutil

import pandas as pd
import anndata as ad
import numpy as np

from google.colab import drive

In [3]:
# Mount Google drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [5]:
## PREDENOISING PREPROCESSING | FOR JUST DENOISE SCROLL DOWN
# Declare useful constants
DATA_DIR = '/content/drive/MyDrive/Thesis/dance/dance/data/'
INPUT_SUBTASK_NAME = 'mouse_liver_cite_fltr_rna'
OUTPUT_SUBTASK_NAME = 'mouse_liver_cite_fltr_denoised_pretrain_rna'

In [6]:
assert os.path.exists(os.path.join(DATA_DIR, INPUT_SUBTASK_NAME))

In [7]:
# Read h5ad files containing raw GEX counts
input_train_mod1 = ad.read_h5ad(os.path.join(DATA_DIR, 
                                             INPUT_SUBTASK_NAME, 
                                             f'{INPUT_SUBTASK_NAME}.censor_dataset.output_train_mod1.h5ad'))

input_test_mod1 = ad.read_h5ad(os.path.join(DATA_DIR, 
                                             INPUT_SUBTASK_NAME, 
                                             f'{INPUT_SUBTASK_NAME}.censor_dataset.output_test_mod1.h5ad'))

In [8]:
# Convert the raw cite inputs file to .txt which is required for SAVERX
# NOTE: This will can take quite some time and disk space
rna_arr = np.concatenate((input_train_mod1.X.toarray().T, 
                          input_test_mod1.X.toarray().T),
                          axis=1)
gene_index = list(input_train_mod1.var_names.values + input_test_mod1.var_names.values)
cell_header = list(input_train_mod1.obs['cell'].values) + list(input_test_mod1.obs['cell'].values)
rna_arr = pd.DataFrame(rna_arr, 
                       index=gene_index,
                       columns=cell_header)
rna_arr.to_csv(os.path.join(DATA_DIR, 'rna.csv'))

In [9]:
## DENOISING
# Install required packages
%%capture
!pip install scipy==1.4.1
!pip install h5py==2.10.0
!pip uninstall rpy2 -y
!pip install rpy2==3.5.1
!pip install sctransfer

In [10]:
# RESTART RUNTIME AND RUN FROM HERE

In [2]:
%reload_ext rpy2.ipython

In [12]:
import warnings
from rpy2.rinterface import RRuntimeWarning
warnings.filterwarnings("ignore", category=RRuntimeWarning)

In [13]:
from google.colab import drive

# Mount Google drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [14]:
%%R

library(devtools)
install_github("jingshuw/SAVERX")





These packages have more recent versions available.
It is recommended to update all of them.
Which would you like to update?

1: All                             
2: CRAN packages only              
3: None                            
4: jsonlite (1.8.3 -> 1.8.4) [CRAN]

Enter one or more numbers, or an empty line to skip updates: 1
iterators  (NA    -> 1.0.14   ) [CRAN]
foreach    (NA    -> 1.5.2    ) [CRAN]
RcppEigen  (NA    -> 0.3.3.9.3) [CRAN]
Rcpp       (NA    -> 1.0.9    ) [CRAN]
shape      (NA    -> 1.4.6    ) [CRAN]
doParallel (NA    -> 1.0.17   ) [CRAN]
glmnet     (NA    -> 4.1-6    ) [CRAN]
png        (NA    -> 0.1-8    ) [CRAN]
jsonlite   (1.8.3 -> 1.8.4    ) [CRAN]
here       (NA    -> 1.0.1    ) [CRAN]
RcppTOML   (NA    -> 0.2.0    ) [CRAN]
SAVER      (NA    -> 1.1.2    ) [CRAN]
reticulate (NA    -> 1.27     ) [CRAN]



(as ‘lib’ is unspecified)



































































	‘/tmp/Rtmp13ZE26/downloaded_packages’



── R CMD build ─────────────────────────────────────────────────────────────────
* checking for file ‘/tmp/Rtmp13ZE26/remotes166c6f019314/jingshuw-SAVERX-4b8b67e/DESCRIPTION’ ... OK
* preparing ‘SAVERX’:
* checking DESCRIPTION meta-information ... OK
* checking for LF line-endings in source and make files and shell scripts
* checking for empty or unneeded directories
* building ‘SAVERX_1.0.2.tar.gz’



(as ‘lib’ is unspecified)



In [None]:
%%R 

library(SAVERX)
setwd('/content/drive/MyDrive/Thesis/dance/dance/data/')
file <- saverx("rna.csv", data.species = "Mouse", ncores = 2, pretrained.weights.file = "mouse_AdultBrain.hdf5")
# file <- computeShrinkage("1673257077.71305", ncores=1)
denoised.data <- readRDS(file)
saveRDS(denoised.data, file="rna_denoised.rds")

[1] "Input file is: rna.csv"
[1] "Use a pretrained model: No"
[1] "Processed file saved as: 1673362947.63796/tmpdata.rds"
[1] "Data preprocessed ..."

    consider that it could be called from a Python process. This
    results in a quasi-obligatory segfault when rpy2 is evaluating
    R code using it. On the hand, rpy2 is accounting for the
    fact that it might already be running embedded in a Python
    process. This is why:
    - Python -> rpy2 -> R -> reticulate: crashes
    - R -> reticulate -> Python -> rpy2: works

    The issue with reticulate is tracked here:
    https://github.com/rstudio/reticulate/issues/208
    

Using TensorFlow backend.


[1] "Python module sctransfer imported ..."
[1] "Cross-validation round: 1"
[1] "Cross-validation round: 2"
[1] "Cross-validation round: 3"
[1] "Final prediction round using all cells. See below the summary of the autoencoder model:"
Model: "model_8"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
count (InputLayer)              (None, 6000)         0                                            
__________________________________________________________________________________________________
enc0 (Dense)                    (None, 64)           384064      count[0][0]                      
__________________________________________________________________________________________________
batch_normalization_10 (BatchNo (None, 64)           192         enc0[0][0]                       
________________________________________________________















[1] "Empirical Bayes shrinkage total computing time is: 6414.793 seconds"
[1] "Final denoised results saved as: 1673362947.63796/denoised.rds"
[1] "Intermediate files removed. Finished!!"
