In [2]:
import scanpy as sc
import anndata
import cupy as cp

import time
import rapids_singlecell as rsc
import decoupler as dc

import warnings
warnings.filterwarnings("ignore")

In [3]:
import rmm
from rmm.allocators.cupy import rmm_cupy_allocator
# Re-initialise RMM with a pooled allocator and route CuPy allocations through it.
rmm.reinitialize(
    managed_memory=False, # managed memory is off here; True is the setting that allows oversubscription
    pool_allocator=True, # default is False
)
# From here on CuPy requests device memory via the RMM allocator configured above.
cp.cuda.set_allocator(rmm_cupy_allocator)

In [4]:
%%time
# Read the input dataset from disk, then de-duplicate gene names so that
# var_names can be used as unique identifiers in the steps that follow.
h5ad_path = "../2024_gpu_severin.dicks/notebooks/h5/200000.h5ad"
adata = sc.read(h5ad_path)
adata.var_names_make_unique()

CPU times: user 293 ms, sys: 1.54 s, total: 1.83 s
Wall time: 3.67 s


In [5]:
%%time
# Transfer adata to the GPU; called for its side effect (the return value is not used).
rsc.get.anndata_to_GPU(adata)

CPU times: user 240 ms, sys: 1.25 s, total: 1.49 s
Wall time: 2.83 s


In [6]:
%%time
# Flag the "MT" gene family by name prefix; the flag is consumed by
# calculate_qc_metrics(qc_vars=[...]) later in the notebook.
# NOTE(review): the prefix has no trailing hyphen and is upper-case. If matching
# is a plain prefix test, this would also catch non-mitochondrial genes starting
# with "MT", and would miss mouse-style "mt-" names (the network loaded later is
# for mouse) — confirm against adata.var_names.
rsc.pp.flag_gene_family(
    adata,
    gene_family_name="MT",
    gene_family_prefix="MT",
)

CPU times: user 4.63 ms, sys: 995 μs, total: 5.62 ms
Wall time: 16.7 ms


In [7]:
%%time
# Flag the "RIBO" gene family by name prefix, for use as a QC variable below.
# NOTE(review): only the "RPS" prefix is used, so genes named "RPL..." are
# presumably not included — confirm this is intended.
rsc.pp.flag_gene_family(
    adata,
    gene_family_name="RIBO",
    gene_family_prefix="RPS",
)

CPU times: user 2.49 ms, sys: 1.45 ms, total: 3.94 ms
Wall time: 3.68 ms


In [8]:
%%time
# Compute QC metrics, including those for the two flagged gene families.
# The filtering cell below reads obs["n_genes_by_counts"] and obs["pct_counts_MT"].
rsc.pp.calculate_qc_metrics(
    adata,
    qc_vars=["MT", "RIBO"],
)

CPU times: user 103 ms, sys: 48.3 ms, total: 152 ms
Wall time: 10.7 s


In [9]:
%%time
# Cell-level QC: keep cells with 500 < n_genes_by_counts < 5000 and fewer than
# 20% of counts in the MT gene family.
# All thresholds are combined into one boolean mask so the data is subset once
# instead of stacking a view on a view on a view, and .copy() materialises the
# result so the in-place preprocessing steps that follow operate on a real
# AnnData object rather than on a view of the unfiltered one.
keep_cells = (
    (adata.obs["n_genes_by_counts"] > 500)
    & (adata.obs["n_genes_by_counts"] < 5000)
    & (adata.obs["pct_counts_MT"] < 20)
)
adata = adata[keep_cells].copy()

CPU times: user 30.6 ms, sys: 10.2 ms, total: 40.8 ms
Wall time: 73 ms


We also filter out genes that are expressed in fewer than 3 cells.

In [10]:
%%time
# Gene-level QC with a minimum threshold of 3; the saved output reports that the
# filter is applied to n_cells_by_counts (number of cells a gene is detected in).
rsc.pp.filter_genes(
    adata,
    min_count=3,
)

filtered out 7131 genes based on n_cells_by_counts
CPU times: user 87 ms, sys: 59.9 ms, total: 147 ms
Wall time: 2.21 s


The size of our count matrix is now reduced.

In [11]:
# Show the matrix dimensions (cells, genes) remaining after cell and gene filtering.
adata.shape

(197045, 20867)

### Normalize

We normalize the count matrix so that the total counts in each cell sum to 1e4.

In [12]:
%%time
# Scale each cell so that its total counts sum to 1e4.
rsc.pp.normalize_total(
    adata,
    target_sum=1e4,
)

CPU times: user 1.61 ms, sys: 326 μs, total: 1.94 ms
Wall time: 51.2 ms


Next, we log-transform the normalized count matrix.

In [13]:
%%time
# Apply the log1p transform to adata; called for its side effect (no return value is captured).
rsc.pp.log1p(adata)

CPU times: user 754 μs, sys: 0 ns, total: 754 μs
Wall time: 594 μs


In [14]:
# Prior-knowledge network used by the enrichment cells below, which read its
# 'source', 'target' and 'weight' columns.
# NOTE(review): this presumably fetches the resource over the network — the saved
# output shows urllib3 connection frames — so the cell needs internet access.
# Alternative network, currently disabled:
#net = dc.get_dorothea(organism='mouse', levels=['A','B','C'])
net = dc.get_progeny(organism='mouse')

  File "/home/icb/severin.dicks/miniconda3/envs/rapids-24.06/lib/python3.11/site-packages/urllib3/connectionpool.py", line 715, in urlopen
    httplib_response = self._make_request(
                       ^^^^^^^^^^^^^^^^^^^
  File "/home/icb/severin.dicks/miniconda3/envs/rapids-24.06/lib/python3.11/site-packages/urllib3/connectionpool.py", line 404, in _make_request
    self._validate_conn(conn)
  File "/home/icb/severin.dicks/miniconda3/envs/rapids-24.06/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1060, in _validate_conn
    conn.connect()
  File "/home/icb/severin.dicks/miniconda3/envs/rapids-24.06/lib/python3.11/site-packages/urllib3/connection.py", line 419, in connect
    self.sock = ssl_wrap_socket(
                ^^^^^^^^^^^^^^^^
  File "/home/icb/severin.dicks/miniconda3/envs/rapids-24.06/lib/python3.11/site-packages/urllib3/util/ssl_.py", line 449, in ssl_wrap_socket
    ssl_sock = _ssl_wrap_socket_impl(
               ^^^^^^^^^^^^^^^^^^^^^^
  File "/home/i

In [15]:
# Snapshot the current X into a layer for the enrichment methods below (layer="counts").
# NOTE: X has already been through normalize_total and log1p at this point, so
# despite its name this layer holds log-normalised values, not raw counts.
adata.layers["counts"] = adata.X.copy()
# Move adata back to host memory.
# NOTE(review): presumably only X is transferred by default, leaving the layer
# created above on the GPU — confirm this is the intended placement.
rsc.get.anndata_to_CPU(adata)

In [16]:
%%time
# Run AUCell for every source in `net`, reading expression values from the
# "counts" layer and processing the cells in batches of 30000.
rsc.dcg.run_aucell(
    mat=adata,
    net=net,
    source="source",
    target="target",
    layer="counts",
    batch_size=30000,
    pre_load=True,
    verbose=True,
)

1 features of mat are empty, they will be removed.
Running aucell on mat with 197045 samples and 20866 targets for 14 sources.


  0%|          | 0/7 [00:00<?, ?it/s]

CPU times: user 4.8 s, sys: 87.4 ms, total: 4.89 s
Wall time: 13.6 s


In [None]:
%%time
# Run the MLM method for every source in `net`, using the network's weight
# column and reading expression values from the "counts" layer.
rsc.dcg.run_mlm(
    mat=adata,
    net=net,
    source="source",
    target="target",
    weight="weight",
    layer="counts",
    pre_load=True,
    verbose=True,
)

1 features of mat are empty, they will be removed.
Running mlm on mat with 197045 samples and 20866 targets for 14 sources.


  0%|          | 0/20 [00:00<?, ?it/s]

In [None]:
%%time
# Run the ULM method for every source in `net`, using the network's weight
# column and reading expression values from the "counts" layer.
rsc.dcg.run_ulm(
    mat=adata,
    net=net,
    source="source",
    target="target",
    weight="weight",
    layer="counts",
    pre_load=True,
    verbose=True,
)