# RNA Velocity Data

In [1]:
# default_exp datasets

## Pancreas Dataset

### Functions to Return RNA Velocity Dataset

In [2]:
#export

import scvelo as scv
import numpy as np
import torch
import scipy

def pancreas_rnavelo_load_data():
    """Load the scVelo pancreas dataset and estimate RNA velocity on it.

    The object returned by ``scv.datasets.pancreas()`` is passed, in order,
    through ``scv.pp.filter_and_normalize``, ``scv.pp.moments`` and
    ``scv.tl.velocity`` (``mode='stochastic'``). The return values of those
    calls are ignored; the same object is handed back to the caller.

    Returns
    -------
    The dataset object after the three processing steps have been applied.
    """
    pancreas = scv.datasets.pancreas()

    # Preprocessing steps that take no extra arguments, applied in this order.
    for preprocess in (scv.pp.filter_and_normalize, scv.pp.moments):
        preprocess(pancreas)

    # RNA velocity estimation with the stochastic model.
    scv.tl.velocity(pancreas, mode='stochastic')

    return pancreas
        
def add_labels_pancreas(clusters):
    """Encode cluster names as consecutive integer labels.

    Every distinct value in ``clusters`` is assigned an integer id, starting
    at 0, in order of first appearance. The mapping is therefore identical on
    every run; ids derived from iterating a ``set`` of strings are not,
    because string hashing is randomized per interpreter process.

    Parameters
    ----------
    clusters : iterable of hashable
        Cluster name of every observation (e.g. a list or a pandas Series).
        Values are read by iteration only, never by integer indexing, so a
        Series whose index is not integer-based is handled by position as
        intended.

    Returns
    -------
    list of int
        One label per element of ``clusters``, in the same order. Empty input
        yields an empty list.
    """
    label_of = {}
    labels = []
    for name in clusters:
        # The first time a name is seen it receives the next unused id
        # (len(label_of) is evaluated before the insertion happens).
        labels.append(label_of.setdefault(name, len(label_of)))
    return labels

def pancreas_rnavelo():
    """Return the pancreas dataset as ``(X, flows, labels)``.

    Returns
    -------
    X : torch.Tensor
        Built from ``adata.X`` after converting it with ``.todense()``.
    flows : torch.Tensor
        Built from the ``"velocity"`` layer.
    labels : list of int
        Integer encoding of ``adata.obs["clusters"]`` produced by
        ``add_labels_pancreas``.
    """
    pancreas = pancreas_rnavelo_load_data()

    # Pull the three raw pieces out of the dataset object first ...
    cluster_names = pancreas.obs["clusters"]
    velocity = pancreas.layers["velocity"]
    expression = pancreas.X.todense()

    # ... then convert each one to the type handed back to the caller.
    return torch.tensor(expression), torch.tensor(velocity), add_labels_pancreas(cluster_names)

def pancreas_rnavelo_50pcs():
    """Return the pancreas dataset in PCA coordinates as ``(X, flows, labels)``.

    After loading the preprocessed data, the velocity graph is computed and a
    velocity stream plot on the ``'pca'`` basis is drawn as a side effect.

    NOTE(review): nothing in this function requests 50 components explicitly;
    the dimensionality is whatever is stored under ``obsm["X_pca"]`` -- confirm.

    Returns
    -------
    X : torch.Tensor
        Built from ``obsm["X_pca"]``.
    flows : torch.Tensor
        Built from ``obsm["velocity_pca"]``.
    labels : list of int
        Integer encoding of ``obs["clusters"]`` produced by
        ``add_labels_pancreas``.
    """
    pancreas = pancreas_rnavelo_load_data()

    scv.tl.velocity_graph(pancreas)
    # "velocity_pca" is read right below; presumably this plotting call is
    # what populates it -- TODO confirm against the scVelo documentation.
    scv.pl.velocity_embedding_stream(pancreas, basis='pca')

    embeddings = pancreas.obsm
    return (
        torch.tensor(embeddings["X_pca"]),
        torch.tensor(embeddings["velocity_pca"]),
        add_labels_pancreas(pancreas.obs["clusters"]),
    )

### Function Checks

In [3]:
# Smoke check: run the full-gene loader and keep its three outputs.
X, flows, labels = pancreas_rnavelo()

Normalized count data: X, spliced, unspliced.
Logarithmized X.
computing neighbors


OMP: Info #277: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


    finished (0:00:14) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:08) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
computing velocities
    finished (0:00:23) --> added 
    'velocity', velocity vectors for each individual cell (adata.layers)


In [None]:
# Smoke check: run the PCA-space loader and keep its three outputs.
X2, flows2, labels2 = pancreas_rnavelo_50pcs()

Normalized count data: X, spliced, unspliced.
Logarithmized X.
computing neighbors
    finished (0:00:01) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:08) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
computing velocities
    finished (0:00:23) --> added 
    'velocity', velocity vectors for each individual cell (adata.layers)
computing velocity graph (using 1/8 cores)


  0%|          | 0/3696 [00:00<?, ?cells/s]

In [None]:
# Report the sizes of the three outputs of the PCA-space loader.
print(X2.shape)
print(flows2.shape)
print(len(labels2))

print(X2.shape)
print(flows2.shape)
print(len(labels2))

In [None]:
# Run the nbdev command-line tool that builds the library from the notebooks
# (presumably exports the `#export` cell above to the module named by
# `default_exp` -- confirm against the installed nbdev version).
!nbdev_build_lib