# Velocity Analysis with scVelo

In [1]:
import anndata
import scvelo as scv
import pandas as pd
import numpy as np
import matplotlib as plt
import scanpy as sc
import os
import re

I'll be analyzing the loom files created in child_qmds/velocity_analysis.qmd using scVelo.

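First, I'm going to define a function that takes all loom files for a given dataset, attaches each sample's metadata and embeddings (PCA, UMAP, FDL, harmony), and merges everything into a single anndata object. A second helper, `make_stream_plot`, runs the scVelo velocity steps on that object and draws the stream plot.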

In [2]:
def loom_to_an(loom_path, metadata_path):
    """Merge every loom file of one dataset into a single anndata object.

    For each ``<sample_id>.loom`` file found in ``loom_path``, the matching
    ``<sample_id>_metadata.csv`` file in ``metadata_path`` is read (indexed
    by its ``bc`` column) and aligned to the barcodes of the loom object.
    Columns that do not start with an embedding prefix become ``.obs``;
    columns matching ``PC_``, ``umap_``, ``fdl_`` and ``harmony_`` are stored
    in ``.obsm`` as ``X_pca``, ``X_umap``, ``X_fdl`` and ``X_harmony``.

    Parameters
    ----------
    loom_path : str
        Directory containing the ``.loom`` files of one dataset.
    metadata_path : str
        Directory containing one ``<sample_id>_metadata.csv`` per loom file.

    Returns
    -------
    The object returned by ``anndata.concat`` over all per-sample objects.

    Raises
    ------
    FileNotFoundError
        If ``loom_path`` contains no ``.loom`` files.
    """
    loom_suffix = ".loom"
    # Keep only real loom files (stray files in the directory would otherwise
    # be passed to read_loom) and sort so the merge order is reproducible.
    loom_files = sorted(
        name for name in os.listdir(loom_path) if name.endswith(loom_suffix)
    )
    if not loom_files:
        raise FileNotFoundError("No .loom files found in " + str(loom_path))

    #(embedding key in .obsm, column-name pattern in the metadata file)
    embeddings = (
        ("X_pca", "PC_"),
        ("X_umap", "umap_"),
        ("X_fdl", "fdl_"),
        ("X_harmony", "harmony_"),
    )

    #initialize list to hold anndata objects
    ad_obs = []
    for one_loom in loom_files:
        #get sample id from file name: strip only the trailing extension
        #(a regex ".loom" would also match e.g. "bloom" inside the name)
        sample_id = one_loom[:-len(loom_suffix)]
        tmp = anndata.read_loom(os.path.join(loom_path, one_loom))
        #read in metadata file
        md_path = os.path.join(metadata_path, sample_id + "_metadata.csv")
        metadata = pd.read_csv(md_path, index_col = "bc")
        #align metadata rows to the loom barcodes once; barcodes missing
        #from the metadata file end up as NaN rows
        metadata = metadata.reindex(tmp.obs.index)
        #everything that is not an embedding column becomes cell metadata
        tmp.obs = metadata.filter(regex = '^(?!umap_|PC_|fdl_|harmony_).*$')
        #add pca, umap, fdl and harmony embeddings
        for obsm_key, column_pattern in embeddings:
            tmp.obsm[obsm_key] = metadata.filter(regex = column_pattern).to_numpy()
        #make var names unique (this step is necessary to merge anndata list)
        tmp.var_names_make_unique()
        #add object to list
        ad_obs.append(tmp)
    #merge anndata list and return it
    return anndata.concat(ad_obs)


def make_stream_plot(ad_ob, mode = "stochastic"):
    """Run the scVelo velocity steps on ``ad_ob`` and draw a stream plot.

    Computes neighbors (30 PCs, 30 neighbors) and moments, estimates
    velocities with the requested ``mode``, builds the velocity graph and
    plots the velocity stream on the "umap" basis colored by
    "seurat_clusters". All steps operate on ``ad_ob`` itself.

    Parameters
    ----------
    ad_ob
        The anndata object to analyze, e.g. the result of ``loom_to_an``.
        Presumably needs a "umap" embedding and a "seurat_clusters" column
        for the final plot -- confirm against the metadata files.
    mode : str
        Passed through to ``scv.tl.velocity``; defaults to "stochastic".
    """
    sc.pp.neighbors(ad_ob, n_pcs = 30, n_neighbors = 30)
    scv.pp.moments(ad_ob)
    # Use the argument, not a global named `ob`: the function previously only
    # worked when the caller's variable happened to be called `ob`.
    scv.tl.velocity(ad_ob, mode = mode)
    scv.tl.velocity_graph(ad_ob)
    scv.pl.velocity_embedding_stream(ad_ob, basis = "umap", color = "seurat_clusters")



## Cancer Cells

This section will examine the cancer cells from our datasets.

### mm_mets_cancer_cells

In [None]:
# Merge the loom files and metadata of the mm_mets_cancer_cells dataset.
ob = loom_to_an(
    loom_path="loom_output/mm_mets_cancer_cells/loom_files",
    metadata_path="loom_output/mm_mets_cancer_cells/metadata",
)

# Velocity estimation and stream plot with the default mode.
make_stream_plot(ob)

### mm_prim_cancer_cells

In [None]:
# Merge the loom files and metadata of the mm_prim_cancer_cells dataset.
ob = loom_to_an(
    loom_path="loom_output/mm_prim_cancer_cells/loom_files",
    metadata_path="loom_output/mm_prim_cancer_cells/metadata",
)

# Velocity estimation and stream plot with the default mode.
make_stream_plot(ob)

### patient_mets_cancer_cells

In [3]:
# Merge the loom files and metadata of the patient_mets_cancer_cells dataset.
ob = loom_to_an(
    loom_path="loom_output/patient_mets_cancer_cells/loom_files",
    metadata_path="loom_output/patient_mets_cancer_cells/metadata",
)

# Velocity estimation and stream plot with the default mode.
make_stream_plot(ob)

In [None]:
# Stack every metadata csv of patient_mets_cancer_cells into one data frame
# so its barcodes can be compared with those of the merged anndata object.
md_dir = "loom_output/patient_mets_cancer_cells/metadata"
# sorted() makes the row order independent of the file-system listing order
md_files = sorted(os.listdir(md_dir))
# Read everything first and concatenate once: calling pd.concat inside the
# loop re-copies the accumulated frame on every iteration.
md_frames = [pd.read_csv(os.path.join(md_dir, md)) for md in md_files]
# pd.concat refuses an empty list, so fall back to an empty frame
md_df = pd.concat(md_frames) if md_frames else pd.DataFrame()

md_df

In [None]:
# Compare the barcodes of the merged anndata object with those in the
# metadata files.
ad_bcs = ob.obs.index
# "bc" is a regular column of md_df (the csv files were read without
# index_col), so set_index is needed to turn it into the index;
# reindex("bc") does not select that column.
md_bcs = md_df.set_index("bc").index
# Barcodes present in the anndata object but absent from the metadata.
# Printed explicitly because a notebook cell only displays its last expression.
print(ad_bcs.difference(md_bcs))
# Barcodes present in the metadata but absent from the anndata object.
md_bcs.difference(ad_bcs)