In [2]:
# conda activate velocyto

import os
import numpy as np
import scanpy as sc
import anndata as ad

# import scvelo as scv
# import velocyto as vcy

# Work from the analysis directory so that the relative "data/..." paths used by
# the cells in this notebook resolve against it.
analysis_dir = "/mnt/lareaulab/reliscu/projects/Chronocell/analyses/janssens_2025_preprint"
os.chdir(analysis_dir)

In [None]:
# Here I create matrices of S/U counts from each condition:
## 1. eLNP
## 2. pIC-LNP
## 3. CpG-LNP

In [None]:
# Load the annotated dataset; its .obs table provides the per-cell metadata
# (and its index the annotated barcodes) used by the cells in this notebook.

annotated_h5ad = "data/janssens_2025_CellRanger_model/janssens_2025_CellRanger_counts_scVI.h5ad"
adata = ad.read_h5ad(annotated_h5ad)

In [39]:
# Normalise the condition labels: "LNPs" -> "LNP", spaces -> underscores.
condition_labels = (
    adata.obs['Condition']
    .str.replace("LNPs", "LNP")
    .str.replace(" ", "_")
)
adata.obs['Condition'] = condition_labels

# Experimental condition = the label text before the first "_<digit>" separator
# (labels without such a separator are kept whole).
adata.obs['Experimental_condition'] = (
    condition_labels
    .str.split(r"_[0-9]")
    .str[0]
    .str.replace(" ", "_")
)

In [None]:
# Root directory of the processed CellRanger output. The loading cells read one
# loom per sample directory from "<data_dir>/<sample>/velocyto/<sample>.loom".
data_dir = "/mnt/lareaulab/reliscu/projects/Chronocell/data/janssens_2025_preprint/processed/CellRanger"

In [None]:
# Prep steady state data once
#
# Loads the steady-state loom, renames its barcodes to match the annotated
# object's index, keeps only annotated cells and genes present in adata.raw,
# attaches the annotated per-cell metadata and writes the result to disk.

cdir = "Steady_State_0h"
cond = "Steady_state"
adata_ss = sc.read_loom(f"{data_dir}/{cdir}/velocyto/{cdir}.loom")
annotated_cells = adata.obs.index[adata.obs['Condition'] == cond]

# Format barcodes to match naming convention
# The annotated barcodes carry a "-<suffix>" tail; exactly one distinct suffix is
# required here, otherwise the loom barcodes cannot be renamed unambiguously.
suffix = np.unique(annotated_cells.str.split("-").str[1])
if len(suffix) != 1:
    raise ValueError(
        f"Expected exactly one barcode suffix for condition {cond!r}, "
        f"found {len(suffix)}: {list(suffix)}"
    )
# Keep the part after ":" and drop the "x" characters from the loom barcodes,
# then append the (single) suffix as a plain string rather than an array.
adata_ss.obs_names = adata_ss.obs_names.str.split(":").str[1].str.replace("x", "")
adata_ss.obs_names = adata_ss.obs_names + "-" + suffix[0]

# Subset to cells (and genes) in annotated data
adata_ss.var_names_make_unique()
row_mask = adata_ss.obs_names.isin(annotated_cells)
col_mask = adata_ss.var_names.isin(adata.raw.var_names)
adata_ss = adata_ss[row_mask, col_mask].copy()

# Add metadata (left join on barcode keeps every retained loom cell)
adata_ss.obs = adata_ss.obs.join(adata.obs, how="left")

adata_ss.write(f"data/janssens_2025_CellRanger_Velocyto_counts_{cond}.h5ad")

In [None]:
# Combine Velocyto data from each condition into a single anndata object
#
# For every experimental condition other than steady state: load the loom of each
# matching sample directory, rename barcodes to match the annotated index, keep
# annotated cells/genes, attach metadata, then concatenate those samples together
# with the steady-state object (adata_ss, prepared in the steady-state cell) and
# write one .h5ad per experimental condition.

for cond in np.unique(adata.obs['Experimental_condition']):
    if cond == "Steady_state":
        continue

    print(f"Starting {cond}")
    # Sorted so the merge order does not depend on os.listdir's arbitrary ordering
    cond_dirs = sorted(d for d in os.listdir(data_dir) if cond in d)
    if not cond_dirs:
        # Without this guard the output would silently contain steady state only
        raise FileNotFoundError(
            f"No sample directories matching {cond!r} found in {data_dir}"
        )

    # Seed with the steady-state cells so each merged file includes them
    adata_list = [adata_ss]
    for cdir in cond_dirs:
        print(f"Starting {cdir}")

        # Note: loading these loom files takes a couple of minutes
        adata_cond = sc.read_loom(f"{data_dir}/{cdir}/velocyto/{cdir}.loom")
        # Directory names are matched against the normalised 'Condition' labels
        annotated_cells = adata.obs.index[adata.obs['Condition'] == cdir]

        # Format barcodes to match naming convention
        # Exactly one "-<suffix>" must exist among the annotated barcodes of this
        # sample; it is appended as a plain string rather than an array.
        suffix = np.unique(annotated_cells.str.split("-").str[1])
        if len(suffix) != 1:
            raise ValueError(
                f"Expected exactly one barcode suffix for sample {cdir!r}, "
                f"found {len(suffix)}: {list(suffix)}"
            )
        adata_cond.obs_names = adata_cond.obs_names.str.split(":").str[1].str.replace("x", "")
        adata_cond.obs_names = adata_cond.obs_names + "-" + suffix[0]

        # Subset to cells (and genes) in annotated data
        adata_cond.var_names_make_unique()
        row_mask = adata_cond.obs_names.isin(annotated_cells)
        col_mask = adata_cond.var_names.isin(adata.raw.var_names)
        adata_cond = adata_cond[row_mask, col_mask].copy()

        # Add metadata
        adata_cond.obs = adata_cond.obs.join(adata.obs, how="left")

        adata_list.append(adata_cond)

    # Merge data from the same experimental condition (plus steady state)
    adata_cond_merged = ad.concat(adata_list, join="inner") # inner = intersect on genes
    adata_cond_merged.write(f"data/janssens_2025_CellRanger_Velocyto_counts_{cond}.h5ad")

In [None]:
# Sanity check: read one of the merged files back from disk and peek at its
# per-cell metadata.

merged_counts_path = "data/janssens_2025_CellRanger_Velocyto_counts_CpG-LNP.h5ad"
adata_cond = ad.read_h5ad(merged_counts_path)
adata_cond.obs.head()

In [5]:
# Number of cells per experimental condition in the reloaded merged object
adata_cond.obs.value_counts("Experimental_condition")

Experimental_condition
CpG-LNP         25598
Steady_state    14455
Name: count, dtype: int64

In [6]:
# Number of cells per individual condition label in the reloaded merged object
adata_cond.obs.value_counts("Condition")

Condition
Steady_state    14455
CpG-LNP_8h      12836
CpG-LNP_2h      12762
Name: count, dtype: int64