In [1]:
# Required packages and data paths
# Scanpy 1.8
import scanpy as sc
import anndata as ad
import episcanpy.api as epi

# Root of the shared student workspace; every other path is derived from it.
STUD_PATH = '/mnt/workspace_stud/'
# Output directory of this project inside the workspace (trailing slash kept
# so that sub-paths can simply be appended).
OUTPUT_PATH = STUD_PATH + 'stud2/output/'

In [2]:
# Sample to process, plus the input file and the two result files derived
# from its name.
TISSUE = "liver_sample_9"
# Input .h5ad file for this sample, read from the wp1 output folder.
INPUT_PATH = OUTPUT_PATH + 'wp1/' + TISSUE + '_pp17.h5ad'
# Alternative: derive the sample name from the input file name instead.
#TISSUE = INPUT_PATH.split('/')[-1].replace('_pp17.h5ad', '')
# Result files handed over to wp4 and wp3 respectively.
output_wp4 = OUTPUT_PATH + 'wp4/' + TISSUE + '_obs.csv'
output_wp3 = OUTPUT_PATH + 'wp3/clusters_' + TISSUE + '.tsv'

In [3]:
# read input file and check matrix size
# `ad.read` is only a deprecated alias of `ad.read_h5ad` (recent anndata
# releases emit a FutureWarning and announce its removal), so the real reader
# is called directly; the loaded object is the same.
adata = ad.read_h5ad(INPUT_PATH)
# Bare last expression: the notebook displays the AnnData summary
# (n_obs x n_vars, obs/var columns, layers).
adata

AnnData object with n_obs × n_vars = 15588 × 1107578
    obs: 'nb_features', 'log_nb_features'
    var: 'n_cells', 'commonness', 'prop_shared_cells', 'variability_score'
    layers: 'binary', 'normalised'

In [4]:
# calculate log
# Applies episcanpy's log1p preprocessing step to `adata`. The return value is
# not captured, so this cell relies on the call updating `adata` in place
# (assumption - confirm against the episcanpy documentation).
# NOTE(review): if the transform is applied in place, re-running this cell
# without reloading `adata` would apply it a second time.
epi.pp.log1p(adata)

In [None]:
# Build the neighbourhood graph that the clustering step relies on:
# a PCA first, then nearest neighbours computed on those components.
# Alternative kept for reference:
# epi.pp.lazy(adata, min_dist=1.0, spread=2.0)
n_components = 50  # used both for the PCA and for the neighbour search
sc.pp.pca(
    adata,
    n_comps=n_components,
    svd_solver='arpack',
    use_highly_variable=False,
)
sc.pp.neighbors(
    adata,
    n_neighbors=25,
    n_pcs=n_components,
    method='umap',
    metric='euclidean',
)

In [None]:
# Compute the UMAP embedding, find clusters with Louvain and plot the
# embedding coloured by cluster.
# Parameter notes:
#   UMAP:    spread = 2.0, min_dist = 0.1
#   Louvain: resolution = 0.5 - 0.8
umap_settings = {'min_dist': 0.1, 'spread': 2.0}
sc.tl.umap(adata, **umap_settings)
louvain_settings = {'resolution': 0.5, 'use_weights': True}
epi.tl.louvain(adata, **louvain_settings)
epi.pl.umap(adata, color=['louvain'])

In [None]:
# write files for wp4/3
# Every line has the form "<barcode>\t<louvain cluster>\n" and the same line
# is written to both files.
# NOTE(review): the wp4 file is named *.csv but receives tab-separated
# content; the format is kept unchanged here - confirm what wp4 expects.
#
# The column is fetched once and iterated together with its index. Looking a
# value up with `adata.obs['louvain'][i]` passes an integer to a Series whose
# index holds labels; pandas deprecated that positional fallback (FutureWarning
# since 2.1), and it also re-fetched the column on every iteration.
clusters = adata.obs['louvain']
with open(output_wp4, "w") as obs_file, open(output_wp3, "w") as tsv_file:
    for barcode, cluster in zip(clusters.index, clusters):
        line = f"{barcode}\t{cluster}\n"
        obs_file.write(line)
        tsv_file.write(line)