In [None]:
"""
This notebook creates an AnnData object for each Space Ranger slice, so that the slices can subsequently be deconvoluted.

authors: Roy Oelen
"""

In [None]:
# import the libraries
import os
import pickle
import warnings

import scanpy as sc


In [None]:
def read_slices(slices_loc, slices, counts_file='filtered_feature_bc_matrix.h5', do_norm=True, do_dimruc=True,
                min_counts=200, min_cells=3, n_top_genes=2000):
    """Read ST slices, do QC, optionally normalize/cluster them, and return them in a dictionary.

    For every slice the count matrix is read from ``<slices_loc>/<slice>/outs``,
    gene names are made unique, a boolean ``mt`` column is added to ``adata.var``,
    QC metrics are calculated, and observations and genes are filtered.

    Parameters
    ----------
    slices_loc : str
        the location of the folders containing the slices
    slices : list
        a list containing the slices (directory names)
    counts_file : str
        the name of the count expression matrix
    do_norm : bool
        run total-count normalization, log1p transformation and highly
        variable gene detection
    do_dimruc : bool
        run dimensional reduction (PCA, neighbours, UMAP) and Leiden clustering;
        this only happens when ``do_norm`` is also True, otherwise a warning
        is emitted and the step is skipped
    min_counts : int
        minimum number of counts an observation needs to be kept
    min_cells : int
        minimum number of observations a gene must be detected in to be kept
    n_top_genes : int
        number of highly variable genes to flag (only used when ``do_norm`` is True)

    Returns
    -------
    dict
        a dictionary mapping each entry of ``slices`` to its AnnData object
    """
    # dimensional reduction is nested under normalization; say so instead of skipping it silently
    if do_dimruc and not do_norm:
        warnings.warn(
            'do_dimruc is ignored because do_norm is False; dimensional reduction is only run on normalized data',
            stacklevel=2
        )
    # create a dictionary to store the slices
    slices_dict = {}
    # read each slice
    for slice_name in slices:
        # build the path to the 'outs' directory of this slice
        full_visium_path = os.path.join(slices_loc, slice_name, 'outs')
        # read the file
        adata = sc.read_visium(path=full_visium_path,
                               count_file=counts_file)
        # make gene names unique
        adata.var_names_make_unique()
        # flag genes whose name starts with 'MT-'
        # NOTE(review): this prefix presumably targets human mitochondrial genes; confirm for other species
        adata.var["mt"] = adata.var_names.str.startswith("MT-")
        sc.pp.calculate_qc_metrics(adata, qc_vars=["mt"], inplace=True)

        # drop low-count observations and rarely detected genes
        sc.pp.filter_cells(adata, min_counts=min_counts)
        sc.pp.filter_genes(adata, min_cells=min_cells)

        # normalize if requested
        if do_norm:
            # as well as the normalization
            sc.pp.normalize_total(adata, inplace=True)
            sc.pp.log1p(adata)
            # and calculation of highly variable genes
            sc.pp.highly_variable_genes(adata, flavor="seurat", n_top_genes=n_top_genes)

            # and dimensional reduction if requested
            if do_dimruc:
                # calculate principal components as well
                sc.pp.pca(adata)
                # do nearest neighbours
                sc.pp.neighbors(adata)
                # do 2d UMAP dim reduction
                sc.tl.umap(adata)
                # and Leiden clustering, stored under the 'clusters' key
                sc.tl.leiden(adata, key_added="clusters")

        # add the result to the dictionary
        slices_dict[slice_name] = adata
    return slices_dict
    

In [None]:
# read every slice with the default settings of read_slices (normalization and dimensional reduction enabled)
slice_objects = read_slices(
    '/groups/umcg-franke-scrna/tmp02/projects/epifat/processed/alignment/spaceranger_out/',
    ['V10A20-016_A1', 'V10A20-016_B1', 'V10A20-016_C1', 'V10A20-016_D1'],
)

In [None]:
# display the dictionary returned by read_slices to check that every slice was loaded
slice_objects

In [None]:
# write the dictionary of slices to disk as a pickle
with open('/groups/umcg-franke-scrna/tmp02/projects/epifat/ongoing/seurat_preprocess_samples/objects/' + 'spaceranger.20230823.pickle', 'wb') as f:
    # serialize the slice_objects dictionary with the highest pickle protocol available
    pickle.dump(slice_objects, f, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# read the pickle back in to check that it was exported correctly
with open('/groups/umcg-franke-scrna/tmp02/projects/epifat/ongoing/seurat_preprocess_samples/objects/' + 'spaceranger.20230823.pickle', 'rb') as f:
    slice_objects_reloaded = pickle.load(f)
# display the reloaded dictionary
slice_objects_reloaded

In [3]:
# read the same slices again, this time skipping normalization and dimensional reduction
# NOTE: this rebinds slice_objects, replacing the normalized objects created earlier in the notebook
slice_objects = read_slices(
    '/groups/umcg-franke-scrna/tmp02/projects/epifat/processed/alignment/spaceranger_out/',
    ['V10A20-016_A1', 'V10A20-016_B1', 'V10A20-016_C1', 'V10A20-016_D1'],
    do_norm=False,
    do_dimruc=False,
)

  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")


In [4]:
# write the non-normalized slices to their own pickle file
with open('/groups/umcg-franke-scrna/tmp02/projects/epifat/ongoing/seurat_preprocess_samples/objects/' + 'spaceranger.20230823.raw.pickle', 'wb') as f:
    # serialize the slice_objects dictionary with the highest pickle protocol available
    pickle.dump(slice_objects, f, protocol=pickle.HIGHEST_PROTOCOL)