In [5]:
import sys
import scanpy as sc
import anndata
import pandas as pd
import numpy as np
import os
import gc

In [71]:
def read_and_qc(sample_name, path=None, force_filter = True,
                min_spots=10, min_genes=300, min_counts=500):
    r""" Read one 10X Visium sample into an AnnData object and compute QC metrics.

    The sample is read from ``<path>/<sample_name>/outs`` using the
    ``filtered_feature_bc_matrix.h5`` count file. Genes are flagged by symbol
    prefix (``MT-``, ``RPS``, ``MRP``, ``RPL``), a per-spot ``mt_frac`` is
    stored in ``.obs``, spot names are prefixed with the sample name, and
    repeated gene symbols (all but the first occurrence) are dropped.

    When ``force_filter`` is true, three more filters run in order:

    1. genes flagged ``mt``/``rps``/``mrp``/``rpl`` are removed from ``.X``;
       their counts are kept as a dense array in ``adata.obsm['mt']``
    2. genes with ``n_cells_by_counts <= min_spots`` are removed
    3. QC metrics are recomputed on the remaining genes and only spots with
       ``n_genes_by_counts > min_genes`` and ``total_counts > min_counts``
       are kept

    Modify this function if required by your workflow.

    :param sample_name: Name of the sample (sub-folder of ``path``)
    :param path: Folder holding one sub-folder per sample. When ``None``, the
        notebook-level ``sp_data_folder`` is looked up at call time.
    :param force_filter: Apply the gene and spot filters described above
    :param min_spots: A gene is kept only if detected in more than this many spots
    :param min_genes: A spot is kept only if it has more than this many detected genes
    :param min_counts: A spot is kept only if it has more than this many total counts
    :return: AnnData object (an actual copy, not a view of a larger object)
    :raises NameError: if ``path`` is ``None`` and ``sp_data_folder`` is not defined
    """

    if path is None:
        # Resolved here rather than in the signature: a default written as
        # `path=sp_data_folder` is evaluated when the `def` runs and fails
        # if the configuration cell has not been executed yet.
        path = sp_data_folder

    # os.path.join works whether or not `path` ends with a separator.
    adata = sc.read_visium(os.path.join(path, str(sample_name), 'outs'),
                           count_file='filtered_feature_bc_matrix.h5',
                           load_images=True)

    adata.obs['sample'] = str(sample_name)
    adata.var['SYMBOL'] = adata.var_names

    # Calculate QC metrics
    sc.pp.calculate_qc_metrics(adata, inplace=True)
    for flag, prefix in (('mt', 'MT-'), ('rps', 'RPS'), ('mrp', 'MRP'), ('rpl', 'RPL')):
        adata.var[flag] = adata.var['SYMBOL'].str.startswith(prefix).values

    # Fraction of each spot's counts coming from 'mt' genes. np.asarray/ravel
    # always yields a 1-D array, whatever matrix type `.sum` returns.
    mt_counts = np.asarray(adata[:, adata.var['mt'].values].X.sum(axis=1)).ravel()
    adata.obs['mt_frac'] = mt_counts / adata.obs['total_counts']

    # add sample name to obs names
    adata.obs_names = [str(sample_name) + '_' + barcode for barcode in adata.obs_names]
    adata.obs.index.name = 'spot_id'

    # Keep only the first occurrence of every gene symbol. The explicit copy
    # turns the slice into a real object so later assignments do not rely on
    # the implicit "set attribute of view, copying" behaviour.
    adata.var['duplicated'] = adata.var['SYMBOL'].duplicated(keep='first').values
    adata = adata[:, ~adata.var['duplicated'].values].copy()

    if force_filter:
        # First filter: mt and rb genes
        # Their counts are stored in obsm before the genes are removed from X.
        mt_rb_genes = (adata.var['mt'].values |
                       adata.var['rps'].values |
                       adata.var['mrp'].values |
                       adata.var['rpl'].values)
        adata.obsm['mt'] = adata[:, mt_rb_genes].X.toarray()

        # Second filter: keep genes detected in more than `min_spots` spots.
        # n_cells_by_counts comes from the QC call above (before any filtering).
        enough_spots = adata.var['n_cells_by_counts'].values > min_spots
        adata = adata[:, ~mt_rb_genes & enough_spots].copy()

        # Third filter: recompute metrics on the remaining genes, then keep
        # spots with more than `min_genes` genes AND more than `min_counts` counts.
        sc.pp.calculate_qc_metrics(adata, inplace=True)
        informative_spots = ((adata.obs['n_genes_by_counts'].values > min_genes) &
                             (adata.obs['total_counts'].values > min_counts))
        adata = adata[informative_spots, :].copy()

    return adata


In [75]:
# Load one Visium sample without the optional gene/spot filters.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
sp_data_folder = "/Users/ricardoramirez/Dropbox/PhD/Research/mi_atlas/visium_data/"

# NOTE(review): `s` (the sample name) is not assigned in any cell visible here — confirm it is defined before this cell runs.
adata = read_and_qc(sample_name=s, path=sp_data_folder, force_filter=False)

Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


In [76]:
# Dimensions of the object returned by read_and_qc(..., force_filter=False)
adata.shape

(2049, 36591)

In [63]:
# Mark every repeated gene symbol after its first occurrence, then drop the marked genes.
adata.var["duplicated"] = adata.var['SYMBOL'].duplicated()
adata = adata[:, ~adata.var['duplicated'].to_numpy()]

# First filter: mt and rb genes
# Keep the counts of genes flagged 'mt', 'rps', 'mrp' or 'rpl' as a dense array in obsm ...
adata.obsm['mt'] = adata[:, adata.var[['mt', 'rps', 'mrp', 'rpl']].any(axis=1).to_numpy()].X.toarray()

# ... and remove those genes from the expression matrix.
adata = adata[:, ~adata.var[['mt', 'rps', 'mrp', 'rpl']].any(axis=1).to_numpy()]

In [64]:
# Second filter
# Keep only genes whose n_cells_by_counts is greater than 10
adata = adata[:, adata.var['n_cells_by_counts'].gt(10).to_numpy()]

array([3416., 3131., 3214., ..., 1617., 2551., 2722.], dtype=float32)

In [66]:
# Third filter
# Recompute the QC metrics on the current gene set, then keep only spots with
# more than 300 detected genes and more than 500 total counts.
sc.pp.calculate_qc_metrics(adata, inplace=True)

adata = adata[adata.obs['n_genes_by_counts'].gt(300).to_numpy()
              & adata.obs['total_counts'].gt(500).to_numpy(), :]

Trying to set attribute `.obs` of view, copying.


In [69]:
# Dimensions of adata after the manual filtering cells
adata.shape

(2043, 13141)

In [70]:
# Sample name passed to read_and_qc.
# NOTE(review): `s` is not assigned in any cell visible here — confirm where it is defined.
s

'Visium_1_CK279'