In [1]:
import os
import scanpy as sc
import pandas as pd
import numpy as np
import anndata as ad
import matplotlib.pyplot as plt
from anndata import AnnData

In [None]:
### Read Adata Object.

# Switch the working directory first so the relative file name below is
# resolved inside the data folder.
os.chdir("/folder/")

input_h5ad = "adata_sample_1.h5ad"
adata = ad.read_h5ad(input_h5ad)

In [None]:
### Percent mt.

# Flag mitochondrial genes: names that start with "MT-" once upper-cased,
# so "mt-" prefixed names are matched as well.
adata.var['mt'] = adata.var_names.str.upper().str.startswith('MT-')

# Per-cell sums over all genes and over the flagged genes only.
# `.sum(axis=1)` returns an (n, 1) np.matrix for scipy sparse matrices but a
# 1-D ndarray for dense input; np.asarray(...).ravel() flattens both, whereas
# the `.A1` attribute used before exists only on np.matrix and raised
# AttributeError for any other container.
# NOTE(review): this reads adata.X as loaded; a later cell resets X from
# layers['counts_RNA'], so confirm X holds raw counts at this point.
total_per_cell = np.asarray(adata.X.sum(axis=1)).ravel()
mt_per_cell = np.asarray(adata[:, adata.var['mt']].X.sum(axis=1)).ravel()

# Percentage of each cell's total signal that comes from the flagged genes.
# Cells whose total is 0 yield NaN (0 / 0), exactly as in the original formula.
adata.obs['percent_mt'] = mt_per_cell / total_per_cell * 100

In [None]:
### Mean and Standard Deviation of n_features.

# Work on the per-cell detected-gene column once instead of re-indexing obs.
n_features = adata.obs['n_genes_by_counts']
mean_features, sd_features = n_features.mean(), n_features.std()

# Lower cut-off: 1.2 standard deviations below the mean.
threshold = mean_features - 1.2 * sd_features
print("Threshold:", threshold)

In [None]:
### Summary Statistics.

# Extremes of the three per-cell QC columns. The printed labels use the
# nCount_RNA / nFeature_RNA / percent.mt wording, while the values are read
# from the total_counts / n_genes_by_counts / percent_mt columns.
qc_obs = adata.obs

print("Min nCount_RNA:", qc_obs['total_counts'].min())
print("Min nFeature_RNA:", qc_obs['n_genes_by_counts'].min())
print("Max percent.mt:", qc_obs['percent_mt'].max())

In [None]:
### Proportion of Cells Passing Thresholds.

# The mean of a boolean Series is the fraction of True entries, i.e. the
# share of cells that satisfy each criterion on its own.
passes_feature_cut = adata.obs['n_genes_by_counts'].gt(threshold)
passes_mt_cut = adata.obs['percent_mt'].lt(5)

prop_above_threshold = passes_feature_cut.mean()
prop_below_5pct_mt = passes_mt_cut.mean()

print("Proportion nFeature_RNA > threshold:", prop_above_threshold)
print("Proportion percent.mt < 5:", prop_below_5pct_mt)

In [None]:
### Filtering.

# A cell is kept only when it passes BOTH criteria: more detected genes than
# `threshold` and a percent_mt value below 5.
keep_cells = (
    adata.obs['n_genes_by_counts'].gt(threshold)
    & adata.obs['percent_mt'].lt(5)
)

# .copy() turns the sliced view into an independent AnnData object, so the
# edits made to it afterwards do not touch `adata`.
adata_filtered = adata[keep_cells, :].copy()

print(f"Filtered dataset: {adata_filtered.n_obs} cells remain out of {adata.n_obs}")

In [None]:
### Basic QC plots.

# Metadata columns that are coerced to numeric before plotting; values that
# cannot be parsed become NaN (errors='coerce').
# NOTE(review): these are the 'nCount_RNA' / 'nFeature_RNA' / 'percent.mt'
# columns, not the 'percent_mt' column computed and filtered on earlier in
# this notebook -- confirm that plotting 'percent.mt' is intended.
cols = ['nCount_RNA', 'nFeature_RNA', 'percent.mt']

filtered_obs = adata_filtered.obs
for col in cols:
    filtered_obs[col] = pd.to_numeric(filtered_obs[col], errors = 'coerce')

# One violin per 'Cluster_Name' group, without the overlaid strip plot.
violin_options = dict(keys = ['percent.mt'], groupby = 'Cluster_Name', stripplot = False)
sc.pl.violin(adata_filtered, **violin_options)

In [None]:
### Downstream Analysis

# Start from the stored count layer so normalisation always runs on the same
# input, whatever X held before.
adata_filtered.X = adata_filtered.layers['counts_RNA'].copy()

# Sanity check of the starting matrix. A bare `adata_filtered.X.max()` in the
# middle of a cell is evaluated and thrown away (only a cell's last expression
# is displayed), so the value is printed explicitly.
print("Max value in counts_RNA:", adata_filtered.X.max())

# Each stage is snapshotted into its own layer right after it is computed,
# because every scanpy call below overwrites X in place.
sc.pp.normalize_total(adata_filtered, target_sum = 1e4)
adata_filtered.layers["data"] = adata_filtered.X.copy()

sc.pp.log1p(adata_filtered)
adata_filtered.layers["log1p_normalized"] = adata_filtered.X.copy()

# Highly variable genes are flagged on the log-normalised values, before scaling.
sc.pp.highly_variable_genes(adata_filtered, n_top_genes = 5000, flavor = 'seurat')

# Scale the matrix, clipping values at 10.
sc.pp.scale(adata_filtered, max_value = 10)
adata_filtered.layers["scale.data"] = adata_filtered.X.copy()

# PCA is run once. The original called sc.pp.pca and then sc.tl.pca with the
# same arguments; the second call only recomputed and overwrote the result of
# the first, doubling the runtime for no change in output.
# NOTE(review): whether PCA restricts itself to the genes flagged by
# highly_variable_genes depends on scanpy's defaults -- confirm for the
# installed version.
sc.pp.pca(adata_filtered, n_comps = 50)

# Neighbour graph built on the PCA representation, then UMAP and Leiden
# clustering on that graph; cluster labels are stored under obs['leiden_0.1'].
sc.pp.neighbors(adata_filtered, use_rep = 'X_pca', n_neighbors = 20)
sc.tl.umap(adata_filtered)
sc.tl.leiden(adata_filtered, resolution = 0.1, key_added = "leiden_0.1")

In [None]:
### Write Adata File.

# Write the filtered, processed object next to the input file; the relative
# output name is resolved against the working directory set here.
os.chdir("/folder/")

output_h5ad = "adata_filtered.h5ad"
adata_filtered.write(output_h5ad)