# Defining Input 

In [None]:
# Parameters
# Defaults for the three notebook inputs used by the cells below.
input_file = "input.h5ad"    # .h5ad file that is read into `adata`
sample_name = "sample"       # label stored in adata.obs['sample']
output_file = "output.h5ad"  # path the filtered object is written to

# Importing

In [2]:
import scanpy as sc
import numpy as np
import matplotlib.pyplot as plt
import warnings
# Suppress every warning from here on: no category, module or message filter is given,
# so this also hides deprecation and implicit-modification warnings.
warnings.filterwarnings('ignore')

# Functions

In [None]:
from scipy.stats import median_abs_deviation

def is_outlier(adata, metric: str, nmads: float):
    """Flag observations whose ``metric`` is more than ``nmads`` MADs from the median.

    Parameters
    ----------
    adata
        Object exposing ``adata.obs[metric]`` as a numeric column.
    metric
        Name of the column in ``adata.obs`` to test.
    nmads
        Number of median absolute deviations tolerated on either side of the
        median. Values exactly on a bound are not flagged.

    Returns
    -------
    Boolean mask aligned with ``adata.obs[metric]``; ``True`` marks an outlier.
    """
    values = adata.obs[metric]
    # Compute the centre and the allowed spread once instead of once per bound.
    center = np.median(values)
    spread = nmads * median_abs_deviation(values)
    return (values < center - spread) | (values > center + spread)

# Reading sample 

In [None]:
# Load the AnnData object from the .h5ad path set in the parameters cell.
adata = sc.read_h5ad(input_file)

In [None]:
# Snapshot the pre-filtering dimensions so the summary cell can report what was removed.
numbers = dict(cells=adata.n_obs, genes=adata.n_vars)

# Defining QC 

In [None]:
# Tag every cell with the sample name (the scalar fills the whole obs column).
adata.obs['sample'] = sample_name

In [None]:
# Mark genes whose name starts with the upper-case prefix 'MT-' (case-sensitive match).
# NOTE(review): presumably human mitochondrial gene symbols; other species may use a
# different case (e.g. 'mt-') and would not be matched here, so confirm the input species.
adata.var['mt'] = adata.var_names.str.startswith('MT-')
# Compute QC metrics in place, including the share of counts in the 'mt' gene set and
# log1p-transformed variants; the top-N percentage metrics are skipped.
sc.pp.calculate_qc_metrics(
    adata,
    inplace=True,
    log1p=True,
    percent_top=None,
    qc_vars=['mt'],
)

## Plotting QC Before Filtering

In [None]:
# Per-cell distributions of the three QC metrics before any cell is removed.
sc.pl.violin(
    adata,
    ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],
    multi_panel=True,
    jitter=0.4,
)

# Filtering through Median Absolute Deviation (MAD)

In [None]:
# Cells more than 3 MADs from the median of the log1p metrics are flagged as outliers.
outlier_counts = is_outlier(adata, metric='log1p_total_counts', nmads=3)
outlier_genes = is_outlier(adata, metric='log1p_n_genes_by_counts', nmads=3)
# Fixed cutoff rather than a MAD rule: flag cells with more than 10 percent mitochondrial counts.
outlier_mt = adata.obs['pct_counts_mt'].gt(10)
# NOTE(review): scanpy's `min_cells` is an absolute number of cells, so a value of 1
# would not mean 1% -- confirm the intended threshold before re-enabling gene filtering.
#MIN_CELLS = 1  # Filtering genes on minimum cells: 1%

In [None]:
# Gene filtering is currently disabled; only cells are removed in this cell.
#sc.pp.filter_genes(adata, min_cells=MIN_CELLS)
# Keep only the cells that none of the three outlier masks flagged.
# Subsetting an AnnData yields a view that still references the unfiltered parent;
# .copy() materialises an independent object so later plotting and writing work on
# real data and the parent can be released.
adata = adata[~(outlier_counts | outlier_genes | outlier_mt), :].copy()


# QC Plots after Filtering

In [None]:
# Same three QC distributions, now restricted to the cells that survived filtering.
sc.pl.violin(
    adata,
    ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],
    multi_panel=True,
    jitter=0.4,
)

In [None]:
# Report the dataset size before and after filtering and how much was removed.
# `numbers` holds the dimensions captured right after loading.
filterd_cells = numbers['cells'] - adata.n_obs
filterd_genes = numbers['genes'] - adata.n_vars
print(f" Before Filtering: {numbers['cells']} cells, {numbers['genes']} genes")
print(f" After Filtering: {adata.n_obs} cells, {adata.n_vars} genes")
print(f" Filtered out {filterd_cells} cells.")
print(f" Filtered out {filterd_genes} genes.")

# Writing Filtered Data

In [4]:
# Display the object's summary as the cell output.
# NOTE(review): the saved output below is a NameError from an out-of-order run;
# re-run the notebook top to bottom to refresh it.
adata

NameError: name 'adata' is not defined

In [None]:
# Write the filtered AnnData to the output path set in the parameters cell.
adata.write(output_file)