# Defining Input 

In [None]:
# Parameters
# Notebook-level settings read by the cells below.
# NOTE(review): looks like a papermill-style parameters cell whose values are
# overridden at execution time — confirm against how the notebook is launched.
input_file = 'input.h5ad'  # path handed to sc.read_h5ad to load the sample
sample_name = 'sample'  # label stored in adata.obs['sample']
output_file = 'output.h5ad'  # path handed to adata.write for the filtered data

# Importing

In [2]:
import scanpy as sc
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Functions

In [None]:
from scipy.stats import median_abs_deviation

def is_outlier(adata, metric: str, nmads: int):
    """Flag observations whose ``metric`` lies far from the median.

    A value is an outlier when it is strictly more than ``nmads`` median
    absolute deviations (MADs) below or above the median of
    ``adata.obs[metric]``.

    Args:
        adata: Object exposing its per-observation table as ``adata.obs``.
        metric: Key of the entry in ``adata.obs`` to test.
        nmads: Number of MADs tolerated on each side of the median.

    Returns:
        Element-wise boolean mask over ``adata.obs[metric]``; ``True``
        marks an outlier.
    """
    values = adata.obs[metric]
    center = np.median(values)
    # Half-width of the accepted band around the median.
    spread = nmads * median_abs_deviation(values)
    too_low = values < center - spread
    too_high = values > center + spread
    return too_low | too_high

# Reading sample 

In [None]:
# Load the sample stored at the configured input path.
adata = sc.read_h5ad(filename=input_file)

# Defining QC 

In [None]:
# Store the configured sample label in the 'sample' column of adata.obs.
adata.obs['sample'] = sample_name

In [None]:
# Flag mitochondrial genes by their symbol prefix. The comparison is
# case-insensitive so that both upper-case 'MT-' and lower-case 'mt-'
# symbols are matched; with a case-sensitive match, lower-case data would
# silently end up with pct_counts_mt == 0 for every cell.
adata.var['mt'] = adata.var_names.str.upper().str.startswith('MT-')
# Compute the QC metrics in place, including the metrics for the genes
# flagged in adata.var['mt'] (the pct_counts_mt column used below).
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)

## Plotting QC Before Filtering

In [None]:
# Distribution of the three QC metrics before any cell is removed.
sc.pl.violin(
    adata,
    keys=['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],
    multi_panel=True,
    jitter=0.4,
)

# Filtering through Median Absolute Deviation (MAD)

In [None]:
# Flag observations lying more than 5 MADs from the median of each QC metric.
outlier_counts = is_outlier(adata, 'total_counts', 5)
outlier_genes = is_outlier(adata, 'n_genes_by_counts', 5)
outlier_mt = is_outlier(adata, 'pct_counts_mt', 5)
# Gene filter threshold: 1% of the cells present before outlier removal.
# np.rint returns a float, so convert it to the integer cell count that
# sc.pp.filter_genes expects for min_cells.
MIN_CELLS = int(np.rint(adata.n_obs / 100))

In [None]:
# Keep only the cells not flagged by any of the three outlier masks.
# Subsetting an AnnData returns a view; copy it so that filter_genes
# modifies a real object instead of forcing an implicit copy of the view
# (the warning for which is hidden by the global warnings filter).
adata = adata[~(outlier_counts | outlier_genes | outlier_mt), :].copy()
# Drop genes detected in fewer than MIN_CELLS of the remaining cells.
sc.pp.filter_genes(adata, min_cells=MIN_CELLS)

# QC Plots after Filtering

In [3]:
# Same three QC metrics, plotted again after filtering.
sc.pl.violin(
    adata,
    keys=['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],
    multi_panel=True,
    jitter=0.4,
)

NameError: name 'adata' is not defined

# Writing Filtered Data

In [None]:
# Persist the filtered object as an .h5ad file at the configured output path.
adata.write_h5ad(output_file)