# Lab 7: Marker Gene Identification

**Module 7** - Finding Cluster Markers

## Objectives
- Run differential expression analysis to identify marker genes for each cluster
- Visualize markers (dotplot, violin, heatmap)
- Interpret marker statistics


In [None]:
import scanpy as sc
import numpy as np
import matplotlib.pyplot as plt

# Configure scanpy: verbosity level 3 and white-background figures at 100 dpi
sc.settings.verbosity = 3
sc.settings.set_figure_params(facecolor='white', dpi=100)

# Fetch the preprocessed PBMC 3k dataset shipped with scanpy
adata = sc.datasets.pbmc3k_processed()

# Report how many distinct Louvain cluster labels the dataset carries
n_clusters = adata.obs['louvain'].nunique()
print(f"Clusters: {n_clusters}")


In [None]:
# Rank genes for every Louvain cluster with the Wilcoxon test
de_settings = {'groupby': 'louvain', 'method': 'wilcoxon'}
sc.tl.rank_genes_groups(adata, **de_settings)

# Plot the 10 highest-ranked genes per cluster; each panel gets its own y-axis
sc.pl.rank_genes_groups(adata, sharey=False, n_genes=10)


In [None]:
# Get marker results for every cluster as one long-format DataFrame.
# With group=None the rows are ordered cluster by cluster (best-ranked gene
# first within each cluster), so a plain head() would show only the first
# cluster's genes.
markers = sc.get.rank_genes_groups_df(adata, group=None)

# Print the top-ranked genes of each cluster so the summary really spans all
# clusters; `markers` itself is left untouched for later cells.
n_top = 3
print("Top markers across all clusters:")
print(markers.groupby('group', observed=True).head(n_top))


In [None]:
# Dot plot of the top markers
# Collect the first 3 marker rows of each cluster, in cluster-category order
cluster_labels = adata.obs['louvain'].cat.categories
top_markers = [
    gene
    for label in cluster_labels
    for gene in markers.loc[markers['group'] == label, 'names'].head(3)
]

# standard_scale='var' rescales each gene to a common range across clusters
sc.pl.dotplot(adata, groupby='louvain', var_names=top_markers, standard_scale='var')


In [None]:
# Check known PBMC markers
# Each entry pairs a cell-type label with its marker genes; the mapping keeps
# this order, and dotplot receives the labelled gene groups as var_names.
marker_panels = (
    ('T cells', ['CD3D', 'CD3E']),
    ('CD4 T', ['CD4', 'IL7R']),
    ('CD8 T', ['CD8A', 'CD8B']),
    ('B cells', ['MS4A1', 'CD79A']),
    ('NK', ['NKG7', 'GNLY']),
    ('Monocytes', ['CD14', 'LYZ']),
    ('DC', ['FCER1A', 'CST3']),
    ('Platelets', ['PPBP', 'PF4']),
)
pbmc_markers = dict(marker_panels)

sc.pl.dotplot(adata, groupby='louvain', var_names=pbmc_markers, standard_scale='var')
