# Variant Analysis, Heatmaps

In [None]:
import os
import seaborn as sns
import anndata as ad
import matplotlib.pyplot as plt
import numpy as np

# Repository root = parent of the current working directory. When the working directory is
# RLSRWP_2025/notebooks (where this notebook is expected to reside), this yields RLSRWP_2025.
RLSRWP_2025_dir = os.path.split(os.path.abspath(""))[0]

In [None]:
# Number of observations (rows) and variants (columns) to keep for plotting.
# Each value is either an integer (keep the first N) or the string "all" (keep everything).
n_obs, n_var = 5, 10

# Input and output locations, all rooted at <repo>/data/vk_count_out.
_data_dir = os.path.join(RLSRWP_2025_dir, "data")
vk_count_out_dir = os.path.join(_data_dir, "vk_count_out")
adata_path = os.path.join(vk_count_out_dir, "adata_cleaned.h5ad")  # AnnData file to load
_analysis_dir = os.path.join(vk_count_out_dir, "analysis")
out_path = os.path.join(_analysis_dir, "adata_matrix.png")  # destination for the heatmap image

In [None]:
# Load the cleaned AnnData object from disk.
adata = ad.read_h5ad(adata_path)

# Order the variant axis by descending "vcrs_count"; indexing the AnnData object with this
# index reorders adata.var and adata.X together.
sorted_variant_order = adata.var.sort_values(by="vcrs_count", ascending=False).index
adata = adata[:, sorted_variant_order]

# Subset early so that later steps only handle a small matrix (avoids RAM issues when large).
# "all" keeps the full axis; an integer keeps the first N entries along that axis.
keep_obs = slice(None) if n_obs == "all" else slice(n_obs)
keep_var = slice(None) if n_var == "all" else slice(n_var)
if not (n_obs == "all" and n_var == "all"):
    adata = adata[keep_obs, keep_var]

### Make heatmap of mutations (x), cells/samples (y), and counts (color)

In [None]:
# Get a dense 2-D array for plotting: objects exposing .toarray() (e.g. sparse matrices) are
# converted, anything else is used as-is.
# For a quick smoke test without real data, substitute:
#   matrix = np.random.randint(0, 101, size=(5, 10))
matrix = adata.X.toarray() if hasattr(adata.X, "toarray") else adata.X

# Plot heatmap: rows of the matrix (observations) go on the y-axis, columns on the x-axis.
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(matrix, cmap="viridis", xticklabels=False, yticklabels=False, ax=ax)
ax.set_xlabel("Variants")  # columns are the variants ordered by vcrs_count, not genes
ax.set_ylabel("Cells")
ax.set_title("AnnData Heatmap")

# Persist the figure to out_path. This must happen before plt.show(), after which the figure
# may no longer be available to save, depending on the backend.
os.makedirs(os.path.dirname(out_path), exist_ok=True)  # the output directory may not exist yet
fig.savefig(out_path, bbox_inches="tight")
plt.show()