In [6]:
import scanpy as sc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from tifffile import imread
import os

In [25]:
# --- File paths and FOV selection for the all-niche overlay ---
# AnnData file whose .obs table supplies the per-cell metadata used below.
h5ad_path = "/blue/kejun.huang/tan.m/IBDCosMx_scRNAseq/CosMx/Corrected_CompleteCosMx.h5ad"
# Morphology TIF that is drawn as the background image.
tif_path = "/blue/kejun.huang/tan.m/IBDCosMx_scRNAseq/CosMx/UC a_8/GSM7473685_UCa/RawMorphologyImages/20220314_113221_S3_C902_P99_N99_F008_Z004.TIF"
# Directory that receives the saved figure.
output_dir = "/blue/kejun.huang/tan.m/IBDCosMx_scRNAseq/CosMx/UC a_8/Mapping"
fov_prefix = "UC a_8"  # Cells are kept when their unique_cell_id starts with this prefix

# Create the output directory up front; exist_ok=True keeps re-runs of the cell from failing
os.makedirs(output_dir, exist_ok=True)

# ===========================
# Step 1. Load the TIF image
# ===========================
tif_image = imread(tif_path)

# Reduce the array to one 2-D plane for display. A 3-D array is treated as a
# stack of layers along axis 0 and only the first layer is shown (layers are
# not averaged). A one-layer stack of shape (1, H, W) is unwrapped the same
# way, just without the message.
# NOTE(review): assumes axis 0 is the layer axis; an (H, W, channels) image
# would be sliced incorrectly here -- confirm the TIF layout.
if tif_image.ndim == 3:
    if tif_image.shape[0] > 1:
        print(f"Multi-layer image detected with shape {tif_image.shape}. Using the first layer for display.")
    image_to_display = tif_image[0]
else:
    image_to_display = tif_image

# Fail with a readable message instead of an opaque unpacking error when the
# selected plane is still not 2-D (e.g. a 4-D stack).
if image_to_display.ndim != 2:
    raise ValueError(
        f"Expected a 2-D image plane for display, got shape {image_to_display.shape} from {tif_path}"
    )

# Image dimensions (rows = height, columns = width)
image_height, image_width = image_to_display.shape
print(f"Image dimensions: width={image_width}, height={image_height}")

# ===========================
# Step 2. Load cell data
# ===========================
adata = sc.read_h5ad(h5ad_path)

# reset_index() turns the index into a column named after it. Rename an index
# called 'unique_cell_id' first so it cannot clash with an obs column of the
# same name.
if adata.obs.index.name == 'unique_cell_id':
    adata.obs.index.name = 'index_id'

# Flat per-cell table with the former index as an ordinary column
obs_df = adata.obs.reset_index()

# Keep only the cells whose ID begins with fov_prefix.
# NOTE(review): this is a plain prefix match -- "UC a_8" would also match IDs
# starting with "UC a_80"; confirm the ID format rules that out.
belongs_to_fov = obs_df['unique_cell_id'].str.startswith(fov_prefix)
obs_fov = obs_df.loc[belongs_to_fov].copy()

# ===========================
# Step 3. Prepare coordinates
# ===========================
# Coerce both centroid columns to numbers (unparseable entries become NaN),
# then keep only the cells that have both coordinates.
xy_columns = ['CenterX_local_px', 'CenterY_local_px']
for xy_col in xy_columns:
    obs_fov[xy_col] = pd.to_numeric(obs_fov[xy_col], errors='coerce')
has_both_coords = obs_fov[xy_columns].notna().all(axis=1)
obs_fov = obs_fov.loc[has_both_coords].copy()

# Mirror Y against the image height so the points line up with the displayed image.
# NOTE(review): assumes the local-px Y axis runs opposite to the image rows -- confirm.
obs_fov['FlippedY'] = image_height - obs_fov['CenterY_local_px']

# ===========================
# Step 4. Assign distinct colors
# ===========================
# High-contrast palette; it is cycled when there are more niches than colors
distinct_colors = ["yellow", "green", "blue", "red"]

# Niche labels present in this FOV, in sorted order
unique_niches = sorted(obs_fov['NMF_factor'].unique())
num_niches = len(unique_niches)

# Pair the k-th niche with the k-th palette entry, wrapping around the palette
palette_size = len(distinct_colors)
color_dict = dict(
    zip(unique_niches, (distinct_colors[k % palette_size] for k in range(num_niches)))
)

# Per-cell color looked up from the cell's niche
obs_fov['color'] = obs_fov['NMF_factor'].map(color_dict)

# ===========================
# Step 5. Plot image + cells
# ===========================
fig, ax = plt.subplots(figsize=(12, 12))

# Grayscale tissue image as the background layer
ax.imshow(image_to_display, cmap='gray')

# One borderless marker per cell, colored by niche
cell_marker_style = dict(s=32, alpha=0.9, edgecolors='none')
ax.scatter(
    obs_fov['CenterX_local_px'],
    obs_fov['FlippedY'],
    c=obs_fov['color'],
    **cell_marker_style,
)

# One legend swatch per niche, in the order the colors were assigned
patches = [
    mpatches.Patch(color=niche_color, label=f'NMF Factor {niche}')
    for niche, niche_color in color_dict.items()
]

# Anchor the legend's upper-right corner to the right of the axes (x > 1)
legend_options = dict(
    title="NMF Niches",
    title_fontsize=21,
    fontsize=20,
    loc='upper right',
    bbox_to_anchor=(1.35, 1),
    frameon=True,
)
legend = ax.legend(handles=patches, **legend_options)

# Hide ticks, labels and frame so only image, cells and legend remain
ax.axis('off')

fig.tight_layout()

# ===========================
# Step 6. Save figure
# ===========================
output_path = os.path.join(output_dir, "UC_a_8_NMF_Mapping.png")

# Write the figure (the tight bounding box keeps the outside legend), then release it
fig.savefig(output_path, dpi=300, bbox_inches='tight')
plt.close(fig)

# Short run summary
print(
    f"Mapping figure saved to: {output_path}",
    f"Total cells plotted for {fov_prefix}: {obs_fov.shape[0]}",
    f"Unique NMF niches found: {num_niches}",
    sep="\n",
)
print("NMF Factors:", unique_niches)

Multi-layer image detected with shape (5, 3648, 5472). Using the first layer for display.
Image dimensions: width=5472, height=3648
Mapping figure saved to: /blue/kejun.huang/tan.m/IBDCosMx_scRNAseq/CosMx/UC a_8/Mapping/UC_a_8_NMF_Mapping.png
Total cells plotted for UC a_8: 3765
Unique NMF niches found: 4
NMF Factors: [1, 2, 3, 4]


In [22]:
# --- File paths and selection parameters for the single-niche overlay ---
# AnnData file whose .obs table supplies the per-cell metadata used below.
h5ad_path = "/blue/kejun.huang/tan.m/IBDCosMx_scRNAseq/CosMx/Corrected_CompleteCosMx.h5ad"
# Morphology TIF that is drawn as the background image.
tif_path = "/blue/kejun.huang/tan.m/IBDCosMx_scRNAseq/CosMx/UC a_8/GSM7473685_UCa/RawMorphologyImages/20220314_113221_S3_C902_P99_N99_F008_Z004.TIF"
# Directory that receives the saved figure.
output_dir = "/blue/kejun.huang/tan.m/IBDCosMx_scRNAseq/CosMx/UC a_8/Mapping"
fov_prefix = "UC a_8"   # Cells are kept when their unique_cell_id starts with this prefix
niche_focus = 3          # NMF_factor value whose cells are plotted
top_n = 5                 # Number of most abundant cell types to keep

# Create the output directory up front; exist_ok=True keeps re-runs of the cell from failing
os.makedirs(output_dir, exist_ok=True)

# ===========================
# Step 1. Load the TIF image
# ===========================
tif_image = imread(tif_path)

# Pick one 2-D plane for display. A 3-D array is read as a stack of layers
# along axis 0 and only the first layer is used. A one-layer stack of shape
# (1, H, W) is unwrapped too, so the shape unpacking below always sees 2-D data.
# NOTE(review): assumes axis 0 is the layer axis; an (H, W, channels) image
# would be sliced incorrectly here -- confirm the TIF layout.
if tif_image.ndim == 3:
    if tif_image.shape[0] > 1:
        print(f"Multi-layer image detected with shape {tif_image.shape}. Using the first layer for display.")
    image_to_display = tif_image[0]
else:
    image_to_display = tif_image

# Report an unexpected layout explicitly rather than through an unpacking error
if image_to_display.ndim != 2:
    raise ValueError(
        f"Expected a 2-D image plane for display, got shape {image_to_display.shape} from {tif_path}"
    )

# Image dimensions (rows = height, columns = width)
image_height, image_width = image_to_display.shape
print(f"Image dimensions: width={image_width}, height={image_height}")

# ===========================
# Step 2. Load cell data
# ===========================
adata = sc.read_h5ad(h5ad_path)

# An index named 'unique_cell_id' is renamed before reset_index(), which
# would otherwise try to add a column under that name next to any existing
# obs column called 'unique_cell_id'.
if adata.obs.index.name == 'unique_cell_id':
    adata.obs.index.name = 'index_id'

# Per-cell metadata as a flat table
obs_df = adata.obs.reset_index()

# Select the cells of this FOV by ID prefix.
# NOTE(review): prefix matching means "UC a_8" also matches IDs that begin
# with "UC a_80"; confirm the ID format makes that impossible.
fov_mask = obs_df['unique_cell_id'].str.startswith(fov_prefix)
obs_fov = obs_df.loc[fov_mask].copy()

# ===========================
# Step 3. Focus on the selected niche
# ===========================
# Keep only the cells whose NMF_factor equals niche_focus
is_focus_niche = obs_fov['NMF_factor'] == niche_focus
obs_fov = obs_fov.loc[is_focus_niche].copy()

# Stop early when this FOV has no cells in the requested niche
if obs_fov.empty:
    raise ValueError(f"No cells found for NMF_factor = {niche_focus} in {fov_prefix}.")

# ===========================
# Step 4. Prepare coordinates
# ===========================
# Turn both centroid columns into numbers (bad entries become NaN) and keep
# only the cells for which both values are present.
centroid_columns = ['CenterX_local_px', 'CenterY_local_px']
for centroid_col in centroid_columns:
    obs_fov[centroid_col] = pd.to_numeric(obs_fov[centroid_col], errors='coerce')
coords_present = obs_fov[centroid_columns].notna().all(axis=1)
obs_fov = obs_fov.loc[coords_present].copy()

# Mirror Y against the image height so the points line up with the tissue image.
# NOTE(review): assumes the local-px Y axis runs opposite to the image rows -- confirm.
obs_fov['FlippedY'] = image_height - obs_fov['CenterY_local_px']

# ===========================
# Step 5. Clean PredictedCellType
# ===========================
# Work with plain strings, then give the string forms of missing values
# one explicit 'Unknown' label.
missing_labels = ['nan', 'None', 'NaN']
obs_fov['PredictedCellType'] = (
    obs_fov['PredictedCellType']
    .astype(str)
    .replace(missing_labels, 'Unknown')
)

# ===========================
# Step 6. Identify the top_n most abundant cell types
# ===========================
# value_counts() lists labels from most to least frequent, so the first
# top_n rows are the most abundant cell types.
cell_type_counts = obs_fov['PredictedCellType'].value_counts()
top_counts = cell_type_counts.head(top_n)
top_cell_types = list(top_counts.index)

print(f"Top {top_n} cell types in NMF {niche_focus}:")
print(top_counts)

# Drop every cell whose type is not among the selected ones
in_top_types = obs_fov['PredictedCellType'].isin(top_cell_types)
obs_fov = obs_fov.loc[in_top_types].copy()

# ===========================
# Step 7. Assign very distinct colors
# ===========================
# Fixed high-contrast palette, handed out in abundance order
distinct_colors = ["yellow", "green", "blue", "red", "magenta"]

# top_n is configurable, so there can be more cell types than palette entries.
# Wrap around the palette instead of failing with an IndexError, and say so
# because repeated colors make the legend ambiguous.
if len(top_cell_types) > len(distinct_colors):
    print(
        f"Warning: {len(top_cell_types)} cell types but only {len(distinct_colors)} colors; "
        "colors will repeat."
    )
color_dict = {
    cell_type: distinct_colors[i % len(distinct_colors)]
    for i, cell_type in enumerate(top_cell_types)
}

# Per-cell color looked up from the cell's predicted type
obs_fov['color'] = obs_fov['PredictedCellType'].map(color_dict)

# ===========================
# Step 8. Plot image + top cell types
# ===========================
fig, ax = plt.subplots(figsize=(12, 12))

# Grayscale tissue image as the background
ax.imshow(image_to_display, cmap='gray')

# One borderless marker per remaining cell, colored by predicted cell type
ax.scatter(
    obs_fov['CenterX_local_px'],
    obs_fov['FlippedY'],
    c=obs_fov['color'],
    s=32,
    alpha=0.9,
    edgecolors='none'
)

# One legend swatch per plotted cell type, most abundant first
patches = [
    mpatches.Patch(color=color_dict[ctype], label=ctype)
    for ctype in top_cell_types
]

# The title reports the configured top_n rather than a fixed "Top 5", so the
# legend stays truthful when top_n is changed.
legend = ax.legend(
    handles=patches,
    title=f"Predicted Cell Type (Top {top_n})",
    title_fontsize=17,
    fontsize=16,
    loc='upper right',
    bbox_to_anchor=(1.5, 1),
    frameon=True
)

# Hide ticks, labels and frame
ax.axis('off')

plt.tight_layout()

# ===========================
# Step 9. Save figure
# ===========================
# Build the file name from niche_focus and top_n so that changing either
# parameter writes a correctly labelled file instead of overwriting the
# "NMF3_Top5" one. With the values 3 and 5 the name is unchanged.
output_path = os.path.join(output_dir, f"UC_a_8_NMF{niche_focus}_Top{top_n}CellTypes.png")

# Write the figure (the tight bounding box keeps the outside legend), then release it
fig.savefig(output_path, dpi=300, bbox_inches='tight')
plt.close(fig)

# The summary messages are derived from the same parameters for the same reason
print(f"Mapping figure (Top {top_n} cell types, NMF {niche_focus} only) saved to: {output_path}")
print(f"Total niche {niche_focus} cells plotted: {obs_fov.shape[0]}")
print(f"Top {top_n} cell types: {top_cell_types}")

Multi-layer image detected with shape (5, 3648, 5472). Using the first layer for display.
Image dimensions: width=5472, height=3648
Top 5 cell types in NMF 3:
PredictedCellType
Cycling T cells    376
Ribhi T cells       72
Cycling TA          52
M0                  43
Mast                41
Name: count, dtype: int64
Mapping figure (Top 5 cell types, NMF 3 only) saved to: /blue/kejun.huang/tan.m/IBDCosMx_scRNAseq/CosMx/UC a_8/Mapping/UC_a_8_NMF3_Top5CellTypes.png
Total niche 3 cells plotted: 584
Top 5 cell types: ['Cycling T cells', 'Ribhi T cells', 'Cycling TA', 'M0', 'Mast']
