In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import anndata
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42

# Load the atlas table that the later cells index, filter and plot.
atlas = pd.read_parquet("atlas.parquet")
atlas

## Rename lipizones

In [None]:
# Renaming table for lipizones; the first CSV column becomes the lookup index.
renaminglipizones = pd.read_csv("lipizones_newnames.csv", index_col=0)

# Keep the untouched name around before it is shortened.
renaminglipizones['full_final_name'] = renaminglipizones['final_name'].copy()

# Shorten every name to the text in front of its first ' ||' separator.
renaminglipizones['final_name'] = renaminglipizones['final_name'].map(
    lambda full_name: full_name.partition(' ||')[0]
)

def rename_duplicates(series):
    """Make repeated names unique by appending a 1-based occurrence counter.

    Every value that occurs more than once in ``series`` is rewritten as
    ``"<name>_1"``, ``"<name>_2"``, ... in order of appearance. Values that
    occur exactly once are returned unchanged.

    Parameters
    ----------
    series : pandas.Series
        Names to de-duplicate. The input is not modified.

    Returns
    -------
    pandas.Series
        A copy of ``series`` (same index) in which duplicated names carry
        their occurrence suffix.
    """
    new_names = series.copy()

    # keep=False flags every member of a repeated group, including the first
    # occurrence, so the first one is suffixed as well.
    repeated_names = series[series.duplicated(keep=False)].unique()
    for name in repeated_names:
        mask = series == name
        n_occurrences = int(mask.sum())
        new_names[mask] = [f"{name}_{k}" for k in range(1, n_occurrences + 1)]

    return new_names

# Give every repeated short name a numeric suffix so that 'final_name' is unique.
renaminglipizones['final_name'] = rename_duplicates(renaminglipizones['final_name'])
renaminglipizones

In [None]:
# Translate each row's lipizone label through the renaming table (table index -> final_name).
# NOTE(review): the column is overwritten in place, so running this cell a second time
# looks up the already-renamed labels again; labels missing from the table index become NaN.
# Run it once per kernel session.
atlas['lipizone_names'] = atlas['lipizone_names'].map(renaminglipizones['final_name'])
atlas['lipizone_names']

## Prepare second atlas, programs, centroids...

In [None]:
# Load the second brain's table; it is normalised below and compared with the first atlas.
secondatlas = pd.read_parquet("brain3.parquet")
secondatlas

In [None]:
# Replace each level label (2..11) by the concatenation of its parent's
# (already cumulative) label and its own label, so 'level_k' holds the
# whole path from level 1 down to level k as a string.
for level in range(2, 12):
    parent_col = f'level_{level - 1}'
    child_col = f'level_{level}'
    atlas[child_col] = atlas[parent_col].astype(str) + atlas[child_col].astype(str)

In [None]:
# Load the program table; all of its columns are normalised and averaged per lipizone below.
# NOTE(review): this is an absolute, user-specific path while every other input is read
# relative to the working directory — consider moving it behind a configurable data dir.
programs = pd.read_hdf("/data/francesca/datasets/20241213_LBA_brain2_latent.h5ad", key="table")
programs

In [None]:
# Lookup table for the 5-digit cluster prefixes: one (cluster, zone, color)
# record per row, so each code sits next to its name and display colour.
namingtable = pd.DataFrame(
    [
        (11111, "Mixed and hindbrain white matter", "#360064"),
        (11112, "Core callosal white matter", "#980053"),
        (11121, "Callosal and cerebellar white matter", "#170b3b"),
        (11122, "Ventral white matter", "#ac2f5c"),
        (11211, "Boundary white matter", "#2a3f6d"),
        (11212, "Thalamic and mid/hindbrain white matter", "#002657"),
        (11221, "Mid/hindbrain white matter", "#21366b"),
        (11222, "Mixed white matter", "#3e4b6c"),
        (12111, "Choroid plexus and ventricles", "#f75400"),
        (12112, "Ventricular linings", "#ef633e"),
        (12121, "Thalamic and midbrain regions", "#a5d4e6"),
        (12122, "White and gray matter boundary", "#6399c6"),
        (12211, "Thalamic mixed gray and white matter", "#853a00"),
        (12212, "Thalamic mixed gray and white matter #2", "#edeef4"),
        (12221, "Neuron-rich lateral white matter", "#fdbf71"),
        (12222, "Neuron-rich lateral white matter #2", "#ce710e"),
        (21111, "Pallidum and projections", "#940457"),
        (21112, "Cortical layer 4", "#a2d36c"),
        (21120, "Subcortical plate, hippocampus and hypothalamus", "#d5edb5"),
        (21211, "GABA-ergic Purkinje cells of the cerebellum", "#0065d6"),
        (21212, "Cortical layers 2-3 and 4", "#bcf18b"),
        (21221, "Piriform cortex", "#a68d68"),
        (21222, "Cortical layers 1 and 2-3", "#79e47e"),
        (22111, "Cortical layer 5", "#2f0097"),
        (22112, "Cortical layer 6, dentate gyrus", "#47029f"),
        (22121, "Striatum, hypothalamus and hippocampus", "#7500a8"),
        (22122, "Striatum, hypothalamus and hippocampus #2", "#d70021"),
        (22211, "Retrosplenial, cortical, cerebellar", "#ca99c9"),
        (22212, "Cortical layer 6 and cerebellar Y", "#d4b9da"),
        (22221, "Cerebellar glutamatergic neurons", "#e00085"),
        (22222, "Cortical layer 6 and thalamic", "#f6f3f8"),
    ],
    columns=["cluster", "zone", "color"],
)
namingtable

In [None]:
# One row per distinct (acronym, allencolor) pair, addressable by acronym
# while keeping 'acronym' available as a regular column.
allencolors = (
    atlas[['acronym', 'allencolor']]
    .drop_duplicates()
    .set_index('acronym', drop=False)
)

allencolors

## Colocalization matrices

In [None]:
import scipy.cluster.hierarchy as sch
# Per-row Allen acronym and lipizone label (copies, so the atlas columns are not touched).
acronyms = atlas['acronym'].copy()
lipizones = atlas['lipizone_names'].copy()

# Keep only rows whose acronym occurs more than 500 times, and the matching lipizone labels.
acronyms = acronyms.loc[acronyms.isin(acronyms.value_counts().index[acronyms.value_counts() > 500])]
lipizones = lipizones.loc[acronyms.index]

# Direction 1 — counts with rows = acronyms, columns = lipizones.
cmat = pd.crosstab(acronyms, lipizones)

normalized_df = cmat / cmat.sum() # each lipizone column is divided by its total (columns sum to 1)
normalized_df = (normalized_df.T / normalized_df.T.mean()).T ## each acronym row is divided by its mean over lipizones
normalized_df1 = normalized_df.copy()
normalized_df1

# Direction 2 — same recipe on the transposed table (rows = lipizones, columns = acronyms),
# then transposed back so that it lines up with normalized_df1.
cmat = pd.crosstab(lipizones, acronyms)
normalized_df = cmat / cmat.sum() 
normalized_df = (normalized_df.T / normalized_df.T.mean()).T 
normalized_df2 = normalized_df.copy().T
normalized_df2

# Combined score: element-wise product of the two directions (acronyms x lipizones).
normalized_df = normalized_df2 * normalized_df1
# Order the lipizone columns by hierarchical clustering of their profiles.
linkage = sch.linkage(sch.distance.pdist(normalized_df.T), method='weighted', optimal_ordering=True)
order = sch.leaves_list(linkage)
normalized_df = normalized_df.iloc[:, order]

# Order the acronym rows by the column position of their maximum, which pushes the
# strongest entries towards the diagonal.
order = np.argmax(normalized_df.values, axis=1)
order = np.argsort(order)
normalized_df = normalized_df.iloc[order,:]

# Snapshot read by the ID-card cells (top acronyms per lipizone column).
ACROnormalized_df = normalized_df.copy()

plt.figure(figsize=(10, 10))
# Colour scale limited to the 2nd-98th percentile of the matrix values.
sns.heatmap(normalized_df, cmap="Purples", cbar_kws={'label': 'Enrichment'}, xticklabels=True, yticklabels=False, vmin = np.percentile(normalized_df, 2), vmax = np.percentile(normalized_df, 98))

plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
plt.tick_params(axis='y', which='both', left=False, right=False)

plt.yticks(rotation=0)

plt.tight_layout()
plt.show()

In [None]:
# Each entry of `pixelclosestcells` is looked up in `ctnow` below, i.e. it links an
# atlas row (its index) to a cell id (its value); entries without a match are dropped.
pixelclosestcells = pd.read_hdf("pixelclosestcells.h5ad", key="table")
pixelclosestcells = pixelclosestcells.dropna()
lipizoneZ = atlas.loc[pixelclosestcells.index, 'lipizone_names']
ctnow = pd.read_hdf("celltypesnow.h5ad", key="table")

# Focus on abundant cell types only: keep cells whose type occurs more than 50 times,
# then keep the rows whose closest cell survived that filter.
ctnow = ctnow[ctnow.isin(ctnow.value_counts()[ctnow.value_counts() > 50].index)]
pixelclosestcells = pixelclosestcells[pixelclosestcells.isin(ctnow.index)]
lipizoneZ = lipizoneZ.loc[pixelclosestcells.index]

# Cell type of each remaining row, re-indexed so it aligns with the lipizone labels.
celltypeZ = ctnow.loc[pixelclosestcells.values]
celltypeZ.index = lipizoneZ.index

# Direction 1 — counts with rows = lipizones, columns = cell types.
cmat = pd.crosstab(lipizoneZ, celltypeZ)
normalized_df = cmat / cmat.sum()  # each cell-type column divided by its total
normalized_df = (normalized_df.T / normalized_df.T.mean()).T  # each lipizone row divided by its mean
normalized_df1 = normalized_df.copy()

# Direction 2 — the same counts transposed (rows = cell types, columns = lipizones).
# The inputs are unchanged, so the table is reused instead of re-reading both HDF
# files and repeating the identical filtering.
cmat = cmat.T
normalized_df = cmat / cmat.sum()  # each lipizone column divided by its total
normalized_df = (normalized_df.T / normalized_df.T.mean())  # back to lipizones x cell types
normalized_df2 = normalized_df.copy()

# Combined score: element-wise product of the two directions (lipizones x cell types).
normalized_df = normalized_df2 * normalized_df1

# Order the cell-type columns by hierarchical clustering of their profiles.
linkage = sch.linkage(sch.distance.pdist(normalized_df.T), method='weighted', optimal_ordering=True)
order = sch.leaves_list(linkage)
normalized_df = normalized_df.iloc[:, order]

# Order the lipizone rows by the column position of their maximum.
order = np.argmax(normalized_df.values, axis=1)
order = np.argsort(order)
normalized_df = normalized_df.iloc[order,:]

# Snapshot read by the ID-card cells (top cell types per lipizone row).
CTnormalized_df = normalized_df.copy()

plt.figure(figsize=(20, 5))
# Colour scale limited to the 2nd-98th percentile of the matrix values.
sns.heatmap(normalized_df, cmap="Purples", cbar_kws={'label': 'Enrichment'}, xticklabels=True, yticklabels=False, vmin = np.percentile(normalized_df, 2), vmax = np.percentile(normalized_df, 98))

plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
plt.tick_params(axis='y', which='both', left=False, right=False)

plt.yticks(rotation=0)
# The matrix is lipizones x cell types; the previous title described a different figure.
plt.title('Enrichment of colocalizing cell types across lipizones')

plt.tight_layout()
plt.show()

In [None]:
# Rebuild the lipizone x cell-type count table from the files (the previous cell
# leaves `cmat` transposed, so it is not reused here).
pixelclosestcells = pd.read_hdf("pixelclosestcells.h5ad", key="table")
pixelclosestcells = pixelclosestcells.dropna()
lipizoneZ = atlas.loc[pixelclosestcells.index, 'lipizone_names']
ctnow = pd.read_hdf("celltypesnow.h5ad", key="table")

# Focus on abundant cell types only (more than 50 cells of that type).
ctnow = ctnow[ctnow.isin(ctnow.value_counts()[ctnow.value_counts() > 50].index)]
pixelclosestcells = pixelclosestcells[pixelclosestcells.isin(ctnow.index)]
lipizoneZ = lipizoneZ.loc[pixelclosestcells.index]

celltypeZ = ctnow.loc[pixelclosestcells.values]
celltypeZ.index = lipizoneZ.index
cmat = pd.crosstab(lipizoneZ, celltypeZ)

# Class of a cell type = text after the last '=' and before the first '_' that follows it.
celltype_classes = np.array([column.split('=')[-1].split('_')[0] for column in cmat.columns])
ctscores = cmat.copy()
ctscores.columns = celltype_classes
# Sum the columns that share a class. `groupby(..., axis=1)` is deprecated in
# pandas 2.1 and removed in 3.0, so group the transposed frame by its index instead.
ctscores = ctscores.T.groupby(level=0).sum().T

ctscores

In [None]:
# Display colour per cell-type class. The ID-card cell indexes this dict with every
# column label of `ctscores`, so each class appearing there needs an entry here.
color_dict = {
   'Astro': '#FF69B4',     # Hot pink
   'Chol': '#4B0082',      # Indigo
   'CholEx': '#00FF7F',    # Spring green
   'DopEx': '#FF4500',     # Orange red
   'Endo': '#1E90FF',      # Dodger blue
   'Ependymal': '#FFD700', # Gold
   'Ex': '#8B008B',        # Dark magenta
   'ExInh': '#00CED1',     # Dark turquoise
   'Fibro': '#FF1493',     # Deep pink
   'Inh': '#32CD32',       # Lime green
   'Macro': '#BA55D3',     # Medium orchid
   'Micro': '#FF8C00',     # Dark orange
   'NG': '#4169E1',        # Royal blue
   'Nor': '#8FBC8F',       # Dark sea green
   'OPC': '#DDA0DD',       # Plum
   'Oligo': '#20B2AA',     # Light sea green
   'Pit': '#CD5C5C',       # Indian red
   'Ser': '#9370DB',       # Medium purple
   'Tanycyte': '#F08080'   # Light coral
}

In [None]:
# The first 173 atlas columns are the ones rescaled here (later cells parse
# their names as lipid names).
columns_to_normalize = atlas.columns[:173]

# Robust per-column bounds: 0.5th and 99.5th percentiles.
p_low = atlas[columns_to_normalize].quantile(0.005)
p_high = atlas[columns_to_normalize].quantile(0.995)

# Clip to the bounds, rescale linearly to the bounds' range, then clip to [0, 1].
atlas_clipped = atlas.copy()
atlas_clipped[columns_to_normalize] = (
    atlas[columns_to_normalize]
    .clip(lower=p_low, upper=p_high, axis=1)
    .sub(p_low)
    .div(p_high - p_low)
    .clip(0, 1)
)

atlas_clipped

In [None]:
# Rescale the second brain with whatever `columns_to_normalize`, `p_low` and
# `p_high` currently hold. NOTE(review): these names are rebound by the programs
# cell further down, so this cell must run right after the atlas normalisation
# for both brains to share the atlas percentile bounds.
secondatlas_clipped = secondatlas.copy()
secondatlas_clipped[columns_to_normalize] = (
    secondatlas[columns_to_normalize]
    .clip(lower=p_low, upper=p_high, axis=1)
    .sub(p_low)
    .div(p_high - p_low)
    .clip(0, 1)
)

secondatlas_clipped

In [None]:
# Same percentile rescaling, now over every column of the programs table.
# Note that this rebinds `columns_to_normalize`, `p_low` and `p_high`.
columns_to_normalize = programs.columns

p_low = programs[columns_to_normalize].quantile(0.005)
p_high = programs[columns_to_normalize].quantile(0.995)

programs_clipped = programs.copy()
programs_clipped[columns_to_normalize] = (
    programs[columns_to_normalize]
    .clip(lower=p_low, upper=p_high, axis=1)
    .sub(p_low)
    .div(p_high - p_low)
    .clip(0, 1)
)

programs_clipped

In [None]:
# Mean of the first 173 (rescaled) columns within each lipizone.
centroids = atlas_clipped.iloc[:,:173].groupby(atlas_clipped["lipizone_names"]).mean()

# Each centroid value divided by that column's mean over all lipizones.
enrichments = centroids / centroids.mean()
enrichments

In [None]:
splits = pd.read_hdf("splithistory_allbrains.h5ad", key="table")

# Make every level label cumulative (path from level 1 down to that level).
for level in range(2, 12):
    splits[f'level_{level}'] = (
        splits[f'level_{level - 1}'].astype(str) + splits[f'level_{level}'].astype(str)
    )

# The level-11 path is the cluster code used to pair the two brains.
splits["cluster"] = splits["level_11"]
secondatlas_clipped["cluster"] = splits.loc[secondatlas.index, "cluster"]

# Per-cluster mean of the first 173 columns of the second brain.
centroids_second = (
    secondatlas_clipped.iloc[:, :173]
    .groupby(secondatlas_clipped["cluster"])
    .mean()
)
centroids_second

In [None]:
# Attach the split-history cluster code to the first atlas and average the first
# 173 columns per cluster.
atlas_clipped["cluster"] = splits.loc[atlas_clipped.index, "cluster"]
centroids_first = atlas_clipped.iloc[:,:173].groupby(atlas_clipped["cluster"]).mean()
# Reorder/restrict to the clusters and columns of the second brain's centroids so the
# two tables line up; .loc raises KeyError if one of them is missing here.
centroids_first = centroids_first.loc[centroids_second.index, centroids_second.columns]
centroids_first

In [None]:
# Mean rescaled program value per lipizone (grouping key is aligned on the row index).
centroidsP = programs_clipped.groupby(atlas["lipizone_names"]).mean()

# Each centroid value divided by that program's mean over all lipizones.
enrichmentsP = centroidsP / centroidsP.mean()
enrichmentsP

In [None]:
import re

# Annotation table with one row per column of `enrichments`, parsed from the lipid name.
df = pd.DataFrame(enrichments.columns).fillna('')
df.columns = ["lipid_name"]

# Class = text before the first space or '('. The pattern is a raw string:
# '\(' in a plain string literal is an invalid escape sequence (SyntaxWarning on
# Python 3.12+).
df["class"] = df["lipid_name"].apply(lambda x: re.split(r' |\(', x)[0])
# Carbons = first run of digits followed by ':'; insaturations = first run of digits
# preceded by ':'. Names without such a pattern get NaN.
df["carbons"] = df["lipid_name"].apply(lambda x: int(re.search(r'(\d+):', x).group(1)) if re.search(r'(\d+):', x) else np.nan)
df["insaturations"] = df["lipid_name"].apply(lambda x: int(re.search(r':(\d+)', x).group(1)) if re.search(r':(\d+)', x) else np.nan)
df["insaturations_per_Catom"] = df["insaturations"] / df["carbons"]

# Names ending in '_uncertain' carry no trustworthy annotation: blank every parsed field.
df["broken"] = df["lipid_name"].str.endswith('_uncertain')
df.loc[df["broken"], 'carbons'] = np.nan
df.loc[df["broken"], 'class'] = np.nan
df.loc[df["broken"], 'insaturations'] = np.nan
df.loc[df["broken"], 'insaturations_per_Catom'] = np.nan

# Colour per lipid class; uncertain names are gray, classes without a colour are black.
colors = pd.read_hdf("lipidclasscolors.h5ad", key="table")
df['color'] = df['class'].map(colors['classcolors'])
df.loc[df["broken"], 'color'] = "gray"

df.index = df['lipid_name']
df = df.drop_duplicates()
df['color'] = df['color'].fillna("black")
df

In [None]:
# Precomputed t-SNE coordinates; the ID-card cell selects rows by the atlas index
# and plots the first two columns.
tsne = pd.read_hdf("tsne_df.h5ad", key="table")
tsne

## The lipizone I use for testing

## Sex proportions

In [None]:
# Reload the raw split history (an earlier cell overwrote its level columns).
splits = pd.read_hdf("splithistory_allbrains.h5ad", key="table")

# Full lipizone code = level_1 ... level_11 labels concatenated as strings.
lipizone_code = splits['level_1'].astype(str)
for level in range(2, 12):
    lipizone_code = lipizone_code.astype(str) + splits[f'level_{level}'].astype(str)
splits['lipizone'] = lipizone_code

# Pair each coloured row with its lipizone code ...
colors = pd.read_hdf("colorzones.h5ad", key="table")
mapping = pd.DataFrame({
    'lipizone': splits.loc[colors.index, 'lipizone'],
    'lipizone_color': colors['lipizone_color']
})

# ... and keep the most frequent colour per lipizone code.
modal_mapping = (
    mapping.groupby('lipizone')['lipizone_color']
    .agg(lambda x: x.mode().iloc[0])
    .to_frame()
)

splits['lipizone_color'] = splits['lipizone'].map(modal_mapping['lipizone_color'])
metadata = pd.read_hdf("metadata.h5ad", key="table")
tmp = pd.concat([splits['lipizone_color'], metadata.loc[splits.index, "Sample"]], axis=1)

# Rows = samples, columns = lipizone colours, values = fraction of the sample's rows.
per_sample_fractions = tmp.groupby("Sample")["lipizone_color"].value_counts(normalize=True)
sample_x_lipiz_counts = per_sample_fractions.unstack(fill_value=0)
sample_x_lipiz_counts

# Average fraction per sex, aligned on the same colour order.
female_samples = ["Female1", "Female2", "Female3"]
male_samples = ["Male1", "Male2", "Male3"]
propfemales = sample_x_lipiz_counts.loc[female_samples, :].mean()
propmales = sample_x_lipiz_counts.loc[male_samples, :].mean()

propfemales = propfemales.loc[propmales.index]

# Male share of the summed averages; the female share is its complement.
propmales_vs_females = propmales / (propmales + propfemales)
propfemales_vs_males = 1 - propmales_vs_females

## Gene markers

In [None]:
# For every lipizone keep the names of its 100 markers with the largest
# log fold change; these feed the GO analysis and the ID cards.

markers = pd.read_hdf("lipizonegenemarkers.h5ad", key="table")
from tqdm import tqdm
lipitomark = {}

for lipizone in tqdm(markers['cell_type'].unique()):
    lipizone_rows = markers.loc[markers["cell_type"] == lipizone, :]
    # Ascending sort followed by a reversal gives the descending order.
    ranked = lipizone_rows.sort_values(by="logfoldchanges")[::-1]
    lipitomark[lipizone] = ranked.iloc[:100, :].loc[:, 'names'].values

# Spot-check one lipizone code.
lipitomark['222222222000000']

In [None]:
# Precomputed GO results; the ID-card cell reads 'GO_name' for rows whose 'lipizone' matches.
GOres_bylipi = pd.read_hdf("GOres_bylipi_significant.h5ad", key="table")

In [None]:
# Lipizone-wise connectome features, re-indexed from the original keys to the final names.
# NOTE(review): like the atlas renaming, this relabels in place — a second run looks up the
# already-renamed index in the renaming table.
connectome = pd.read_parquet("./connectomic_datasets/connectome2992features_lipizonewise.parquet")
connectome.index = renaminglipizones.loc[connectome.index,'final_name']
connectome

## Make 1 ID card

In [None]:
# Lipizone used for the single test card, plus the attributes derived from its rows.
LEV = "pyramid_1"
lev_rows = atlas.loc[atlas['lipizone_names'] == LEV]
CLUSTER = lev_rows['cluster'].unique()[0]
COLOR = lev_rows['lipizone_color'].unique()[0]
SUBCLASS = lev_rows['level_5'].unique()[0]
# The first five characters of the cluster code select the named zone.
zone_rows = namingtable['cluster'].astype(str) == CLUSTER[:5]
SUBCLNAME = namingtable.loc[zone_rows, "zone"].values[0]

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr
from adjustText import adjust_text
import textwrap
import re
import gc
from tqdm import tqdm

# --- Define helper functions ---
def clean_subclass_name(name):
    """Return ``name`` as text with reserved characters turned into single spaces.

    The characters ``/ \\ * : " < > | ?`` are replaced by spaces, after which
    every run of whitespace is collapsed to one space and the ends are trimmed.
    """
    to_space = str.maketrans(dict.fromkeys('/\\*:"<>|?', ' '))
    words = str(name).translate(to_space).split()
    return ' '.join(words)

def clean_filenamePD(name):
    """Return ``name`` as text with each of ``\\ / : " < > | ?`` replaced by ``_``.

    Every offending character yields exactly one underscore; all other
    characters (including ``*`` and whitespace) are kept as they are.
    """
    to_underscore = str.maketrans(dict.fromkeys('\\/:"<>|?', '_'))
    return str(name).translate(to_underscore)

# --- Number of lipizones processed between two garbage-collection passes ---
chunk_size = 50

# --- Lipizone names to render: everything from position 449 onwards ---
# (the slice start skips the first 449 unique names; set it to 0 to render all cards)
lipizone_names = atlas['lipizone_names'].unique()[449:]

# --- Split the names into consecutive chunks of at most chunk_size ---
chunks = [lipizone_names[i:i+chunk_size] for i in range(0, len(lipizone_names), chunk_size)]

# --- Process each chunk ---
# --- Render one ID card per lipizone, chunk by chunk ---
for chunk_idx, chunk in enumerate(chunks, start=1):
    print(f"Processing chunk {chunk_idx}/{len(chunks)}")
    for LEV in tqdm(chunk, desc=f"Chunk {chunk_idx}"):
        # All atlas rows of the lipizone being drawn, and a private copy of them.
        filtered_lev = atlas[atlas['lipizone_names'] == LEV]
        subdata = filtered_lev.copy()
        SF = 5  # scatter marker sizes below are divided by this factor

        # Attributes of the lipizone, taken from the first distinct value of each column.
        CLUSTER = filtered_lev['cluster'].unique()[0]
        COLOR = filtered_lev['lipizone_color'].unique()[0]
        SUBCLASS = filtered_lev['level_5'].unique()[0]

        # Zone name of the 5-character cluster prefix, sanitised for display.
        zone_rows = namingtable['cluster'].astype(str) == CLUSTER[:5]
        SUBCLNAME = clean_subclass_name(namingtable.loc[zone_rows, "zone"].values[0])

        # Section holding most of this lipizone's rows (used for the zoom-in panels).
        most_common_section = filtered_lev["Section"].value_counts().idxmax()

        # Create the figure and its seven stacked rows (the first row is three times as tall).
        fig = plt.figure(figsize=(20, 25), constrained_layout=True)
        fig.set_constrained_layout_pads(w_pad=2.0, h_pad=2.0)
        outer = fig.add_gridspec(nrows=7, ncols=1, height_ratios=[3,1,1,1,1,1,1], hspace=0.4)

        # --- ROW 1: 4x8 subplots ("Lipizone across sections") ---
        gs_r1 = outer[0].subgridspec(4, 8, wspace=0.05, hspace=0.05)
        sections = atlas['Section'].unique()
        # The boundary rows do not depend on the section, so select them once
        # instead of rescanning the whole atlas for each of the 32 panels.
        boundary_rows = atlas[atlas['boundary'] == 1]
        for i, sec in enumerate(sections[:32]):
            ax_spatial = fig.add_subplot(gs_r1[i])
            # `filtered_lev` already holds this lipizone's rows; narrow it to the section.
            sec_data = filtered_lev[filtered_lev["Section"] == sec]
            ax_spatial.scatter(sec_data['z_index'], -sec_data['y_index'],
                               c='red', s=5.0/SF, rasterized=True, edgecolors='none')
            sec_bound = boundary_rows[boundary_rows["Section"] == sec]
            ax_spatial.scatter(sec_bound['z_index'], -sec_bound['y_index'],
                               c='black', s=0.5/SF, rasterized=True, edgecolors='none', alpha=0.9)
            ax_spatial.set_aspect('equal')
            ax_spatial.axis('off')

        # --- ROW 2: "Lipizone zoom-in" + "Subclass zoom-in" ---
        gs_r2 = outer[1].subgridspec(1, 2, wspace=0.3)
        ax_zoom, ax_subclass = (fig.add_subplot(gs_r2[col]) for col in range(2))

        # Everything in this row is restricted to the lipizone's most populated section.
        in_main_section = atlas["Section"] == most_common_section
        main_data = filtered_lev[filtered_lev["Section"] == most_common_section]
        main_bound = atlas[in_main_section & (atlas['boundary'] == 1)]
        subclass_data = atlas[(atlas['level_5'] == SUBCLASS) & in_main_section]

        # Left: the lipizone alone in red. Right: its whole level-5 subclass,
        # every row in its own lipizone colour.
        ax_zoom.scatter(main_data['z_index'], -main_data['y_index'],
                        c='red', s=10/SF, edgecolors='none', rasterized=True)
        ax_subclass.scatter(subclass_data['z_index'], -subclass_data['y_index'],
                            c=subclass_data['lipizone_color'], s=4/SF,
                            edgecolors='none', rasterized=True)

        # Shared finish: boundary outline on top, equal aspect, no axes, title.
        for ax_panel, panel_title in ((ax_zoom, "Lipizone zoom-in"),
                                      (ax_subclass, "Subclass zoom-in")):
            ax_panel.scatter(main_bound['z_index'], -main_bound['y_index'],
                             c='black', s=2/SF, edgecolors='none', rasterized=True)
            ax_panel.set_aspect('equal')
            ax_panel.axis('off')
            ax_panel.set_title(panel_title, fontsize=10)

        # --- ROW 3: "t-SNE," "AP axis," "DV axis," "ML axis" ---
        gs_r3 = outer[2].subgridspec(1, 4, wspace=0.4)
        ax_TSNE, ax_AP, ax_DV, ax_ML = (fig.add_subplot(gs_r3[col]) for col in range(4))

        # Embedding of every atlas row in gray, with this lipizone's rows on top in its colour.
        tesneat = tsne.loc[atlas.index, :]
        sub_idx = atlas.index[atlas['lipizone_names'] == LEV]
        ax_TSNE.scatter(tesneat.iloc[:, 0], tesneat.iloc[:, 1],
                        c="gray", s=0.0005, alpha=0.5, rasterized=True)
        ax_TSNE.scatter(tesneat.loc[sub_idx, 0], tesneat.loc[sub_idx, 1],
                        c=COLOR, s=0.005, alpha=0.5, rasterized=True)
        for spine in ax_TSNE.spines.values():
            spine.set_visible(False)
        ax_TSNE.set_xticks([])
        ax_TSNE.set_yticks([])
        ax_TSNE.set_title("t-SNE", fontsize=10)

        # Density histogram plus KDE of the lipizone's rows along three coordinates.
        # NOTE(review): 'zccf' is titled "DV axis" and 'yccf' "ML axis"; confirm the
        # titles are not swapped for this dataset's coordinate convention.
        axis_panels = (
            ('Section', ax_AP, "AP axis"),
            ('zccf', ax_DV, "DV axis"),
            ('yccf', ax_ML, "ML axis"),
        )
        for axis_column, ax_hist, axis_title in axis_panels:
            sns.histplot(data=subdata[axis_column], bins=50, stat='density',
                         ax=ax_hist, rasterized=True)
            sns.kdeplot(data=subdata[axis_column], color='red', ax=ax_hist, rasterized=True)
            for spine in ['top','right','left','bottom']:
                ax_hist.spines[spine].set_visible(False)
            ax_hist.tick_params(bottom=False, left=False, labelbottom=False, labelleft=False)
            ax_hist.set_title(axis_title, fontsize=10)

        # --- ROW 4: "Abundance," "Male vs Female," "Repro scatter" ---
        gs_r4 = outer[3].subgridspec(1, 3, wspace=0.4)
        ax_ABUND, ax_MF, ax_REPRO = (fig.add_subplot(gs_r4[col]) for col in range(3))

        # Abundance: number of rows of this lipizone next to the median over all lipizones.
        medianvc = atlas['lipizone_names'].value_counts().median()
        n = subdata.shape[0]
        ax_ABUND.barh(['Median abundance', 'This lipizone'], [medianvc, n],
                      color=['yellow', 'black'], rasterized=True)
        for spine in ['top','right','left','bottom']:
            ax_ABUND.spines[spine].set_visible(False)
        ax_ABUND.tick_params(left=False, bottom=False)
        ax_ABUND.set_title("Abundance", fontsize=10)

        # Sex split: male share looked up by the lipizone colour, female share as complement.
        male_share = propmales_vs_females.loc[COLOR]
        sizes = [male_share, 1 - propmales_vs_females.loc[COLOR]]
        ax_MF.pie(sizes, colors=['darkblue', 'pink'], startangle=90)
        ax_MF.set_title("Male vs Female", fontsize=10)
        ax_MF.axis('equal')

        # Reproducibility: centroid of this cluster (first 11 characters of the code)
        # in the first atlas against the same cluster in the second atlas.
        cluster_key = CLUSTER[:11]
        x_values = centroids_first.loc[cluster_key, :].values.flatten()
        y_values = centroids_second.loc[cluster_key, :].values.flatten()
        pearson_r, _ = pearsonr(x_values, y_values)
        ax_REPRO.scatter(x_values, y_values, s=2, color='darkorange', rasterized=True)
        ax_REPRO.set_xlabel("Brain Atlas", fontsize=8)
        ax_REPRO.set_ylabel("Second Brain Atlas", fontsize=8)
        ax_REPRO.set_title(f"R = {pearson_r:.3f}", fontsize=10)
        for spine in ['top','right']:
            ax_REPRO.spines[spine].set_visible(False)

        # --- ROW 5: "Lipid markers," "lipiMap programs" ---
        gs_r5 = outer[4].subgridspec(1, 2, wspace=0.4)
        ax_markers, ax_programs = (fig.add_subplot(gs_r5[col]) for col in range(2))

        # Five largest enrichments of this lipizone among lipids and among programs.
        # (`markers` rebinds the name that held the gene-marker table in an earlier cell.)
        markers = enrichments.loc[LEV, :].sort_values(ascending=False)[:5]
        markersP = enrichmentsP.loc[LEV, :].sort_values(ascending=False)[:5]

        # Both panels are one column of dots sized by enrichment; lipid dots use the
        # lipid-class colour, program dots are black.
        dot_panels = (
            (ax_markers, markers, [df.loc[lipid, "color"] for lipid in markers.index], "Lipid markers"),
            (ax_programs, markersP, "black", "lipiMap programs"),
        )
        for ax_dot, top_hits, dot_color, panel_title in dot_panels:
            n_hits = len(top_hits)
            ax_dot.scatter([0] * n_hits, range(n_hits),
                           s=top_hits.values * 75, c=dot_color,
                           alpha=0.8, rasterized=True)
            ax_dot.set_yticks(range(n_hits))
            ax_dot.set_yticklabels(top_hits.index)
            ax_dot.set_xticks([])
            ax_dot.set_ylim(-0.5, n_hits + 0.5)
            for spine in ax_dot.spines.values():
                spine.set_visible(False)
            ax_dot.set_title(panel_title, fontsize=10)

        # --- ROW 6: "Allen acronyms," "colocalizing cell types,"
        #             "colocalizing proportions," "Lipizone neighborhood" ---
        gs_r6 = outer[5].subgridspec(1, 4, wspace=0.4)
        ax_ACRON, ax_CT, ax_CTCAKE, ax_NEIGH = (fig.add_subplot(gs_r6[col]) for col in range(4))

        # Ten strongest acronyms (column LEV) and ten strongest cell types (row LEV).
        markacro = ACROnormalized_df.loc[:, LEV].sort_values(ascending=False)[:10]
        markacro_ct = CTnormalized_df.loc[LEV, :].sort_values(ascending=False)[:10]
        # Cell-type tick labels show the text between the first and second '='.
        new_labels = markacro_ct.index.str.split('=').str[1]

        enrichment_panels = (
            (ax_ACRON, markacro.values, markacro.index,
             [allencolors.loc[lipid, "allencolor"] for lipid in markacro.index],
             "Enriched Allen acronyms"),
            (ax_CT, markacro_ct.values, new_labels, "gray",
             "Enriched colocalizing cell types"),
        )
        for ax_dot, dot_values, tick_labels, dot_color, panel_title in enrichment_panels:
            n_hits = len(tick_labels)
            ax_dot.scatter([0] * n_hits, range(n_hits),
                           s=dot_values / 10, c=dot_color,
                           alpha=0.8, rasterized=True)
            ax_dot.set_yticks(range(n_hits))
            ax_dot.set_yticklabels(tick_labels)
            ax_dot.set_xticks([])
            ax_dot.set_ylim(-0.5, n_hits - 0.5)
            for spine in ax_dot.spines.values():
                spine.set_visible(False)
            ax_dot.set_title(panel_title, fontsize=10)

        # Pie of cell-type class counts for this lipizone; only the five largest are labelled.
        celltypesneighborhood = ctscores.loc[LEV, :]
        top_5_mask = celltypesneighborhood.nlargest(5).index
        all_labels = [label if label in top_5_mask else ''
                      for label in celltypesneighborhood.index]
        wedge_colors = [color_dict[x] for x in celltypesneighborhood.index]
        wedges, texts = ax_CTCAKE.pie(
            celltypesneighborhood,
            labels=all_labels,
            colors=wedge_colors,
            wedgeprops=dict(edgecolor='w')
        )
        # Nudge the visible labels apart.
        adjust_text([txt for txt in texts if txt.get_text() != ''], ax=ax_CTCAKE)
        ax_CTCAKE.axis('equal')
        ax_CTCAKE.set_title("Colocalizing proportions", fontsize=10)

        # --- "Lipizone neighborhood": which other lipizones touch this one ---
        # A row is a neighbour when it lies in the same section within one step in
        # y_index and/or z_index of a row of this lipizone, without being one of them.
        # Rows are matched through "Section_y_z" string keys, built with vectorised
        # operations (no per-row Python loop and no copy of the whole atlas).
        own_section = subdata['Section'].astype(str)
        own_keys = (own_section + "_" +
                    subdata['y_index'].astype(str) + "_" +
                    subdata['z_index'].astype(str))
        neighbor_keys = set()
        for dy in (-1, 0, 1):
            for dz in (-1, 0, 1):
                shifted_keys = (own_section + "_" +
                                (subdata['y_index'] + dy).astype(str) + "_" +
                                (subdata['z_index'] + dz).astype(str))
                neighbor_keys.update(shifted_keys)
        # Positions occupied by the lipizone itself are not neighbours.
        neighbor_keys.difference_update(own_keys)

        atlas_keys = (atlas['Section'].astype(str) + "_" +
                      atlas['y_index'].astype(str) + "_" +
                      atlas['z_index'].astype(str))
        dd3 = atlas.loc[atlas_keys.isin(neighbor_keys) & (atlas['lipizone_names'] != LEV), :]

        # Neighbour lipizones sorted by decreasing number of touching rows.
        neighbor_counts = dd3['lipizone_names'].value_counts()
        unique_labels = neighbor_counts.index.to_numpy()
        counts = neighbor_counts.to_numpy()

        # Only draw when there is at least one neighbour; an empty count vector
        # would otherwise be divided by a zero total.
        if counts.size:
            proportions = counts / counts.sum()
            # Look the colour up per lipizone name so every wedge gets the colour of
            # its own label. Counting the colour column separately can order ties
            # differently (or differ in length) and mismatch colours and labels.
            color_by_name = (dd3.drop_duplicates('lipizone_names')
                                .set_index('lipizone_names')['lipizone_color'])
            colors_neigh = color_by_name.reindex(unique_labels).to_numpy()
            # The counts are sorted in decreasing order, so the first five are the largest.
            label_list = [label if rank < 5 else ''
                          for rank, label in enumerate(unique_labels)]
            wedges, texts = ax_NEIGH.pie(proportions,
                                         labels=label_list,
                                         colors=colors_neigh,
                                         wedgeprops=dict(edgecolor='w'))
        ax_NEIGH.axis('equal')
        ax_NEIGH.set_title("Lipizone neighborhood", fontsize=10)

        # --- ROW 7: "Top marker genes," "Significant GO terms," "Connectome" ---
        gs_r7 = outer[6].subgridspec(1, 3, wspace=0.4)
        ax_GEXPR, ax_GONT, ax_CONNECTOME = (fig.add_subplot(gs_r7[col]) for col in range(3))

        # Text panels: the first ten marker genes of the cluster, and the names of
        # the GO results recorded for it.
        gexpr_text = ", ".join(lipitomark[CLUSTER][:10])
        ontologies = GOres_bylipi.loc[GOres_bylipi['lipizone'] == CLUSTER, 'GO_name']
        text_panels = (
            (ax_GEXPR, gexpr_text, "Top marker genes"),
            (ax_GONT, " ".join(ontologies), "Significant GO terms"),
        )
        for ax_text, body_text, panel_title in text_panels:
            ax_text.text(0.01, 0.99,
                         textwrap.fill(body_text, width=50),
                         transform=ax_text.transAxes,
                         ha='left', va='top')
            ax_text.axis('off')
            ax_text.set_title(panel_title, fontsize=10)

        # Connectome panel: the five largest features of this lipizone as a row of dots.
        top_streams = connectome.loc[LEV, :].sort_values(ascending=False)[:5]
        n_streams = len(top_streams)
        ax_CONNECTOME.scatter(range(n_streams), [0] * n_streams,
                              s=top_streams.values * 300, c="black", alpha=0.8,
                              rasterized=True)
        ax_CONNECTOME.set_xticks(range(n_streams))
        ax_CONNECTOME.set_xticklabels(top_streams.index, rotation=45)
        ax_CONNECTOME.set_yticks([])
        ax_CONNECTOME.set_xlim(-0.5, n_streams - 0.5)
        for spine in ax_CONNECTOME.spines.values():
            spine.set_visible(False)
        ax_CONNECTOME.set_title("Top connectomic streams", fontsize=10)

        # --- Final adjustments and saving ---
        fig.suptitle(f"{LEV} (subclass: {SUBCLNAME})", fontsize=24)
        # NOTE(review): the figure was created with constrained_layout=True; matplotlib
        # documents subplots_adjust as incompatible with that layout engine — confirm
        # this call has the intended effect on the installed version.
        fig.subplots_adjust(top=0.92)
        
        # One PDF per lipizone; the name is sanitised before it goes into the path.
        # NOTE(review): the ./ID_cards directory is assumed to exist already.
        safe_LEV = clean_filenamePD(LEV)
        output_path = f"./ID_cards/lipizone_ID_card_{safe_LEV}.pdf"
        fig.savefig(output_path, dpi=300)
        # Close the figure so figures do not accumulate in memory across the loop.
        plt.close(fig)
    
    # Clean up memory after each chunk
    gc.collect()


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr
from adjustText import adjust_text
import textwrap
import re

def clean_subclass_name(name):
    """Make a subclass label safe to embed in titles and file names.

    ``name`` is converted with ``str()``. Every run of the characters
    ``/ \\ * : " < > | ?`` becomes a single space, then all whitespace runs
    are collapsed to one space and leading/trailing whitespace is removed.

    Returns the cleaned string.
    """
    # Splitting on the reserved-character runs and re-joining with a space is
    # the same as substituting each run with a space.
    pieces = re.split(r'[/\\*:"<>|?]+', str(name))
    words = ' '.join(pieces).split()
    return ' '.join(words)

def clean_filenamePD(name):
    """Return ``str(name)`` with each of ``\\ / : " < > | ?`` replaced by an underscore."""
    # Defined once here instead of being re-created on every loop iteration.
    return re.sub(r'[\\/:"<>|?]', '_', str(name))


# Divisor applied to every marker size in the spatial panels.
SF = 5

# Position (in order of first appearance) of the first lipizone to render;
# everything before it is skipped.
# NOTE(review): presumably a resume point after an interrupted run -- set to 0
# to regenerate every card.
FIRST_LIPIZONE = 112

# ---------------------------------------------------------------------------
# Loop-invariant inputs: computed once instead of once per lipizone.
# ---------------------------------------------------------------------------
sections = atlas['Section'].unique()
atlas_boundary = atlas[atlas['boundary'] == 1]
tesneat = tsne.loc[atlas.index, :]
medianvc = atlas['lipizone_names'].value_counts().median()
# One "Section_y_z" string per atlas pixel, used to look up spatial neighbours.
atlas_pixel_keys = (atlas['Section'].astype(str) + "_" +
                    atlas['y_index'].astype(str) + "_" +
                    atlas['z_index'].astype(str))

# Render one multi-panel "ID card" PDF per lipizone and save it under ./ID_cards/.
for LEV in tqdm(atlas['lipizone_names'].unique()[FIRST_LIPIZONE:]):

    # Boolean mask of the pixels belonging to this lipizone (computed once).
    is_lev = (atlas['lipizone_names'] == LEV).to_numpy()
    filtered_lev = atlas[is_lev]
    # Read-only alias kept for the panels below; nothing mutates it, so no copy is needed.
    subdata = filtered_lev

    CLUSTER   = filtered_lev['cluster'].unique()[0]
    COLOR     = filtered_lev['lipizone_color'].unique()[0]
    SUBCLASS  = filtered_lev['level_5'].unique()[0]
    # The first five characters of the cluster code select the subclass row in namingtable.
    SUBCLNAME = namingtable.loc[namingtable['cluster'].astype(str) == CLUSTER[:5], "zone"].values[0]
    SUBCLNAME = clean_subclass_name(SUBCLNAME)

    # Section holding the most pixels of this lipizone; used for the zoom-in panels.
    most_common_section = filtered_lev["Section"].value_counts().idxmax()

    fig = plt.figure(figsize=(20, 25), constrained_layout=True)
    fig.set_constrained_layout_pads(w_pad=2.0, h_pad=2.0)

    # 7 rows, row #1 is triple height:
    outer = fig.add_gridspec(nrows=7, ncols=1,
                             height_ratios=[3,1,1,1,1,1,1],
                             hspace=0.4)

    # -------------------------------------------------------------------------
    # ROW 1 (4×8 subplots): "Lipizone across sections"
    # -------------------------------------------------------------------------
    gs_r1 = outer[0].subgridspec(4, 8, wspace=0.05, hspace=0.05)
    for i, sec in enumerate(sections[:32]):
        ax_spatial = fig.add_subplot(gs_r1[i])
        # Lipizone pixels in red on top of the section's boundary pixels in black.
        sec_data = filtered_lev[filtered_lev["Section"] == sec]
        ax_spatial.scatter(sec_data['z_index'], -sec_data['y_index'],
                           c='red', s=5.0/SF, rasterized=True, edgecolors='none')
        sec_bound = atlas_boundary[atlas_boundary["Section"] == sec]
        ax_spatial.scatter(sec_bound['z_index'], -sec_bound['y_index'],
                           c='black', s=0.5/SF, rasterized=True, edgecolors='none', alpha=0.9)
        ax_spatial.set_aspect('equal')
        ax_spatial.axis('off')

    # -------------------------------------------------------------------------
    # ROW 2 (1×2): "Lipizone zoom-in" + "Subclass zoom-in"
    # -------------------------------------------------------------------------
    gs_r2 = outer[1].subgridspec(1, 2, wspace=0.3)
    ax_zoom = fig.add_subplot(gs_r2[0])
    ax_subclass = fig.add_subplot(gs_r2[1])

    main_data = filtered_lev[filtered_lev["Section"] == most_common_section]
    main_bound = atlas_boundary[atlas_boundary["Section"] == most_common_section]
    ax_zoom.scatter(main_data['z_index'], -main_data['y_index'],
                    c='red', s=10/SF, edgecolors='none', rasterized=True)
    ax_zoom.scatter(main_bound['z_index'], -main_bound['y_index'],
                    c='black', s=2/SF, edgecolors='none', rasterized=True)
    ax_zoom.set_aspect('equal')
    ax_zoom.axis('off')
    ax_zoom.set_title("Lipizone zoom-in", fontsize=10)

    # All lipizones sharing this level_5 value, drawn in their own colours.
    subclass_data = atlas[(atlas['level_5'] == SUBCLASS) & (atlas["Section"] == most_common_section)]
    ax_subclass.scatter(subclass_data['z_index'], -subclass_data['y_index'],
                        c=subclass_data['lipizone_color'], s=4/SF, edgecolors='none', rasterized=True)
    ax_subclass.scatter(main_bound['z_index'], -main_bound['y_index'],
                        c='black', s=2/SF, edgecolors='none', rasterized=True)
    ax_subclass.set_aspect('equal')
    ax_subclass.axis('off')
    ax_subclass.set_title("Subclass zoom-in", fontsize=10)

    # -------------------------------------------------------------------------
    # ROW 3 (1×4): "t-SNE," "AP axis," "DV axis," "ML axis"
    # -------------------------------------------------------------------------
    gs_r3 = outer[2].subgridspec(1, 4, wspace=0.4)
    ax_TSNE   = fig.add_subplot(gs_r3[0])
    ax_AP     = fig.add_subplot(gs_r3[1])
    ax_DV     = fig.add_subplot(gs_r3[2])
    ax_ML     = fig.add_subplot(gs_r3[3])

    # Whole atlas in gray, this lipizone highlighted in its own colour.
    ax_TSNE.scatter(tesneat.iloc[:, 0], tesneat.iloc[:, 1],
                    c="gray", s=0.0005, alpha=0.5, rasterized=True)
    # Positional column access, consistent with the background scatter above
    # (the original mixed .iloc with .loc[..., 0] / .loc[..., 1] labels).
    tsne_lev = tesneat.loc[filtered_lev.index]
    ax_TSNE.scatter(tsne_lev.iloc[:, 0], tsne_lev.iloc[:, 1],
                    c=COLOR, s=0.005, alpha=0.5, rasterized=True)
    for spine in ax_TSNE.spines.values():
        spine.set_visible(False)
    ax_TSNE.set_xticks([])
    ax_TSNE.set_yticks([])
    ax_TSNE.set_title("t-SNE", fontsize=10)

    # Density histogram + KDE of the lipizone's pixels along three atlas columns.
    for ax_hist, column, title in ((ax_AP, 'Section', "AP axis"),
                                   (ax_DV, 'zccf', "DV axis"),
                                   (ax_ML, 'yccf', "ML axis")):
        sns.histplot(data=subdata[column], bins=50, stat='density', ax=ax_hist, rasterized=True)
        sns.kdeplot(data=subdata[column], color='red', ax=ax_hist, rasterized=True)
        for spine in ['top','right','left','bottom']:
            ax_hist.spines[spine].set_visible(False)
        ax_hist.tick_params(bottom=False, left=False, labelbottom=False, labelleft=False)
        ax_hist.set_title(title, fontsize=10)

    # -------------------------------------------------------------------------
    # ROW 4 (1×3): "Abundance," "Male vs Female," "Repro scatter"
    # -------------------------------------------------------------------------
    gs_r4 = outer[3].subgridspec(1, 3, wspace=0.4)
    ax_ABUND  = fig.add_subplot(gs_r4[0])
    ax_MF     = fig.add_subplot(gs_r4[1])
    ax_REPRO  = fig.add_subplot(gs_r4[2])

    # Pixel count of this lipizone next to the median pixel count over all lipizones.
    n = subdata.shape[0]
    bar_names = ['Median abundance', 'This lipizone']
    bar_lengths = [medianvc, n]
    bar_colors = ['yellow', 'black']
    ax_ABUND.barh(bar_names, bar_lengths, color=bar_colors, rasterized=True)
    for spine in ['top','right','left','bottom']:
        ax_ABUND.spines[spine].set_visible(False)
    ax_ABUND.tick_params(left=False, bottom=False)
    ax_ABUND.set_title("Abundance", fontsize=10)

    # propmales_vs_females is looked up by the lipizone colour; the value is
    # used as the first pie slice and its complement as the second.
    sizes = [propmales_vs_females.loc[COLOR], 1 - propmales_vs_females.loc[COLOR]]
    ax_MF.pie(sizes, colors=['darkblue', 'pink'], startangle=90)
    ax_MF.set_title("Male vs Female", fontsize=10)
    ax_MF.axis('equal')

    # Centroid of this cluster (first 11 characters of the code) in both atlases.
    x_values = centroids_first.loc[CLUSTER[:11], :].values.flatten()
    y_values = centroids_second.loc[CLUSTER[:11], :].values.flatten()
    pearson_r, _ = pearsonr(x_values, y_values)
    ax_REPRO.scatter(x_values, y_values, s=2, color='darkorange', rasterized=True)
    ax_REPRO.set_xlabel("Brain Atlas", fontsize=8)
    ax_REPRO.set_ylabel("Second Brain Atlas", fontsize=8)
    ax_REPRO.set_title(f"R = {pearson_r:.3f}", fontsize=10)
    for spine in ['top','right']:
        ax_REPRO.spines[spine].set_visible(False)

    # -------------------------------------------------------------------------
    # ROW 5 (1×2): "Lipid markers," "lipiMap programs"
    # -------------------------------------------------------------------------
    gs_r5 = outer[4].subgridspec(1, 2, wspace=0.4)
    ax_markers = fig.add_subplot(gs_r5[0])
    ax_programs = fig.add_subplot(gs_r5[1])

    # Five largest entries of this lipizone's row; dot size scales with the value.
    markers = enrichments.loc[LEV, :].sort_values(ascending=False)[:5]
    lipids_markers = markers.index
    vals_markers = markers.values
    dot_sizes_markers = vals_markers * 75
    dot_colors_markers = [df.loc[lipid, "color"] for lipid in lipids_markers]
    ax_markers.scatter([0]*len(lipids_markers), range(len(lipids_markers)),
                       s=dot_sizes_markers, c=dot_colors_markers,
                       alpha=0.8, rasterized=True)
    ax_markers.set_yticks(range(len(lipids_markers)))
    ax_markers.set_yticklabels(lipids_markers)
    ax_markers.set_xticks([])
    ax_markers.set_ylim(-0.5, len(lipids_markers)+0.5)
    for spine in ax_markers.spines.values():
        spine.set_visible(False)
    ax_markers.set_title("Lipid markers", fontsize=10)

    markersP = enrichmentsP.loc[LEV, :].sort_values(ascending=False)[:5]
    lipids_prog = markersP.index
    vals_prog = markersP.values
    dot_sizes_prog = vals_prog * 75
    ax_programs.scatter([0]*len(lipids_prog), range(len(lipids_prog)),
                        s=dot_sizes_prog, c="black", alpha=0.8, rasterized=True)
    ax_programs.set_yticks(range(len(lipids_prog)))
    ax_programs.set_yticklabels(lipids_prog)
    ax_programs.set_xticks([])
    ax_programs.set_ylim(-0.5, len(lipids_prog)+0.5)
    for spine in ax_programs.spines.values():
        spine.set_visible(False)
    ax_programs.set_title("lipiMap programs", fontsize=10)

    # -------------------------------------------------------------------------
    # ROW 6 (1×4): "Allen acronyms," "colocalizing cell types,"
    #             "colocalizing proportions," "Lipizone neighborhood"
    # -------------------------------------------------------------------------
    gs_r6 = outer[5].subgridspec(1, 4, wspace=0.4)
    ax_ACRON  = fig.add_subplot(gs_r6[0])
    ax_CT     = fig.add_subplot(gs_r6[1])
    ax_CTCAKE = fig.add_subplot(gs_r6[2])
    ax_NEIGH  = fig.add_subplot(gs_r6[3])

    # ACROnormalized_df is indexed the other way round: lipizones are columns.
    markacro = ACROnormalized_df.loc[:, LEV].sort_values(ascending=False)[:10]
    lipids_acron = markacro.index
    vals_acron = markacro.values
    ax_ACRON.scatter([0]*len(lipids_acron), range(len(lipids_acron)),
                     s=vals_acron / 10,
                     c=[allencolors.loc[lipid, "allencolor"] for lipid in lipids_acron],
                     alpha=0.8, rasterized=True)
    ax_ACRON.set_yticks(range(len(lipids_acron)))
    ax_ACRON.set_yticklabels(lipids_acron)
    ax_ACRON.set_xticks([])
    ax_ACRON.set_ylim(-0.5, len(lipids_acron)-0.5)
    for spine in ax_ACRON.spines.values():
        spine.set_visible(False)
    ax_ACRON.set_title("Enriched Allen acronyms", fontsize=10)

    markacro_ct = CTnormalized_df.loc[LEV, :].sort_values(ascending=False)[:10]
    # Column labels are split on '=' and only the second part is shown.
    new_labels = markacro_ct.index.str.split('=').str[1]
    vals_ct = markacro_ct.values
    ax_CT.scatter([0]*len(new_labels), range(len(new_labels)),
                  s=vals_ct / 10, c="gray", alpha=0.8, rasterized=True)
    ax_CT.set_yticks(range(len(new_labels)))
    ax_CT.set_yticklabels(new_labels)
    ax_CT.set_xticks([])
    ax_CT.set_ylim(-0.5, len(new_labels)-0.5)
    for spine in ax_CT.spines.values():
        spine.set_visible(False)
    ax_CT.set_title("Enriched colocalizing cell types", fontsize=10)

    # Pie of all cell-type scores; only the five largest wedges get a label.
    celltypesneighborhood = ctscores.loc[LEV, :]
    top_5_mask = celltypesneighborhood.nlargest(5).index
    all_labels = [label if label in top_5_mask else '' for label in celltypesneighborhood.index]
    wedges, texts = ax_CTCAKE.pie(
        celltypesneighborhood,
        labels=all_labels,
        colors=[color_dict[x] for x in celltypesneighborhood.index],
        wedgeprops=dict(edgecolor='w')
    )
    adjust_text([txt for txt in texts if txt.get_text() != ''], ax=ax_CTCAKE)
    ax_CTCAKE.axis('equal')
    ax_CTCAKE.set_title("Colocalizing proportions", fontsize=10)

    # --- Lipizone neighbourhood -------------------------------------------
    # Collect the "Section_y_z" keys of every pixel in the 3x3 window around
    # each pixel of this lipizone (same section), built column-wise rather
    # than row by row.
    section_str = filtered_lev['Section'].astype(str)
    neighbor_keys = set()
    for dy in (-1, 0, 1):
        y_str = (filtered_lev['y_index'] + dy).astype(str)
        for dz in (-1, 0, 1):
            z_str = (filtered_lev['z_index'] + dz).astype(str)
            neighbor_keys.update(section_str + "_" + y_str + "_" + z_str)
    # Positions occupied by the lipizone itself are not neighbours.
    neighbor_keys.difference_update(atlas_pixel_keys[is_lev])

    # Atlas pixels sitting on a neighbour position and carrying another label.
    is_neighbor = atlas_pixel_keys.isin(neighbor_keys).to_numpy() & ~is_lev
    neighbor_rows = atlas[is_neighbor]

    neigh_counts = neighbor_rows['lipizone_names'].value_counts()
    # Drop zero-count entries (possible if the column is categorical).
    neigh_counts = neigh_counts[neigh_counts > 0]
    unique_labels = neigh_counts.index.to_numpy()
    counts = neigh_counts.to_numpy()

    # Skip the pie when there is nothing to show instead of dividing by zero.
    if counts.sum() > 0:
        proportions = counts / counts.sum()
        # Look the colour up per label so wedge colours cannot drift out of
        # step with wedge labels (the original relied on two independent
        # value_counts() calls happening to return the same order).
        colored_rows = neighbor_rows.dropna(subset=['lipizone_color'])
        color_of = dict(zip(colored_rows['lipizone_names'], colored_rows['lipizone_color']))
        colors_neigh = [color_of.get(label, 'lightgray') for label in unique_labels]
        top_5_indices = proportions.argsort()[-5:][::-1]
        label_list = [label if i in top_5_indices else ''
                      for i, label in enumerate(unique_labels)]
        wedges, texts = ax_NEIGH.pie(proportions,
                                     labels=label_list,
                                     colors=colors_neigh,
                                     wedgeprops=dict(edgecolor='w'))
    ax_NEIGH.axis('equal')
    ax_NEIGH.set_title("Lipizone neighborhood", fontsize=10)

    # -------------------------------------------------------------------------
    # ROW 7 (1×3): "Top marker genes," "Significant GO terms," "Connectome"
    # -------------------------------------------------------------------------
    gs_r7 = outer[6].subgridspec(1, 3, wspace=0.4)
    ax_GEXPR       = fig.add_subplot(gs_r7[0])
    ax_GONT        = fig.add_subplot(gs_r7[1])
    ax_CONNECTOME  = fig.add_subplot(gs_r7[2])

    # First ten entries of lipitomark for this cluster, as wrapped text.
    gexpr_text = ", ".join(lipitomark[CLUSTER][:10])
    wrapped_gexpr_text = textwrap.fill(gexpr_text, width=50)
    ax_GEXPR.text(0.01, 0.99,
                  wrapped_gexpr_text,
                  transform=ax_GEXPR.transAxes,
                  ha='left', va='top')
    ax_GEXPR.axis('off')
    ax_GEXPR.set_title("Top marker genes", fontsize=10)

    ontologies = GOres_bylipi.loc[GOres_bylipi['lipizone'] == CLUSTER, 'GO_name']
    wrapped_gont_text = textwrap.fill(" ".join(ontologies), width=50)
    ax_GONT.text(0.01, 0.99,
                 wrapped_gont_text,
                 transform=ax_GONT.transAxes,
                 ha='left', va='top')
    ax_GONT.axis('off')
    ax_GONT.set_title("Significant GO terms", fontsize=10)

    maerkersC = connectome.loc[LEV, :].sort_values(ascending=False)[:5]
    lipids_conn = maerkersC.index
    vals_conn = maerkersC.values
    ax_CONNECTOME.scatter(range(len(lipids_conn)), [0]*len(lipids_conn),
                          s=vals_conn * 300, c="black", alpha=0.8,
                          rasterized=True)
    ax_CONNECTOME.set_xticks(range(len(lipids_conn)))
    ax_CONNECTOME.set_xticklabels(lipids_conn, rotation=45)
    ax_CONNECTOME.set_yticks([])
    ax_CONNECTOME.set_xlim(-0.5, len(lipids_conn)-0.5)
    for spine in ax_CONNECTOME.spines.values():
        spine.set_visible(False)
    ax_CONNECTOME.set_title("Top connectomic streams", fontsize=10)

    # -------------------------------------------------------------------------
    # Make the suptitle bigger and reduce top blank space
    # -------------------------------------------------------------------------
    fig.suptitle(f"{LEV} (subclass: {SUBCLNAME})", fontsize=24)
    # NOTE(review): the figure is created with constrained_layout=True, which
    # matplotlib documents as incompatible with subplots_adjust. Depending on
    # the installed version this call is either ignored with a warning or
    # switches constrained layout off -- confirm which outcome is intended.
    fig.subplots_adjust(top=0.92)

    safe_LEV = clean_filenamePD(LEV)
    output_path = f"./ID_cards/lipizone_ID_card_{safe_LEV}.pdf"

    fig.savefig(output_path, dpi=300)
    # Close explicitly so figures do not pile up in memory across iterations.
    plt.close(fig)
