In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import anndata
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42

# Pixel-level atlas table. Later cells read columns such as 'id', 'Sample', 'Section',
# 'zccf', 'acronym', 'lipizone', 'lipizone_color', 'boundary', 'z_index' and 'y_index'.
atlas = pd.read_parquet("./zenodo/maindata_2.parquet")
# Per-acronym cluster fractions for the MERFISH data, indexed by the first CSV column;
# further down its index is matched against the acronym index of the MALDI-derived table.
# NOTE(review): presumably precomputed in another notebook — confirm provenance.
fractionsacro_leidenmerfish = pd.read_csv("./zenodo/csv/fractionsacro_leidenmerfish.csv", index_col=0)

## Determine the unique spatial patterns of cortical lipizones

In [None]:
import pickle

file_path = './zenodo/mixed/allen_name_to_annots.pkl'

# NOTE(security): pickle.load can execute arbitrary code while deserialising.
# Only load this file from a trusted source.
with open(file_path, 'rb') as file:
    # maps a division name to the collection of atlas ids that belong to it
    allen_name_to_annots = pickle.load(file)

divisions = ['Olfactory areas', 'Isocortex', 'Hippocampal formation', 'Cortical subplate', 'Striatum', 'Pallidum', 'Thalamus', 'Hypothalamus', 'Midbrain', 'Hindbrain', 'Cerebellum', 'fiber tracts', 'ventricular systems']

# Every pixel starts as "General" and is relabelled with the division containing its id.
# If an id is listed under several divisions, the one that comes last in `divisions` wins.
atlas['division'] = "General"
for division in divisions:
    # A single .loc assignment writes into `atlas` itself. The previous chained form
    # (atlas['division'][mask] = ...) raises SettingWithCopyWarning and leaves `atlas`
    # untouched when pandas copy-on-write is active.
    atlas.loc[atlas['id'].isin(allen_name_to_annots[division]), 'division'] = division

atlas['division'].value_counts()

In [None]:
# Work on the reference atlas sample only.
atlas = atlas[atlas['Sample'].eq("ReferenceAtlas")]

# Mean 'zccf' coordinate, used as the cut between the two sides of the volume.
midp = atlas['zccf'].mean()

# Isocortex pixels lying above that cut.
data = atlas[atlas['division'].eq("Isocortex") & atlas['zccf'].gt(midp)]

# Drop sections 4 and below: the anterior sections are a bit their own world.
data = data[data['Section'].gt(4)]

unique_sections = data["Section"].unique()

In [None]:
# Select three AP positions and keep ALL cortical layers.
focus = data[data["Section"].isin([9, 11, 12])]

# Keep only abundant lipizones (more than 150 pixels in the selection).
color_counts = focus["lipizone_color"].value_counts()
unique_colors = color_counts.index[color_counts > 150]
# .copy() gives an independent frame, so adding 'leiden_cluster' below does not write into a
# slice of `data` (which triggers SettingWithCopyWarning).
focus = focus.loc[focus['lipizone_color'].isin(unique_colors), :].copy()

# Find clusters of colocalizing lipizones (organizational archetypes).
cmat = pd.crosstab(focus['acronym'], focus['lipizone_color'])  # acronym x lipizone pixel counts
# 1) share of each lipizone found in each acronym, divided by the acronym's mean over lipizones
normalized_df1 = cmat / cmat.sum()
normalized_df1 = (normalized_df1.T / normalized_df1.T.mean()).T
# 2) share of each acronym occupied by each lipizone, divided by the lipizone's mean over acronyms
normalized_df2 = cmat.T / cmat.T.sum()
normalized_df2 = (normalized_df2.T / normalized_df2.T.mean())
# both factors are acronym x lipizone; their product is the joint enrichment score
normalized_df = normalized_df1 * normalized_df2
tc = normalized_df.T  # one observation per lipizone colour
adata = anndata.AnnData(X=tc)
sc.pp.neighbors(adata, use_rep='X')
sc.tl.leiden(adata, resolution=2.0)
cluster_labels = adata.obs['leiden']

# Plot in groups to eyeball patterns.
color_to_cluster = pd.Series(cluster_labels.values, index=cluster_labels.index).to_dict()
focus['leiden_cluster'] = focus['lipizone_color'].map(color_to_cluster)
# Leiden labels are numeric strings: sort them as numbers so that '10' follows '9', not '1'.
unique_clusters = sorted(focus['leiden_cluster'].dropna().unique(), key=int)

# Only section 11 is drawn; the clustering above still used all three sections.
focus = focus.loc[focus["Section"] == 11, :]

# Size the grid from the number of clusters so that none is silently dropped.
n_cols = 6
n_rows = max(1, int(np.ceil(len(unique_clusters) / n_cols)))
fig, axs = plt.subplots(n_rows, n_cols, figsize=(n_cols * 4, n_rows * 4), squeeze=False)
axs = axs.flatten()

# The anatomical contour is the same in every panel.
filtered_section_contour = focus.loc[focus['boundary'] == 1, :]

for ax, cluster in zip(axs, unique_clusters):
    cluster_colors = focus[focus['leiden_cluster'] == cluster]['lipizone_color'].unique()

    # one scatter call per lipizone so that each keeps its own colour
    for color in cluster_colors:
        color_section = focus[focus['lipizone_color'] == color]
        ax.scatter(
            color_section['z_index'],
            -color_section['y_index'],
            c=color,
            s=7,
            alpha=1,
            zorder=1,
            rasterized=True
        )

    ax.scatter(
        filtered_section_contour['z_index'],
        -filtered_section_contour['y_index'],
        c='black',
        s=0.5,
        rasterized=True,
        zorder=2,
        alpha=0.5
    )

    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title(cluster, fontsize=15)

# remove the grid cells that did not receive a cluster
for ax in axs[len(unique_clusters):]:
    fig.delaxes(ax)

plt.tight_layout()
plt.show()

In [None]:
def showcortexlayer_groupedbyspace(SELECTED_SECTIONS):
    """Plot groups of co-localising cortical lipizones, one row per group and one column per section.

    Reads the notebook-level ``data`` frame, restricts it to ``SELECTED_SECTIONS``, keeps the
    lipizone colours with more than 150 pixels, scores every (acronym, lipizone) pair with a
    two-way enrichment, clusters the lipizones with Leiden (resolution 2.0) on those scores and
    shows each cluster in its own row of panels.

    Parameters
    ----------
    SELECTED_SECTIONS : list-like
        Values of ``data["Section"]`` to include.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.

    Raises
    ------
    ValueError
        If no pixel survives the section and abundance filters.
    """
    # select AP positions and --> ALL layers
    focus = data[data["Section"].isin(SELECTED_SECTIONS)]

    # keep only abundant lipizones
    color_counts = focus["lipizone_color"].value_counts()
    unique_colors = color_counts.index[color_counts > 150]
    # .copy(): the 'leiden_cluster' column added below must not be written into a slice of `data`
    focus = focus.loc[focus['lipizone_color'].isin(unique_colors), :].copy()
    if focus.empty:
        raise ValueError("no pixels left after selecting sections and abundant lipizones")

    # find clusters of colocalizing lipizones (organizational archetypes)
    cmat = pd.crosstab(focus['acronym'], focus['lipizone_color'])  # acronym x lipizone counts
    # share of each lipizone found in each acronym, relative to the acronym's mean over lipizones
    normalized_df1 = cmat / cmat.sum()
    normalized_df1 = (normalized_df1.T / normalized_df1.T.mean()).T
    # share of each acronym occupied by each lipizone, relative to the lipizone's mean over acronyms
    normalized_df2 = cmat.T / cmat.T.sum()
    normalized_df2 = (normalized_df2.T / normalized_df2.T.mean())
    normalized_df = normalized_df1 * normalized_df2
    tc = normalized_df.T  # one observation per lipizone colour
    adata = anndata.AnnData(X=tc)
    sc.pp.neighbors(adata, use_rep='X')
    sc.tl.leiden(adata, resolution=2.0)
    cluster_labels = adata.obs['leiden']

    # plot in groups to eyeball patterns
    color_to_cluster = pd.Series(cluster_labels.values, index=cluster_labels.index).to_dict()
    focus['leiden_cluster'] = focus['lipizone_color'].map(color_to_cluster)
    # Leiden labels are numeric strings: sort numerically so that '10' comes after '9'
    unique_clusters = sorted(focus['leiden_cluster'].dropna().unique(), key=int)
    # only sections that still contain pixels get a column
    sections = focus["Section"].unique()

    n_rows = len(unique_clusters)
    n_cols = len(sections)

    # squeeze=False keeps `axs` two-dimensional even with a single cluster or a single section
    fig, axs = plt.subplots(n_rows, n_cols, figsize=(n_cols * 6, n_rows * 6), squeeze=False)

    for cluster_idx, cluster in enumerate(unique_clusters):
        cluster_colors = focus[focus['leiden_cluster'] == cluster]['lipizone_color'].unique()

        for section_idx, section_value in enumerate(sections):
            ax = axs[cluster_idx, section_idx]
            section = focus[focus["Section"] == section_value]

            # one scatter call per lipizone so that each keeps its own colour
            for color in cluster_colors:
                color_section = section[section['lipizone_color'] == color]
                ax.scatter(
                    color_section['z_index'],
                    -color_section['y_index'],
                    c=color,
                    s=10,
                    alpha=1,
                    zorder=1,
                    rasterized=True
                )

            # anatomical boundary pixels drawn on top
            filtered_section_contour = section.loc[section['boundary'] == 1, :]
            ax.scatter(
                filtered_section_contour['z_index'],
                -filtered_section_contour['y_index'],
                c='black',
                s=2,
                rasterized=True,
                zorder=2,
                alpha=0.9
            )

            ax.set_aspect('equal')
            ax.axis('off')

    plt.tight_layout()
    plt.show()
    
# Sections to include in the overview; they are matched against data["Section"] inside the function.
SELECTED_SECTIONS = [9.0, 11.0, 12.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0]
# Draws one row of panels per Leiden cluster of lipizones.
showcortexlayer_groupedbyspace(SELECTED_SECTIONS) 

## Zoom in on layer 6 to see intralayer stratification

In [None]:
SELECTED_SECTIONS = [9.0, 11.0, 12.0]
# acronym suffixes of the two layers of interest
LA = "6a"
LB = "6b"

from matplotlib.backends.backend_pdf import PdfPages

# select AP positions and layers of interest (acronyms ending in LA or LB)
focus = data[data["Section"].isin(SELECTED_SECTIONS)]
focus = focus[focus['acronym'].str.endswith(LA, na=False) | focus['acronym'].str.endswith(LB, na=False)]
# keep only abundant lipizones (more than 100 pixels in the selection)
color_counts = focus["lipizone_color"].value_counts()
unique_colors = color_counts.index[color_counts > 100]
# .copy(): later cells add columns to `focus`; it must not be a slice of `data`
focus = focus.loc[focus['lipizone_color'].isin(unique_colors), :].copy()
# find clusters of colocalizing lipizones (organizational archetypes)
cmat = pd.crosstab(focus['acronym'], focus['lipizone_color'])  # acronym x lipizone counts
# share of each lipizone found in each acronym, relative to the acronym's mean over lipizones
normalized_df1 = cmat / cmat.sum()
normalized_df1 = (normalized_df1.T / normalized_df1.T.mean()).T
# share of each acronym occupied by each lipizone, relative to the lipizone's mean over acronyms
normalized_df2 = cmat.T / cmat.T.sum()
normalized_df2 = (normalized_df2.T / normalized_df2.T.mean())
normalized_df = normalized_df1 * normalized_df2
tc = normalized_df.T  # one observation per lipizone colour
adata = anndata.AnnData(X=tc)
sc.pp.neighbors(adata, use_rep='X')
sc.tl.leiden(adata, resolution=2.0)
cluster_labels = adata.obs['leiden']
# plot in groups to eyeball patterns
color_to_cluster = pd.Series(cluster_labels.values, index=cluster_labels.index).to_dict()
focus['leiden_cluster'] = focus['lipizone_color'].map(color_to_cluster)
# Leiden labels are numeric strings: sort numerically so that '10' comes after '9'
unique_clusters = sorted(focus['leiden_cluster'].dropna().unique(), key=int)
sections = focus["Section"].unique()

# One figure per cluster, one panel per section that still has pixels.
for cluster_idx, cluster in enumerate(unique_clusters):
    # squeeze=False keeps the axes array two-dimensional even when only one section remains
    fig, axs = plt.subplots(1, len(sections), figsize=(6 * len(sections), 6), squeeze=False)
    axs = axs[0]
    cluster_colors = focus[focus['leiden_cluster'] == cluster]['lipizone_color'].unique()

    for section_idx, section_value in enumerate(sections):
        section = focus[focus["Section"] == section_value]
        # one scatter call per lipizone so that each keeps its own colour
        for color in cluster_colors:
            color_section = section[section['lipizone_color'] == color]
            axs[section_idx].scatter(
                color_section['z_index'],
                -color_section['y_index'],
                c=color,
                s=10,
                alpha=1,
                zorder=1,
                rasterized=True
            )
        # anatomical boundary pixels drawn on top
        filtered_section_contour = section.loc[section['boundary'] == 1, :]
        axs[section_idx].scatter(
            filtered_section_contour['z_index'],
            -filtered_section_contour['y_index'],
            c='black',
            s=2,
            rasterized=True,
            zorder=2,
            alpha=0.9
        )
        axs[section_idx].set_aspect('equal')
        axs[section_idx].axis('off')

    plt.tight_layout()

    plt.suptitle(cluster)
    plt.show()

In [None]:
# a function to check for differential lipids between two groups

from scipy.stats import mannwhitneyu, entropy
import matplotlib.pyplot as plt
from tqdm import tqdm
from statsmodels.stats.multitest import multipletests
from tqdm import tqdm

def differential_lipids(lipidata, kmeans_labels, min_fc=0.2, pthr=0.05):
    """Compare every column of ``lipidata`` between the rows labelled 0 and the rows labelled 1.

    Parameters
    ----------
    lipidata : pandas.DataFrame
        One row per observation, one column per lipid.
    kmeans_labels : pandas.Series or array-like
        Group label per row of ``lipidata``. Rows comparing equal to 0 form group A and rows
        comparing equal to 1 form group B (boolean labels work as well).
    min_fc : float, optional
        Accepted for compatibility; not used by the computation.
    pthr : float, optional
        Significance level handed to the Benjamini-Hochberg procedure.

    Returns
    -------
    pandas.DataFrame
        One row per column of ``lipidata`` (in column order) with the columns ``lipid``
        (column position), ``log2fold_change`` (log2 of mean B over mean A), ``p_value``
        (two-sided Mann-Whitney U) and ``p_value_corrected`` (Benjamini-Hochberg).
        Tests that could not be computed keep NaN in both p-value columns.
    """
    eps = 0.00000000001  # pseudo-count that keeps the ratio finite when a mean is zero

    a = lipidata.loc[kmeans_labels == 0, :]
    b = lipidata.loc[kmeans_labels == 1, :]

    results = []
    for rrr in range(lipidata.shape[1]):
        groupA = a.iloc[:, rrr]
        groupB = b.iloc[:, rrr]

        # log2 fold change of the group means (NaN when a shifted mean is not positive)
        meanA = np.mean(groupA) + eps
        meanB = np.mean(groupB) + eps
        log2fold_change = np.log2(meanB / meanA) if meanA > 0 and meanB > 0 else np.nan

        # rank-sum test; inputs the test rejects are recorded as NaN instead of aborting the scan
        try:
            _, p_value = mannwhitneyu(groupA, groupB, alternative='two-sided')
        except ValueError:
            p_value = np.nan

        results.append({'lipid': rrr, 'log2fold_change': log2fold_change, 'p_value': p_value})

    # explicit columns keep the schema stable even when lipidata has no columns
    results_df = pd.DataFrame(results, columns=['lipid', 'log2fold_change', 'p_value'])

    # Correct for multiple testing over the tests that produced a p-value only:
    # a NaN passed to the step-up procedure would spread to every corrected value.
    pvals = results_df['p_value'].to_numpy(dtype=float)
    testable = np.isfinite(pvals)
    pvals_corrected = np.full(pvals.shape, np.nan)
    if testable.any():
        pvals_corrected[testable] = multipletests(pvals[testable], alpha=pthr, method='fdr_bh')[1]
    results_df['p_value_corrected'] = pvals_corrected

    return results_df

In [None]:
# Contrast Leiden clusters 1, 5 and 6 (True) against all remaining pixels (False).
# NOTE(review): the ids come from the Leiden run above and the first 173 columns are
# presumably the lipid intensities — confirm both after any re-clustering.
lipidata = focus.iloc[:, :173]
labels = focus['leiden_cluster'].isin(["1", "5", "6"])
print(labels.value_counts())

# rows renamed to the lipid columns, then ordered from highest to lowest log2 fold change
dl_bottom = (
    differential_lipids(lipidata, labels)
    .set_axis(lipidata.columns, axis=0)
    .sort_values(by='log2fold_change')
    .iloc[::-1]
)
dl_bottom

In [None]:
# Contrast Leiden clusters 9 and 7 (True) against all remaining pixels (False).
# NOTE(review): cluster ids depend on the Leiden run above — confirm after re-clustering.
lipidata = focus.iloc[:, :173]
labels = focus['leiden_cluster'].isin(["9", "7"])
print(labels.value_counts())

# rows renamed to the lipid columns, then ordered from highest to lowest log2 fold change
dl_middle = (
    differential_lipids(lipidata, labels)
    .set_axis(lipidata.columns, axis=0)
    .sort_values(by='log2fold_change')
    .iloc[::-1]
)
dl_middle.iloc[:20]

In [None]:
# Contrast Leiden cluster 2 (True) against all remaining pixels (False).
# NOTE(review): the cluster id depends on the Leiden run above — confirm after re-clustering.
lipidata = focus.iloc[:, :173]
labels = focus['leiden_cluster'].isin(["2"])
print(labels.value_counts())

# rows renamed to the lipid columns, then ordered from highest to lowest log2 fold change
dl_top = (
    differential_lipids(lipidata, labels)
    .set_axis(lipidata.columns, axis=0)
    .sort_values(by='log2fold_change')
    .iloc[::-1]
)
dl_top

In [None]:
# Contrast Leiden cluster 0 (True) against all remaining pixels (False).
# NOTE(review): the cluster id depends on the Leiden run above — confirm after re-clustering.
lipidata = focus.iloc[:, :173]
labels = focus['leiden_cluster'].isin(["0"])
print(labels.value_counts())

# rows renamed to the lipid columns, then ordered from highest to lowest log2 fold change
dl_lateral = (
    differential_lipids(lipidata, labels)
    .set_axis(lipidata.columns, axis=0)
    .sort_values(by='log2fold_change')
    .iloc[::-1]
)
dl_lateral

In [None]:
# For each of the four contrasts take the three lipids at the head and the three at the tail
# of its table (tables are ordered by decreasing log2 fold change): all heads first, then all tails.
contrasts = (dl_bottom, dl_middle, dl_top, dl_lateral)
lipids_to_plot = np.concatenate(
    [table.index[:3].values for table in contrasts]
    + [table.index[-3:].values for table in contrasts]
)

for LIPID in lipids_to_plot:
    # shared colour scale across sections, clipped to the 5th-95th percentile of the selection
    vmin, vmax = np.percentile(focus[LIPID], [5, 95])

    # one panel per section; squeeze=False keeps the axes array 2-D for any number of sections
    fig, axs = plt.subplots(1, len(sections), figsize=(6 * len(sections), 6), squeeze=False)

    for ax, section_value in zip(axs[0], sections):
        section = focus[focus["Section"] == section_value]
        ax.scatter(
            section['z_index'],
            -section['y_index'],
            c=section[LIPID], vmin=vmin, vmax=vmax, cmap="plasma",
            s=20,
            alpha=0.8,
            zorder=1,
            rasterized=True
        )

        ax.set_aspect('equal')
        ax.axis('off')

    plt.tight_layout()
    plt.suptitle(LIPID, fontsize=30)
    plt.show()

## Compare the MERFISH spatial distributions with the MALDI-MSI spatial distributions

In [None]:
# Pixel counts per (acronym, Leiden cluster), then each cluster column rescaled to sum to 1:
# the share of a cluster's pixels that falls in each acronym.
cmat = pd.crosstab(focus['acronym'], focus['leiden_cluster'])
normalized_df1 = cmat.div(cmat.sum(), axis='columns')
fractionsacro_leidenmaldi = normalized_df1
fractionsacro_leidenmaldi

In [None]:
# Keep only the MERFISH rows whose acronym also occurs in the MALDI table.
fractionsacro_leidenmerfish = fractionsacro_leidenmerfish[
    fractionsacro_leidenmerfish.index.isin(fractionsacro_leidenmaldi.index)
]
fractionsacro_leidenmerfish

In [None]:
# Restrict and reorder the MALDI rows to the MERFISH row index so both tables list the same
# acronyms in the same order (raises KeyError if a MERFISH acronym is absent from the MALDI table).
fractionsacro_leidenmaldi = fractionsacro_leidenmaldi.loc[fractionsacro_leidenmerfish.index,:]

In [None]:
# Put clusters on the rows (acronyms become the columns) and tag every cluster with its modality.
# NOTE(review): not idempotent — running this cell twice transposes back and prefixes the acronyms.
fractionsacro_leidenmaldi = fractionsacro_leidenmaldi.T.set_axis(
    "MALDI_" + fractionsacro_leidenmaldi.columns, axis=0
)
fractionsacro_leidenmerfish = fractionsacro_leidenmerfish.T.set_axis(
    "MERFISH_" + fractionsacro_leidenmerfish.columns, axis=0
)

In [None]:
# PCA and KMeans are used below but were not imported in any earlier visible cell.
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

df1 = fractionsacro_leidenmaldi.copy()
df2 = fractionsacro_leidenmerfish.copy()

# tag each row with the table it comes from (df1 -> 'Dataset1', df2 -> 'Dataset2')
df1_viz = df1.copy()
df1_viz['dataset'] = 'Dataset1'
df2_viz = df2.copy()
df2_viz['dataset'] = 'Dataset2'

# stack both sets of clusters and embed their acronym profiles in two dimensions
combined_df = pd.concat([df1_viz, df2_viz], axis=0)
features = combined_df.drop(columns=['dataset']).values
pca = PCA(n_components=2, random_state=42)
pca_result = pca.fit_transform(features)
combined_df['PCA1'] = pca_result[:, 0]
combined_df['PCA2'] = pca_result[:, 1]

# k-means on the same profiles; capped because KMeans rejects more clusters than rows
n_clusters = min(10, len(combined_df))
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
combined_df['cluster'] = kmeans.fit_predict(features)

In [None]:
from adjustText import adjust_text
import matplotlib.pyplot as plt
import seaborn as sns

# Scatter of the two principal components, coloured by source table, with every point labelled.
fig, ax = plt.subplots(figsize=(10, 8))
scatter = sns.scatterplot(
    data=combined_df,
    x='PCA1', y='PCA2',
    hue='dataset',
    palette='deep',
    s=100,
    ax=ax
)

# one text label per row, named after the row index
texts = [
    ax.text(row['PCA1'], row['PCA2'], label)
    for label, row in combined_df.iterrows()
]

# nudge overlapping labels apart and connect each one to its point
adjust_text(texts, arrowprops=dict(arrowstyle='->', color='black', lw=0.5))

ax.set_title("PCA of Combined Cell Type Distributions")
ax.set_xlabel("PCA Component 1")
ax.set_ylabel("PCA Component 2")
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
fig.tight_layout()
plt.show()

## Study the lipid distributions underlying the observations

In [None]:
# select AP positions and --> ALL layers
focus = data[data["Section"].isin([9, 11, 12])]

# keep only abundant lipizones (more than 150 pixels in the selection)
color_counts = focus["lipizone_color"].value_counts()
unique_colors = color_counts.index[color_counts > 150]
# .copy(): columns are added to `focus` below and in the next cell; it must not be a slice of `data`
focus = focus.loc[focus['lipizone_color'].isin(unique_colors), :].copy()

# find clusters of colocalizing lipizones (organizational archetypes)
cmat = pd.crosstab(focus['acronym'], focus['lipizone_color'])  # acronym x lipizone counts
# share of each lipizone found in each acronym, relative to the acronym's mean over lipizones
normalized_df1 = cmat / cmat.sum()
normalized_df1 = (normalized_df1.T / normalized_df1.T.mean()).T
# share of each acronym occupied by each lipizone, relative to the lipizone's mean over acronyms
normalized_df2 = cmat.T / cmat.T.sum()
normalized_df2 = (normalized_df2.T / normalized_df2.T.mean())
normalized_df = normalized_df1 * normalized_df2
tc = normalized_df.T  # one observation per lipizone colour
adata = anndata.AnnData(X=tc)
sc.pp.neighbors(adata, use_rep='X')
sc.tl.leiden(adata, resolution=2.0)
cluster_labels = adata.obs['leiden']

# attach the cluster of its lipizone to every pixel
color_to_cluster = pd.Series(cluster_labels.values, index=cluster_labels.index).to_dict()
focus['leiden_cluster'] = focus['lipizone_color'].map(color_to_cluster)
# Leiden labels are numeric strings: sort numerically so that '10' comes after '9'
unique_clusters = sorted(focus['leiden_cluster'].dropna().unique(), key=int)
sections = focus["Section"].unique()
secs = sections

# One panel per section with every retained lipizone in its own colour.
# squeeze=False keeps the axes array 2-D for any number of sections.
fig, axs = plt.subplots(1, len(secs), figsize=(6 * len(secs), 6), squeeze=False)

for ax, section_value in zip(axs[0], secs):
    section = focus[focus["Section"] == section_value]

    ax.scatter(section['z_index'], -section['y_index'],
               c=section['lipizone_color'], s=10,
               alpha=1, zorder=1, rasterized=True)

    # anatomical boundary pixels drawn on top
    filtered_section_contour = section.loc[section['boundary'] == 1, :]
    ax.scatter(filtered_section_contour['z_index'], -filtered_section_contour['y_index'],
               c='black', s=2, rasterized=True, zorder=2, alpha=0.9)

    ax.set_aspect('equal')
    ax.axis('off')

plt.tight_layout()
plt.savefig("cortex_jointly.pdf")
plt.show()

In [None]:
# NOTE(review): the first 173 columns are presumably the lipid intensities — confirm.
# Taken once: the 'LEI' column added below is appended after them.
lipidata = focus.iloc[:, :173]
secs = focus["Section"].unique()

# NOTE(review): the cluster ids below depend on the Leiden run of the previous cell.
for lei in ['5', '8', '3', '10', '2']:
    print(lei)
    # indicator column: 1 for pixels of this cluster, 0 for everything else
    focus['LEI'] = 0
    focus.loc[focus['leiden_cluster'] == lei, 'LEI'] = 1
    print(focus['LEI'].value_counts())

    difflips = differential_lipids(lipidata, focus['LEI'])
    difflips.index = lipidata.columns

    # five highest followed by five lowest log2 fold changes
    ranked = difflips.sort_values('log2fold_change').index.values
    toplotLEI = np.concatenate((ranked[-5:], ranked[:5]))

    for LIPID in toplotLEI:
        # shared colour scale across sections, clipped to the 5th-95th percentile
        vmin, vmax = np.percentile(focus[LIPID], [5, 95])

        # squeeze=False keeps the axes array 2-D for any number of sections
        fig, axs = plt.subplots(1, len(secs), figsize=(6 * len(secs), 6), squeeze=False)

        for ax, section_value in zip(axs[0], secs):
            section = focus[focus["Section"] == section_value]

            ax.scatter(section['z_index'], -section['y_index'],
                       c=section[LIPID], cmap="plasma", vmin=vmin, vmax=vmax, s=10,
                       alpha=0.7, zorder=1, rasterized=True)

            # anatomical boundary pixels drawn on top
            filtered_section_contour = section.loc[section['boundary'] == 1, :]
            ax.scatter(filtered_section_contour['z_index'], -filtered_section_contour['y_index'],
                       c='black', s=2, rasterized=True, zorder=2, alpha=0.9)

            ax.set_aspect('equal')
            ax.axis('off')

        plt.tight_layout()
        # label the figure so each lipid map can be told apart from the others
        plt.suptitle(f"{LIPID} (cluster {lei})", fontsize=30)
        plt.show()

In [None]:
# Restrict to section 12 and correlate the lipid columns with each other.
focus = focus.loc[focus['Section'] == 12,:]

comat = focus.iloc[:,:173].corr()

from scipy.cluster.hierarchy import linkage, leaves_list

distance = 1 - comat
# NOTE(review): `distance` is a square matrix, so linkage() reads each row as an observation
# vector (Ward on the rows) and not as precomputed pairwise distances. Pass a condensed
# matrix instead if the latter is intended — confirm.
Z = linkage(distance, method='ward')
leaf_order = leaves_list(Z)
comat_sorted = comat.iloc[leaf_order, leaf_order]
sns.heatmap(comat_sorted, cmap="plasma")
plt.show()

# lipids in dendrogram order
lipids = comat_sorted.index.values

# Grid sized from the number of lipids so that none is silently left out.
cols = 11
rows = max(1, int(np.ceil(len(lipids) / cols)))
total_plots = rows * cols

fig, axs = plt.subplots(rows, cols, figsize=(cols * 6, rows * 6), squeeze=False)
axs = axs.flatten()  # flatten to iterate easily

# `focus` holds a single section at this point, so its contour is shared by all panels
filtered_contour = focus[focus['boundary'] == 1]

for ax, lipid in zip(axs, lipids):
    # colour scale clipped to the 5th-95th percentile of this lipid
    vmin, vmax = np.percentile(focus[lipid], [5, 95])

    ax.scatter(focus['z_index'], -focus['y_index'],
               c=focus[lipid], cmap="plasma",
               vmin=vmin,
               vmax=vmax,
               s=10, alpha=0.7, zorder=1, rasterized=True)

    ax.scatter(filtered_contour['z_index'], -filtered_contour['y_index'],
               c='black', s=2, alpha=0.9, zorder=2, rasterized=True)

    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title(lipid, fontsize=40)

# hide the grid cells that did not receive a lipid
for ax in axs[len(lipids):]:
    ax.axis('off')

plt.tight_layout()
plt.show()

## Study anatomy-related lipizones

In [None]:
import scipy.cluster.hierarchy as sch

import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42

from scipy.spatial.distance import pdist

acronyms = atlas['acronym'].copy()
lipizones = atlas['lipizone'].copy()

# keep the acronyms that cover more than 500 pixels, and the lipizone labels of the same pixels
acronym_counts = acronyms.value_counts()
acronyms = acronyms.loc[acronyms.isin(acronym_counts.index[acronym_counts > 500])]
lipizones = lipizones.loc[acronyms.index]

# acronym x lipizone: share of each lipizone found in each acronym,
# divided by the acronym's mean over lipizones
cmat = pd.crosstab(acronyms, lipizones)
normalized_df = cmat / cmat.sum() # fraction
normalized_df = (normalized_df.T / normalized_df.T.mean()).T ## switch to enrichments
normalized_df1 = normalized_df.copy()

# lipizone x acronym: share of each acronym occupied by each lipizone,
# divided by the lipizone's mean over acronyms, then transposed back to acronym x lipizone
cmat = pd.crosstab(lipizones, acronyms)
normalized_df = cmat / cmat.sum()
normalized_df = (normalized_df.T / normalized_df.T.mean()).T
normalized_df2 = normalized_df.copy().T

# joint enrichment score, acronym x lipizone
normalized_df = normalized_df2 * normalized_df1

# Order the lipizone columns by hierarchical clustering of their acronym profiles.
# The result is named `column_linkage` so it does not shadow scipy's `linkage` function,
# which an earlier cell imports under that name.
column_linkage = sch.linkage(pdist(normalized_df.T), method='weighted', optimal_ordering=True)
order = sch.leaves_list(column_linkage)
normalized_df = normalized_df.iloc[:, order]

# Order the acronym rows by the position of their strongest column.
order = np.argmax(normalized_df.values, axis=1)
order = np.argsort(order)
normalized_df = normalized_df.iloc[order,:]

plt.figure(figsize=(10, 10))
sns.heatmap(normalized_df, cmap="Purples", cbar_kws={'label': 'Enrichment'}, xticklabels=True, yticklabels=False, vmin = np.percentile(normalized_df, 2), vmax = np.percentile(normalized_df, 98))

plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
plt.tick_params(axis='y', which='both', left=False, right=False)

plt.yticks(rotation=0)

plt.tight_layout()
plt.show()

In [None]:
# Snapshot of the enrichment matrix, then the 15 lipizones with the highest score for 'SNr'.
n1 = normalized_df.copy()
normalized_df.loc['SNr'].sort_values().iloc[::-1].head(15)

In [None]:
THR = 100  # minimum enrichment score for a lipizone to be shown for a region

# Pixels with 'zccf' above the atlas mean. This does not depend on the region, so it is built
# once. .copy() makes `data` an independent frame, so filling 'acronym' does not write into a
# slice of `atlas`.
# NOTE(review): this overwrites the `data`, `midp` and `unique_sections` defined for the
# cortex analysis above; re-running those earlier cells afterwards needs their own setup cell.
midp = atlas['zccf'].mean()
data = atlas.loc[(atlas['zccf'] > midp), :].copy()
unique_sections = data['Section'].unique()
data['acronym'] = data['acronym'].fillna("General")

for TESTACRO in ['SNr']:
    # lipizones whose enrichment for this region exceeds the threshold
    esca = normalized_df.loc[TESTACRO, :].sort_values()[::-1]
    candidatelipizones = esca[esca > THR].index.values

    # number of pixels annotated as the region in every section
    areas = []
    for section_value in unique_sections:
        section = data[data["Section"] == section_value]
        filtered_section = section.loc[section['acronym'] == TESTACRO, :]
        areas.append((section_value, len(filtered_section)))

    # the (up to) five sections with most region pixels, shown in their original section order
    sorted_by_area = sorted(areas, key=lambda x: x[1], reverse=True)[:5]
    top_5_sections = [section for section, _ in sorted_by_area]
    top_5_sections_ordered = [s for s in unique_sections if s in top_5_sections]

    fig, axs = plt.subplots(2, 5, figsize=(30, 12))

    for i, section_value in enumerate(top_5_sections_ordered):
        section = data[data["Section"] == section_value]
        filtered_section_contour = section.loc[section['boundary'] == 1, :]

        # top row: the annotated region in its atlas colour
        ax_top = axs[0, i]
        filtered_section = section.loc[section['acronym'] == TESTACRO, :]
        ax_top.scatter(filtered_section['z_index'], -filtered_section['y_index'],
                       c=filtered_section['allencolor'], s=5,
                       alpha=1, zorder=1, rasterized=True)
        ax_top.scatter(filtered_section_contour['z_index'], -filtered_section_contour['y_index'],
                       c='black', s=2, rasterized=True, zorder=2, alpha=0.9)
        ax_top.set_aspect('equal')
        ax_top.axis('off')
        ax_top.set_title(f'Section {section_value}')

        # bottom row: the candidate lipizones in their own colours
        ax_middle = axs[1, i]
        filtered_section = section.loc[section['lipizone'].isin(candidatelipizones), :]
        ax_middle.scatter(filtered_section['z_index'], -filtered_section['y_index'],
                          c=filtered_section['lipizone_color'], s=10,
                          alpha=1, zorder=1, rasterized=True)
        ax_middle.scatter(filtered_section_contour['z_index'], -filtered_section_contour['y_index'],
                          c='black', s=2, rasterized=True, zorder=2, alpha=0.9)
        ax_middle.set_aspect('equal')
        ax_middle.axis('off')
        ax_middle.set_title(f'Section {section_value} - Lipizones')

    # hide the columns that stay empty when fewer than five sections are available
    for ax in axs[:, len(top_5_sections_ordered):].ravel():
        ax.axis('off')

    plt.suptitle(TESTACRO, fontsize=30)
    plt.tight_layout()

    plt.show()

## Characterize the substantia nigra

In [None]:
# The 15th distinct section value of the atlas (position 14 in order of appearance).
section = atlas[atlas["Section"] == atlas['Section'].unique()[14]]

# Pixels of lipizone "121212111000000" with level_1 == 1 inside the crop window
# (y_index > 150, 125 < z_index < 330), drawn in the lipizone colour.
filtered_section = section[
    (section['level_1'] == 1)
    & (section['lipizone'] == "121212111000000")
    & (section['y_index'] > 150)
    & (section['z_index'] > 125)
    & (section['z_index'] < 330)
]
plt.scatter(filtered_section['z_index'], -filtered_section['y_index'],
            c=filtered_section['lipizone_color'], s=0.2,
            alpha=1, zorder=1, rasterized=True)

# Every pixel of the section inside the same window; its boundary pixels give the outline.
filtered_section = section[
    (section['y_index'] > 150)
    & (section['z_index'] > 125)
    & (section['z_index'] < 330)
]
filtered_section_contour = filtered_section[filtered_section['boundary'] == 1]
plt.scatter(filtered_section_contour['z_index'], -filtered_section_contour['y_index'],
            c='black', s=0.01, rasterized=True, zorder=2, alpha=0.9)
plt.show()

In [None]:
# Crop the section to the window y_index > 150, 125 < z_index < 330.
# .copy() after filtering: the 'dopa' column must be written to an independent frame,
# not to a slice of `section` (which raises SettingWithCopyWarning).
filtered_section = section.loc[
    (section['y_index'] > 150) & (section['z_index'] > 125) & (section['z_index'] < 330), :
].copy()

# 1 for pixels of lipizone "121212111000000", 0 for every other pixel in the window
filtered_section['dopa'] = 0
filtered_section.loc[filtered_section['lipizone'] == "121212111000000", 'dopa'] = 1

filtered_section['dopa'].value_counts()

In [None]:
# Lipids compared between flagged (dopa == 1) and remaining (dopa == 0) pixels of the window.
# NOTE(review): the first 173 columns are presumably the lipid intensities — confirm.
lipidata = filtered_section.iloc[:, :173]

difflips = differential_lipids(lipidata, filtered_section['dopa']).set_axis(lipidata.columns, axis=0)

# five highest log2 fold changes
difflips.sort_values('log2fold_change').iloc[::-1].head(5)

In [None]:
# Map the eight lipids with the highest log2 fold change over the cropped window.
top_enriched = difflips.sort_values('log2fold_change').iloc[::-1].index.values[:8]

for LIPID in top_enriched:
    # colour scale clipped to the 5th-95th percentile of the lipid inside the window
    lower, upper = np.percentile(filtered_section[LIPID], [5, 95])

    plt.scatter(
        filtered_section['z_index'], -filtered_section['y_index'],
        c=filtered_section[LIPID], vmin=lower, vmax=upper, cmap="plasma",
        s=10, alpha=1, zorder=1, rasterized=True,
    )
    # outline of the anatomical boundaries on top
    plt.scatter(
        filtered_section_contour['z_index'], -filtered_section_contour['y_index'],
        c='black', s=1, rasterized=True, zorder=2, alpha=1.0,
    )
    plt.title(LIPID)
    plt.show()

## Prepare the tree representation from manual curation

In [None]:
import numpy as np
from scipy.cluster.hierarchy import dendrogram
import matplotlib.pyplot as plt

# Leaf labels of the manually curated tree, in the left-to-right order of the leaves.
# There are 16 entries, matching the 16 leaves of the linkage built below.
# "Layers 2/3 and 4" and "Layer 5-6, nuclei, granule layer" each appear twice.
# NOTE(review): presumably intentional (distinct branches with the same description) — confirm.
# NOTE(review): this reuses the name `labels`, which earlier cells assign to boolean group masks.
labels = [
    "Cingulate, striatum, hippocampus,\nmixed cortex and subcortical plate regions",
    "Layers 2/3 and 4",
    "HPF, AMY, CTXsp, HY nuclei",
    "[empty]",
    "Purkinje cells",
    "Layers 2/3 and 4",
    "Entorhinal cortex, CA1, nuclei",
    "Outer and piriform cortex",
    "Retrosplenial and L5 and its boundaries",
    "Layer 5, retrosplenial, dopaminergic,\nhippocampus, visual regions",
    "Mostly noncortical,\ncomplex and widespread GM",
    "Striatum, nuclei, ventricular linings",
    "Layer 5-6, nuclei, granule layer",
    "Layer 5-6, nuclei, granule layer",
    "Granular layer",
    "Layer 6, mixed complex GM,\nnoradrenergic regions"
]

def generate_custom_linkage(n_leaves=16):
    """
    Build a SciPy-style linkage matrix for a perfectly balanced binary tree.

    Adjacent leaves are merged pairwise, then adjacent pairs of the resulting
    clusters, and so on up to the root. Every merge at tree level ``k``
    (1-based, counted from the leaves) is given distance ``k``, so all
    branches of a level have the same length.

    Parameters
    ----------
    n_leaves : int, optional
        Number of leaves. Must be a power of two and at least 2. The default
        of 16 reproduces the original hard-coded four-level tree.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_leaves - 1, 4)``. Each row holds
        ``[left cluster id, right cluster id, merge distance, leaf count]``.
        Leaves are numbered ``0 .. n_leaves - 1`` and the cluster created by
        row ``r`` has id ``n_leaves + r``.

    Raises
    ------
    ValueError
        If ``n_leaves`` is smaller than 2 or not a power of two.
    """
    # A balanced pairwise merge only works when the leaf count halves cleanly
    # at every level, i.e. when it is a power of two.
    if n_leaves < 2 or n_leaves & (n_leaves - 1):
        raise ValueError("n_leaves must be a power of two and at least 2")

    linkage_matrix = np.zeros((n_leaves - 1, 4), dtype=float)

    first_id = 0           # id of the first cluster at the level being merged
    level_width = n_leaves  # number of clusters at that level
    leaves_per_cluster = 1  # leaves contained in each cluster at that level
    distance = 1.0          # merge height assigned to the next level
    row = 0

    while level_width > 1:
        leaves_per_cluster *= 2
        for pair in range(level_width // 2):
            left = first_id + 2 * pair
            linkage_matrix[row] = (left, left + 1, distance, leaves_per_cluster)
            row += 1
        # The clusters just created are numbered right after the current level.
        first_id += level_width
        level_width //= 2
        distance += 1.0

    return linkage_matrix

# Balanced 16-leaf tree used purely as a drawing scaffold for the curated labels.
linkage_matrix = generate_custom_linkage()

# The matrix must be a float array before it is handed to dendrogram().
assert linkage_matrix.dtype == float, "Linkage matrix must be of float type."


def _always_black(link_id):
    """Colour callback for dendrogram(): every link is drawn in black."""
    return 'black'


# Drawing options, collected in one place.
dendrogram_options = dict(
    orientation='left',
    color_threshold=0,
    above_threshold_color='black',
    labels=labels,
    leaf_font_size=10,
    show_leaf_counts=False,
    no_labels=False,
    link_color_func=_always_black,
)

plt.figure(figsize=(14, 10))
dendro = dendrogram(linkage_matrix, **dendrogram_options)

# Strip the frame and tick marks so only the tree and its labels remain.
ax = plt.gca()
for side in ('top', 'right', 'bottom', 'left'):
    ax.spines[side].set_visible(False)
ax.tick_params(axis='both', which='both', length=0)
ax.set_xticks([])
plt.yticks(fontsize=10)
ax.set_xlabel('Distance')
plt.tight_layout()

plt.show()


## Study the first partitions in the gray matter

In [None]:
unique_sections = atlas['Section'].unique()

# Fix one integer code per level_2 value for the whole figure. Deriving the
# codes inside the loop would renumber the categories whenever a section lacks
# one of them, and matplotlib would rescale the colormap per panel, so the same
# partition could change colour from one panel to the next.
level_categories = atlas.loc[atlas['level_1'] == 2, 'level_2'].astype("category").cat.categories
max_code = max(len(level_categories) - 1, 1)  # avoids a degenerate vmin == vmax range

fig, axs = plt.subplots(4, 8, figsize=(32, 16))
axs = axs.flatten()

for i, section_value in enumerate(unique_sections):
    if i >= len(axs):
        # Only 32 panels exist; any further sections are not drawn.
        break
    ax = axs[i]
    section = atlas[atlas["Section"] == section_value]

    # Pixels whose first-level partition is 2 (presumably the gray-matter
    # branch, going by the section heading).
    filtered_section = section.loc[(section['level_1'] == 2),:]
    level_codes = pd.Categorical(filtered_section['level_2'], categories=level_categories).codes

    ax.scatter(filtered_section['z_index'], -filtered_section['y_index'],
               c=level_codes, cmap="tab20", vmin=0, vmax=max_code, s=0.2,
               alpha=1, zorder=1, rasterized=True)

    # Black overlay of the pixels flagged boundary == 1, drawn on top.
    filtered_section_contour = section.loc[section['boundary'] == 1,:]
    ax.scatter(filtered_section_contour['z_index'], -filtered_section_contour['y_index'],
               c='black', s=0.01, rasterized=True, zorder=2, alpha=0.9)

    ax.set_aspect('equal')

# Hide the axes of every panel, including unused ones.
for ax in axs:
    ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Turn level_2 .. level_11 into cumulative path labels: each level becomes the
# string of its (already converted) parent level followed by its own value, so
# e.g. level_2 reads "2.02.0" where level_1 and level_2 were both 2.0.
#
# The conversion overwrites the columns in place, so running it a second time
# would concatenate the paths again and silently break every later filter on
# these labels. A flag stored in atlas.attrs makes the cell safe to re-run; a
# freshly loaded atlas has no flag and is converted as before.
if not atlas.attrs.get('levels_are_paths', False):
    for i in range(2, 12):
        parent_col = 'level_' + str(i - 1)
        child_col = 'level_' + str(i)
        atlas[child_col] = atlas[parent_col].astype(str) + atlas[child_col].astype(str)
    atlas.attrs['levels_are_paths'] = True

unique_sections = atlas['Section'].unique()

# One integer code per level_3 label, shared by all panels. Codes derived per
# section would be renumbered whenever a section lacks a label, and the
# colormap would be rescaled per panel, so colours would not be comparable
# across panels.
level_categories = atlas.loc[atlas['level_1'] == 2, 'level_3'].astype("category").cat.categories
max_code = max(len(level_categories) - 1, 1)  # avoids a degenerate vmin == vmax range

fig, axs = plt.subplots(4, 8, figsize=(32, 16))
axs = axs.flatten()

for i, section_value in enumerate(unique_sections):
    if i >= len(axs):
        # Only 32 panels exist; any further sections are not drawn.
        break
    ax = axs[i]
    section = atlas[atlas["Section"] == section_value]

    # Pixels whose first-level partition is 2 (presumably the gray-matter
    # branch, going by the section heading).
    filtered_section = section.loc[(section['level_1'] == 2),:]
    level_codes = pd.Categorical(filtered_section['level_3'], categories=level_categories).codes

    ax.scatter(filtered_section['z_index'], -filtered_section['y_index'],
               c=level_codes, cmap="tab20", vmin=0, vmax=max_code, s=0.2,
               alpha=1, zorder=1, rasterized=True)

    # Black overlay of the pixels flagged boundary == 1, drawn on top.
    filtered_section_contour = section.loc[section['boundary'] == 1,:]
    ax.scatter(filtered_section_contour['z_index'], -filtered_section_contour['y_index'],
               c='black', s=0.01, rasterized=True, zorder=2, alpha=0.9)

    ax.set_aspect('equal')

# Hide the axes of every panel, including unused ones.
for ax in axs:
    ax.axis('off')

plt.tight_layout()
plt.show()

## Notice there's a cool double layering at the level of the cerebellar granule cell layer...

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import matplotlib.colors as mcolors

# Distinct level_4 labels, in category order.
unique_levels = atlas['level_4'].astype("category").cat.categories

# Twenty hand-picked colour names (despite the variable name, this is not
# matplotlib's tab20 palette).
tab20_colors = [
    "blue", "orange", "green", "red", "purple",
    "brown", "pink", "gray", "olive", "cyan",
    "magenta", "gold", "lime", "navy", "maroon",
    "teal", "coral", "orchid", "indigo", "chartreuse",
]

# Repeat the palette in place until there is at least one colour per label.
while len(tab20_colors) < len(unique_levels):
    tab20_colors *= 2

# Pair labels with colours in order; zip stops at the last label.
color_mapping = dict(zip(unique_levels, tab20_colors))

# Per-pixel colour name derived from the level_4 label.
atlas["tab20col"] = atlas["level_4"].map(color_mapping)

unique_sections = atlas['Section'].unique()

fig, axs = plt.subplots(4, 8, figsize=(32, 16))
axs = axs.flatten()

# zip stops at the shorter input, so at most one section per available panel is drawn.
for ax, section_value in zip(axs, unique_sections):
    section = atlas[atlas["Section"] == section_value]

    # Keep the pixels on the "2.02.0" level_2 branch whose level_4 colour is teal or maroon.
    on_branch = section['level_2'] == "2.02.0"
    two_classes = section['tab20col'].isin(['teal', 'maroon'])
    filtered_section = section.loc[on_branch & two_classes, :]

    ax.scatter(
        filtered_section['z_index'],
        -filtered_section['y_index'],
        c=filtered_section['tab20col'],
        s=0.2,
        alpha=1,
        zorder=1,
        rasterized=True,
    )

    # Black overlay of the pixels flagged boundary == 1, drawn on top.
    filtered_section_contour = section.loc[section['boundary'] == 1, :]
    ax.scatter(
        filtered_section_contour['z_index'],
        -filtered_section_contour['y_index'],
        c='black',
        s=0.01,
        rasterized=True,
        zorder=2,
        alpha=0.9,
    )

    ax.set_aspect('equal')

# Hide the axes of every panel, including unused ones.
for ax in axs:
    ax.axis('off')

plt.tight_layout()
plt.savefig("basecereb.pdf")
plt.show()

In [None]:
# Zoom on one section (the fifth from the end of the section list) to show the
# two level_4 classes coloured teal and maroon.
plotnow = atlas['Section'].unique()[-5]

fig, axs = plt.subplots(1, 1, figsize=(10, 10))
ax = axs

section = atlas[atlas["Section"] == plotnow]

filtered_section = section.loc[(section['level_2'] == "2.02.0") & (section['tab20col'].isin(['teal', 'maroon'])), :]
# Crop to the region of interest.
filtered_section = filtered_section.loc[(filtered_section['z_index'] > 300) & (filtered_section['y_index'] < 200),:]

# Recolour the two classes for display. The mapping is vectorised: the former
# `filtered_section['tab20col'][i]` looked rows up by *label* using positional
# integers, which fails or picks wrong rows on a filtered frame with an integer
# index and relies on a deprecated positional fallback otherwise.
point_colors = np.where(filtered_section['tab20col'] == 'teal', "lightblue", "darkred").tolist()

ax.scatter(filtered_section['z_index'], -filtered_section['y_index'],
           c=point_colors, s=100, alpha=1, zorder=1, rasterized=True)

# Cropped boundary pixels. They are computed but not drawn in this figure; the
# variable is kept because other cells read filtered_section_contour.
filtered_section_contour = section.loc[section['boundary'] == 1, :]
filtered_section_contour = filtered_section_contour.loc[(filtered_section_contour['z_index'] > 300) & (filtered_section_contour['y_index'] < 200),:]

axs.axis('off')
ax.set_aspect('equal', adjustable='box')
plt.tight_layout()
plt.savefig("doublelayer.pdf")
plt.show()

In [None]:
# Sections used for the teal-vs-maroon comparison.
filtered_sections = [23.0, 24.0, 26.0, 27.0, 28.0, 29.0]
# Pixels of those sections that belong to either of the two classes.
cb = atlas.loc[(atlas['Section'].isin(filtered_sections)) & (atlas['tab20col'].isin(['teal', 'maroon'])),:]
# Positional slice: the first 173 columns.
# NOTE(review): assumes these columns are the lipid measurements -- confirm.
lipidata = cb.iloc[:, :173]
l1 = 'teal'
l2 = 'maroon'

# 1 for teal pixels, 0 for maroon pixels. cb only contains these two classes,
# so a single comparison labels every row. Building the labels as an integer
# series avoids writing ints into an object-dtype slice of cb and matches the
# integer 0/1 flag passed to differential_lipids earlier in the notebook.
labels = (cb['tab20col'] == l1).astype(int)

# differential_lipids is defined outside this excerpt; its rows are labelled by lipid name.
difflips = differential_lipids(lipidata, labels)
difflips.index = lipidata.columns

# The 20 smallest log2 fold changes, smallest first.
difflips.sort_values('log2fold_change')[:20]

In [None]:
# Lipids to display: the five smallest and the five largest log2 fold changes.
ranked_lipids = difflips.sort_values('log2fold_change')
tocolor = np.concatenate((ranked_lipids[:5].index.values, ranked_lipids[-5:].index.values))

# Sections shown side by side (the same list for every lipid, so defined once).
filtered_sections = [23.0, 24.0, 26.0, 27.0, 28.0, 29.0]

for currentLipid in tocolor:

    # Shared colour limits for all panels of this lipid: the median, across
    # the sections present in cb, of the per-section 5th and 95th percentiles.
    per_section = cb.groupby('Section')[currentLipid]
    lower_limit = per_section.quantile(0.05).median()
    upper_limit = per_section.quantile(0.95).median()

    fig, axs = plt.subplots(1, len(filtered_sections), figsize=(24, 4))
    axs = np.atleast_1d(axs).ravel()

    for ax, section_value in zip(axs, filtered_sections):
        section = atlas[atlas["Section"] == section_value]

        # Pixels on the "2.02.0" level_2 branch whose level_4 colour is teal or maroon.
        filtered_section = section.loc[(section['level_2'] == "2.02.0") & (section['tab20col'].isin(['teal', 'maroon'])), :]

        ax.scatter(filtered_section['z_index'], -filtered_section['y_index'],
                   c=filtered_section[currentLipid], cmap="plasma", s=0.2, alpha=1, zorder=1, rasterized=True,
                   vmin=lower_limit, vmax=upper_limit)

        # Black overlay of the pixels flagged boundary == 1, drawn on top.
        filtered_section_contour = section.loc[section['boundary'] == 1, :]
        ax.scatter(filtered_section_contour['z_index'], -filtered_section_contour['y_index'],
                   c='black', s=0.01, rasterized=True, zorder=2, alpha=0.9)

        ax.set_aspect('equal')

    for ax in axs:
        ax.axis('off')

    fig.suptitle(currentLipid)
    plt.tight_layout()
    plt.show()
    # One figure per lipid: release it so figures do not pile up in memory.
    plt.close(fig)

In [None]:
# Zoom on one section (the fifth from the end of the section list), coloured
# by the intensity of a single lipid.
plotnow = atlas['Section'].unique()[-5]
currentLipid = "PC 38:5"
fig, axs = plt.subplots(1, 1, figsize=(10, 10))
ax = axs

section = atlas[atlas["Section"] == plotnow]

filtered_section = section.loc[(section['level_2'] == "2.02.0") & (section['tab20col'].isin(['teal', 'maroon'])), :]
# Crop to the region of interest.
filtered_section = filtered_section.loc[(filtered_section['z_index'] > 300) & (filtered_section['y_index'] < 200),:]

# Colour limits at the 5th and 95th percentile of the lipid within the crop.
lipid_values = filtered_section[currentLipid]
lower_limit, upper_limit = np.percentile(lipid_values, [5, 95])

ax.scatter(filtered_section['z_index'], -filtered_section['y_index'],
           c=lipid_values, cmap="plasma", vmin=lower_limit, vmax=upper_limit,
           s=150, alpha=1, zorder=1, rasterized=True)

# Cropped boundary pixels. They are computed but not drawn in this figure; the
# variable is kept because other cells read filtered_section_contour.
filtered_section_contour = section.loc[section['boundary'] == 1, :]
filtered_section_contour = filtered_section_contour.loc[(filtered_section_contour['z_index'] > 300) & (filtered_section_contour['y_index'] < 200),:]

axs.axis('off')
ax.set_aspect('equal', adjustable='box')
# Apply the layout *before* saving so the PDF matches what is shown on screen.
plt.tight_layout()
plt.savefig("pc385.pdf")
plt.show()

In [None]:
# Defined here so the cell does not depend on a variable left over from an earlier cell.
unique_sections = atlas['Section'].unique()

# One integer code per level_3 label, shared by all panels. Codes derived per
# section would be renumbered whenever a section lacks a label, and the
# colormap would be rescaled per panel, so colours would not be comparable
# across panels.
level_categories = atlas.loc[atlas['level_1'] == 2, 'level_3'].astype("category").cat.categories
max_code = max(len(level_categories) - 1, 1)  # avoids a degenerate vmin == vmax range

fig, axs = plt.subplots(4, 8, figsize=(32, 16))
axs = axs.flatten()

for i, section_value in enumerate(unique_sections):
    if i >= len(axs):
        # Only 32 panels exist; any further sections are not drawn.
        break
    ax = axs[i]
    section = atlas[atlas["Section"] == section_value]

    # Pixels whose first-level partition is 2 (presumably the gray-matter
    # branch, going by the section heading).
    filtered_section = section.loc[(section['level_1'] == 2),:]
    level_codes = pd.Categorical(filtered_section['level_3'], categories=level_categories).codes

    ax.scatter(filtered_section['z_index'], -filtered_section['y_index'],
               c=level_codes, cmap="tab20", vmin=0, vmax=max_code, s=0.2,
               alpha=1, zorder=1, rasterized=True)

    # Black overlay of the pixels flagged boundary == 1, drawn on top.
    filtered_section_contour = section.loc[section['boundary'] == 1,:]
    ax.scatter(filtered_section_contour['z_index'], -filtered_section_contour['y_index'],
               c='black', s=0.01, rasterized=True, zorder=2, alpha=0.9)

    ax.set_aspect('equal')

# Hide the axes of every panel, including unused ones.
for ax in axs:
    ax.axis('off')

plt.tight_layout()
plt.show()

## Some gray-matter (GM) lipizones might be related to circuitry...

In [None]:
unique_sections = atlas['Section'].unique()

# level_7 path labels to highlight, one figure each.
# NOTE(review): "2.02.02.01.02.01.01.0" was listed twice, which rendered the
# same figure twice; the repeat is dropped here. Confirm that a different
# subclass was not intended in its place.
subclasses = [
    "2.02.02.01.02.01.01.0",
    "2.02.02.02.02.01.01.0",
    "2.02.01.02.02.01.01.0",
    "2.02.02.02.02.01.02.0",
    "2.02.01.02.01.02.02.0",
    "2.02.01.02.01.02.01.0",
    "2.02.01.02.02.01.02.0",
    "2.02.02.01.01.02.01.0"
]

for subclass in subclasses:
    fig, axs = plt.subplots(4, 8, figsize=(32, 16))
    axs = axs.flatten()

    for i, section_value in enumerate(unique_sections):
        if i >= len(axs):
            # Only 32 panels exist; any further sections are not drawn.
            break
        ax = axs[i]
        section = atlas[atlas["Section"] == section_value]

        # Pixels on the level_1 == 2 branch that carry the current level_7 label.
        filtered_section = section.loc[(section['level_1'] == 2),:]
        filtered_section = filtered_section.loc[filtered_section['level_7'] == subclass,:]

        ax.scatter(filtered_section['z_index'], -filtered_section['y_index'],
                   c="red", s=0.2,
                   alpha=1, zorder=1, rasterized=True)

        # Black overlay of the pixels flagged boundary == 1, drawn on top.
        filtered_section_contour = section.loc[section['boundary'] == 1,:]
        ax.scatter(filtered_section_contour['z_index'], -filtered_section_contour['y_index'],
                   c='black', s=0.01, rasterized=True, zorder=2, alpha=0.9)

        ax.set_aspect('equal')

    # Hide the axes of every panel, including unused ones.
    for ax in axs:
        ax.axis('off')

    plt.suptitle("Subclass: "+str(subclass), fontsize=25)
    plt.tight_layout()
    plt.show()
    # One 32-panel figure per subclass: release it so figures do not pile up in memory.
    plt.close(fig)