In [None]:
from scipy.spatial import Delaunay
import alphashape
import shapely.geometry as geom
import scanpy as sc
import pandas as pd
import numpy as np

# Boundary definition
##### D7_normal is shown as an example; all other samples were processed in the same way

### D7 S

In [None]:
# Load the clustered D7_S sample and attach the per-spot region label from the
# napari metadata table (the assignment aligns on the spot index).
adata = sc.read_h5ad('/workdir/jp2626/chickenheart/jy/graphst_res/noery/ST_D7_S/GraphST_ARI_mclust_cluster12.h5ad')
meta = pd.read_csv('/workdir/jp2626/chickenheart/jy/compareRL/Ventricle_sub/napari/noery/meta_D7_S_napari_ventricle.csv',
                   index_col='Unnamed: 0')
adata.obs['region'] = meta['region']
# NOTE(review): this cell selects regions named 'region_1' / 'region_2', while the
# later cells filter the saved table on 'RV' / 'LV' — confirm where the rename happens.

bin_size = 30       # width of one coordinate bin, in the units of obsm['spatial']
ignore_margin = 2   # bins trimmed from the ends of the RV y-range

# Colours used for the diagnostic plots, keyed by the axis that is binned.
_BOUNDARY_PALETTES = {
    'y': {"none": "lightgrey", "outer": "red", "inner": "blue"},
    'x': {"none": "lightgrey", "bottom": "red", "upper": "blue"},
}


def _binned_coords(sub, axis):
    """Return ``(coords, min_bin, max_bin)`` for the spots of *sub*.

    *coords* holds the x/y spatial coordinates indexed by spot name plus an
    ``'<axis>_bin'`` column: the coordinate along *axis* divided by
    ``bin_size`` and rounded to the nearest integer.
    """
    coords = pd.DataFrame(sub.obsm['spatial'], index=sub.obs_names, columns=['x', 'y'])
    bin_col = f'{axis}_bin'
    coords[bin_col] = (coords[axis] / bin_size).round().astype(int)
    return coords, coords[bin_col].min(), coords[bin_col].max()


def _flag_boundary(sub, coords, axis, valid_bins, low_label, high_label, keep):
    """Flag the extreme spot of every bin, plot the result and return one edge.

    Within each ``'<axis>_bin'`` the spot with the smallest value on the other
    axis is labelled *low_label* and the spot with the largest value
    *high_label*; bins outside *valid_bins* are ignored and all remaining spots
    are labelled ``"none"``.  The labels are written to ``sub.obs['boundary']``
    (overwriting any previous call) and shown with ``sc.pl.spatial``.

    Returns the names of the spots whose final label equals *keep*.
    """
    bin_col = f'{axis}_bin'
    value_col = 'x' if axis == 'y' else 'y'
    per_bin = coords.groupby(bin_col)[value_col]

    low = coords.loc[per_bin.idxmin()]
    high = coords.loc[per_bin.idxmax()]
    low = low[low[bin_col].isin(valid_bins)]
    high = high[high[bin_col].isin(valid_bins)]

    flags = pd.Series("none", index=coords.index)
    flags.loc[low.index] = low_label
    # Assigned last, so it wins when one spot is both extremes of its bin.
    flags.loc[high.index] = high_label
    sub.obs['boundary'] = flags

    sc.pl.spatial(sub, color="boundary", spot_size=30,
                  palette=dict(_BOUNDARY_PALETTES[axis]))
    return sub.obs_names[sub.obs['boundary'] == keep]


# The bin ranges below are hand-tuned per edge; ``range`` excludes its upper bound.

# RV
adata_sub_pseudo = adata[adata.obs['region'] == 'region_1'].copy()

coords, min_bin, max_bin = _binned_coords(adata_sub_pseudo, 'y')
## Outer (smallest x per y-bin)
outerind = _flag_boundary(
    adata_sub_pseudo, coords, 'y',
    range(min_bin + ignore_margin, max_bin - ignore_margin),
    low_label="outer", high_label="inner", keep='outer',
)
## Inner (largest x per y-bin)
innerind = _flag_boundary(
    adata_sub_pseudo, coords, 'y',
    range(min_bin + ignore_margin, max_bin - ignore_margin - 4),
    low_label="outer", high_label="inner", keep='inner',
)

coords, min_bin, max_bin = _binned_coords(adata_sub_pseudo, 'x')
## Upper (smallest y per x-bin)
upperind = _flag_boundary(
    adata_sub_pseudo, coords, 'x',
    range(min_bin, max_bin - 3),
    low_label="upper", high_label="bottom", keep='upper',
)
## Bottom (largest y per x-bin)
bottomind = _flag_boundary(
    adata_sub_pseudo, coords, 'x',
    range(min_bin + 9, max_bin - 2),
    low_label="upper", high_label="bottom", keep='bottom',
)


# LV — the inner/outer labels are mirrored relative to the RV: here the largest x
# per y-bin is "outer" and the smallest is "inner".
adata_sub_pseudo = adata[adata.obs['region'] == 'region_2'].copy()

coords, min_bin, max_bin = _binned_coords(adata_sub_pseudo, 'y')
## Outer
outerind = outerind.union(_flag_boundary(
    adata_sub_pseudo, coords, 'y',
    range(min_bin + 11, max_bin),
    low_label="inner", high_label="outer", keep='outer',
))
## Inner
innerind = innerind.union(_flag_boundary(
    adata_sub_pseudo, coords, 'y',
    range(min_bin, max_bin - 10),
    low_label="inner", high_label="outer", keep='inner',
))

coords, min_bin, max_bin = _binned_coords(adata_sub_pseudo, 'x')
## Bottom
bottomind = bottomind.union(_flag_boundary(
    adata_sub_pseudo, coords, 'x',
    range(min_bin + 2, max_bin - 20),
    low_label="upper", high_label="bottom", keep='bottom',
))
## Upper
upperind = upperind.union(_flag_boundary(
    adata_sub_pseudo, coords, 'x',
    range(min_bin + 18, max_bin),
    low_label="upper", high_label="bottom", keep='upper',
))

In [None]:
# Turn the four collected spot-name sets into two per-spot label columns on the
# full dataset; spots in neither set of a pair are labelled "none".
spot_names = adata.obs_names

# Inner/outer wall: a spot present in both sets is labelled "inner".
on_inner = spot_names.isin(innerind)
on_outer = spot_names.isin(outerind)
adata.obs['boundary'] = np.where(on_inner, "inner", np.where(on_outer, "outer", "none"))

# Upper/bottom edge: a spot present in both sets is labelled "upper".
on_upper = spot_names.isin(upperind)
on_bottom = spot_names.isin(bottomind)
adata.obs['boundary_topbot'] = np.where(on_upper, "upper", np.where(on_bottom, "bottom", "none"))

# Visual check, then save the annotated table back over the metadata CSV that
# the region labels were read from.
sc.pl.spatial(adata, color=["boundary","boundary_topbot"], spot_size=30)
adata.obs.to_csv('/workdir/jp2626/chickenheart/jy/compareRL/Ventricle_sub/napari/noery/meta_D7_S_napari_ventricle.csv')


# Calculate layer position + cell type distribution

In [None]:
from scipy.spatial import cKDTree


def _relative_position(region_df, column, zero_label, one_label):
    """Return every spot's relative position between two annotated boundaries.

    For each row of *region_df* the Euclidean distance to the nearest spot
    labelled *zero_label* and to the nearest spot labelled *one_label* (both in
    *column*) is looked up with a KD-tree over the ``x``/``y`` columns.  The
    result is ``d_zero / (d_zero + d_one)``: 0 on the *zero_label* boundary and
    1 on the *one_label* boundary.

    If either label is absent in the region the nearest-neighbour distance is
    infinite and the position comes out as NaN.
    """
    xy = region_df[['x', 'y']].to_numpy()
    zero_xy = region_df.loc[region_df[column] == zero_label, ['x', 'y']].to_numpy()
    one_xy = region_df.loc[region_df[column] == one_label, ['x', 'y']].to_numpy()

    dist_to_zero, _ = cKDTree(zero_xy).query(xy)
    dist_to_one, _ = cKDTree(one_xy).query(xy)
    return dist_to_zero / (dist_to_zero + dist_to_one)


def _with_layer_positions(region_df):
    """Return a copy of *region_df* with ``layer_pos`` and ``layer_pos_topbot`` added.

    ``layer_pos``        : outer = 0, inner = 1
    ``layer_pos_topbot`` : bottom = 0, upper = 1
    """
    # Work on an explicit copy so the new columns are not written to a slice of df.
    region_df = region_df.copy()
    region_df['layer_pos'] = _relative_position(region_df, 'boundary', 'outer', 'inner')
    region_df['layer_pos_topbot'] = _relative_position(region_df, 'boundary_topbot', 'bottom', 'upper')
    return region_df


# NOTE(review): assumes the saved table has 'x' / 'y' coordinate columns and that
# 'region' holds 'RV' / 'LV' — neither is established by the code above; confirm.
df = pd.read_csv('/workdir/jp2626/chickenheart/jy/compareRL/Ventricle_sub/napari/noery/meta_D7_S_napari_ventricle.csv', index_col = 'Unnamed: 0')

# Positions are computed per ventricle, against that ventricle's own boundaries.
meta_rv = _with_layer_positions(df[df['region'] == 'RV'])
meta_lv = _with_layer_positions(df[df['region'] == 'LV'])
meta_df = meta_lv

meta_combined = pd.concat([meta_lv, meta_rv])
# Map the positions back to the full table; spots outside RV/LV get NaN.
df['layer_pos'] = df.index.map(meta_combined['layer_pos'])
df['layer_pos_topbot'] = df.index.map(meta_combined['layer_pos_topbot'])

adata = sc.read_h5ad('/workdir/jp2626/chickenheart/jy/graphst_res/noery/ST_D7_S/GraphST_ARI_mclust_cluster12.h5ad')
# Replacing adata.obs wholesale is only safe when the table lists exactly the
# same spots in the same order, so enforce it instead of discarding the check.
if not adata.obs_names.equals(df.index):
    raise ValueError("metadata index does not match adata.obs_names; refusing to replace adata.obs")
adata.obs = df
sc.pl.spatial(adata, color = ['layer_pos','layer_pos_topbot'], spot_size = 30, cmap= 'BuPu_r')
df.to_csv('/workdir/jp2626/chickenheart/jy/compareRL/Ventricle_sub/napari/noery/meta_D7_S_napari_ventricle.csv')

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from scipy.interpolate import make_interp_spline


def _bin_layers(region_df, n_bins, n_binstop):
    """Return a copy of *region_df* with equal-width position bins added.

    ``layer_bin`` cuts ``layer_pos`` into *n_bins* intervals and
    ``layer_topbot_bin`` cuts ``layer_pos_topbot`` into *n_binstop* intervals.
    """
    # Copy first so the new columns are not written to a slice of the full table.
    binned = region_df.copy()
    binned['layer_bin'] = pd.cut(binned['layer_pos'], bins=n_bins)
    binned['layer_topbot_bin'] = pd.cut(binned['layer_pos_topbot'], bins=n_binstop)
    return binned


def _summarise_layer_profiles(binned, cell_types):
    """Summarise the per-layer fraction of each cell type across top-bottom bands.

    Within every ``layer_topbot_bin`` band, the fraction of spots whose
    ``max_pred_celltype`` equals the cell type is computed per ``layer_bin``
    (NaN for a layer bin with no spots in that band).  The bands are then
    combined per (cell type, layer bin) into the mean and the 2.5th / 97.5th
    percentiles, returned as columns ``mean_fraction``, ``lower`` and ``upper``.

    Raises ValueError when *cell_types* is empty.
    """
    if len(cell_types) == 0:
        raise ValueError("no cell types selected; nothing to summarise")

    bands = binned['layer_topbot_bin'].dropna().unique()
    profiles = []
    for ct in cell_types:
        for band in bands:
            in_band = binned[binned['layer_topbot_bin'] == band]
            # Mean of a 0/1 indicator == matching spots / all spots in the bin.
            # observed=False keeps every layer bin, so all profiles line up.
            profile = (
                (in_band['max_pred_celltype'] == ct)
                .astype(float)
                .groupby(in_band['layer_bin'], observed=False)
                .mean()
                .reset_index(name='fraction')
            )
            profile['cell_type'] = ct
            profile['layer_topbot_bin'] = band
            profiles.append(profile)

    all_profiles = pd.concat(profiles, ignore_index=True)
    return (
        all_profiles.groupby(['cell_type', 'layer_bin'], observed=False)
        .agg(
            mean_fraction=('fraction', 'mean'),
            lower=('fraction', lambda x: np.nanpercentile(x, 2.5)),
            upper=('fraction', lambda x: np.nanpercentile(x, 97.5))
        )
        .reset_index()
    )


def _plot_layer_distribution(summary, cell_types, title, out_path):
    """Draw one smoothed mean curve with a percentile band per cell type and save it.

    A cell type is drawn only when more than three layer bins have non-NaN
    values (a cubic spline is fitted through them).  The figure is written to
    *out_path* as PDF and shown.
    """
    plt.figure(figsize=(5, 5))

    x_lo = x_hi = None
    for ct in cell_types:
        # str_to_rgb_tuple / color_map are not defined in this cell; they are
        # expected to exist in the notebook namespace.
        col = str_to_rgb_tuple(color_map[ct])
        rows = summary[summary['cell_type'] == ct]

        x = np.arange(len(rows))
        y = rows['mean_fraction'].values
        lower = rows['lower'].values
        upper = rows['upper'].values

        # Remove NaNs
        valid = ~np.isnan(y) & ~np.isnan(lower) & ~np.isnan(upper)
        if not valid.any():
            continue
        x, y, lower, upper = x[valid], y[valid], lower[valid], upper[valid]

        # Track the covered bin range here rather than reading loop variables
        # after the loop, which fails when the last cell type has no valid bins.
        x_lo = x.min() if x_lo is None else min(x_lo, x.min())
        x_hi = x.max() if x_hi is None else max(x_hi, x.max())

        if len(x) > 3:
            x_smooth = np.linspace(x.min(), x.max(), 300)
            y_smooth = make_interp_spline(x, y, k=3)(x_smooth)
            plt.plot(x_smooth, y_smooth, color=col, label=ct)

            lower_smooth = make_interp_spline(x, lower, k=3)(x_smooth)
            upper_smooth = make_interp_spline(x, upper, k=3)(x_smooth)
            plt.fill_between(x_smooth, lower_smooth, upper_smooth, color=col, alpha=0.1)

    # Set x-axis labels (every cell type shares the same layer bins)
    bin_labels = summary.loc[summary['cell_type'] == cell_types[0], 'layer_bin'].astype(str)
    plt.xticks(
        ticks=np.arange(len(bin_labels)),
        labels=bin_labels,
        rotation=45
    )
    plt.tick_params(axis='both', which='both', length=0)

    # Remove padding
    if x_lo is not None:
        plt.xlim(x_lo, x_hi)
    ax = plt.gca()
    ax.margins(x=0)
    ax.yaxis.set_major_formatter(mticker.FormatStrFormatter('%.3f'))

    plt.ylabel('Fraction of Cells')
    plt.xlabel('Layer position (outer→inner)')
    plt.title(title)
    plt.legend().set_visible(False)
    plt.tight_layout(pad=0)

    plt.savefig(out_path, format='pdf', bbox_inches='tight', dpi=300, pad_inches=0)
    plt.show()


df = pd.read_csv('/workdir/jp2626/chickenheart/jy/compareRL/Ventricle_sub/napari/noery/meta_D7_S_napari_ventricle.csv', index_col = 'Unnamed: 0')

n_bins = 10
n_binstop = 5

# Same analysis for the right and the left ventricle.
for region in ('RV', 'LV'):
    meta_df = _bin_layers(df[df['region'] == region], n_bins, n_binstop)

    # Endothelial cell types, excluding any whose name contains 'Endo1'.
    # Non-string entries (e.g. NaN for unannotated spots) are skipped.
    cell_types = [
        ct for ct in meta_df['max_pred_celltype'].unique()
        if isinstance(ct, str) and 'Endo' in ct and 'Endo1' not in ct
    ]

    summary_profiles = _summarise_layer_profiles(meta_df, cell_types)
    _plot_layer_distribution(
        summary_profiles,
        cell_types,
        title=f'D7_Normal_{region} - Endo Distribution',
        out_path=f'/workdir/jp2626/chickenheart/jy/compareRL/endothelial/D7_S_endo_distribution_{region}_smooth_CI_bin10.pdf',
    )

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from scipy.interpolate import make_interp_spline


def _bin_layers(region_df, n_bins, n_binstop):
    """Return a copy of *region_df* with equal-width position bins added.

    ``layer_bin`` cuts ``layer_pos`` into *n_bins* intervals and
    ``layer_topbot_bin`` cuts ``layer_pos_topbot`` into *n_binstop* intervals.
    """
    # Copy first so the new columns are not written to a slice of the full table.
    binned = region_df.copy()
    binned['layer_bin'] = pd.cut(binned['layer_pos'], bins=n_bins)
    binned['layer_topbot_bin'] = pd.cut(binned['layer_pos_topbot'], bins=n_binstop)
    return binned


def _summarise_layer_profiles(binned, cell_types):
    """Summarise the per-layer fraction of each cell type across top-bottom bands.

    Within every ``layer_topbot_bin`` band, the fraction of spots whose
    ``max_pred_celltype`` equals the cell type is computed per ``layer_bin``
    (NaN for a layer bin with no spots in that band).  The bands are then
    combined per (cell type, layer bin) into the mean and the 2.5th / 97.5th
    percentiles, returned as columns ``mean_fraction``, ``lower`` and ``upper``.

    Raises ValueError when *cell_types* is empty.
    """
    if len(cell_types) == 0:
        raise ValueError("no cell types selected; nothing to summarise")

    bands = binned['layer_topbot_bin'].dropna().unique()
    profiles = []
    for ct in cell_types:
        for band in bands:
            in_band = binned[binned['layer_topbot_bin'] == band]
            # Mean of a 0/1 indicator == matching spots / all spots in the bin.
            # observed=False keeps every layer bin, so all profiles line up.
            profile = (
                (in_band['max_pred_celltype'] == ct)
                .astype(float)
                .groupby(in_band['layer_bin'], observed=False)
                .mean()
                .reset_index(name='fraction')
            )
            profile['cell_type'] = ct
            profile['layer_topbot_bin'] = band
            profiles.append(profile)

    all_profiles = pd.concat(profiles, ignore_index=True)
    return (
        all_profiles.groupby(['cell_type', 'layer_bin'], observed=False)
        .agg(
            mean_fraction=('fraction', 'mean'),
            lower=('fraction', lambda x: np.nanpercentile(x, 2.5)),
            upper=('fraction', lambda x: np.nanpercentile(x, 97.5))
        )
        .reset_index()
    )


def _plot_layer_distribution(summary, cell_types, title, out_path):
    """Draw one smoothed mean curve with a percentile band per cell type and save it.

    A cell type is drawn only when more than three layer bins have non-NaN
    values (a cubic spline is fitted through them).  The figure is written to
    *out_path* as PDF and shown.
    """
    plt.figure(figsize=(5, 5))

    x_lo = x_hi = None
    for ct in cell_types:
        # str_to_rgb_tuple / color_map are not defined in this cell; they are
        # expected to exist in the notebook namespace.
        col = str_to_rgb_tuple(color_map[ct])
        rows = summary[summary['cell_type'] == ct]

        x = np.arange(len(rows))
        y = rows['mean_fraction'].values
        lower = rows['lower'].values
        upper = rows['upper'].values

        # Remove NaNs
        valid = ~np.isnan(y) & ~np.isnan(lower) & ~np.isnan(upper)
        if not valid.any():
            continue
        x, y, lower, upper = x[valid], y[valid], lower[valid], upper[valid]

        # Track the covered bin range here rather than reading loop variables
        # after the loop, which fails when the last cell type has no valid bins.
        x_lo = x.min() if x_lo is None else min(x_lo, x.min())
        x_hi = x.max() if x_hi is None else max(x_hi, x.max())

        if len(x) > 3:
            x_smooth = np.linspace(x.min(), x.max(), 300)
            y_smooth = make_interp_spline(x, y, k=3)(x_smooth)
            plt.plot(x_smooth, y_smooth, color=col, label=ct)

            lower_smooth = make_interp_spline(x, lower, k=3)(x_smooth)
            upper_smooth = make_interp_spline(x, upper, k=3)(x_smooth)
            plt.fill_between(x_smooth, lower_smooth, upper_smooth, color=col, alpha=0.1)

    # Set x-axis labels (every cell type shares the same layer bins)
    bin_labels = summary.loc[summary['cell_type'] == cell_types[0], 'layer_bin'].astype(str)
    plt.xticks(
        ticks=np.arange(len(bin_labels)),
        labels=bin_labels,
        rotation=45
    )
    plt.tick_params(axis='both', which='both', length=0)

    # Remove padding
    if x_lo is not None:
        plt.xlim(x_lo, x_hi)
    ax = plt.gca()
    ax.margins(x=0)
    ax.yaxis.set_major_formatter(mticker.FormatStrFormatter('%.3f'))

    plt.ylabel('Fraction of Cells')
    plt.xlabel('Layer position (outer→inner)')
    plt.title(title)
    plt.legend().set_visible(False)
    plt.tight_layout(pad=0)

    plt.savefig(out_path, format='pdf', bbox_inches='tight', dpi=300, pad_inches=0)
    plt.show()


df = pd.read_csv('/workdir/jp2626/chickenheart/jy/compareRL/Ventricle_sub/napari/noery/meta_D7_S_napari_ventricle.csv', index_col = 'Unnamed: 0')

n_bins = 10
n_binstop = 5

# Same analysis for the right and the left ventricle.
for region in ('RV', 'LV'):
    meta_df = _bin_layers(df[df['region'] == region], n_bins, n_binstop)

    # Cell types whose name contains 'Cardi1' or 'Cardi2' (plotted as "vCM").
    # Non-string entries (e.g. NaN for unannotated spots) are skipped.
    cell_types = [
        ct for ct in meta_df['max_pred_celltype'].unique()
        if isinstance(ct, str) and ('Cardi1' in ct or 'Cardi2' in ct)
    ]

    summary_profiles = _summarise_layer_profiles(meta_df, cell_types)
    # The vCM figures are written next to the endothelial ones, as in the original paths.
    _plot_layer_distribution(
        summary_profiles,
        cell_types,
        title=f'D7_Normal_{region} - vCM Distribution',
        out_path=f'/workdir/jp2626/chickenheart/jy/compareRL/endothelial/D7_S_vCM_distribution_{region}_smooth_CI_bin10.pdf',
    )