In [None]:
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator
from matplotlib.colors import LinearSegmentedColormap
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.axes_grid1 import make_axes_locatable
import random
import os
import sys
import time
import csv
import re
import pandas as pd
import scanpy as sc
import numpy as np
import scipy.spatial as scisp
from scipy.sparse import coo_matrix, csr_matrix, csc_matrix
import math
import anndata as ad
import igraph as ig
import plotly.graph_objects as go
import scanpy.external as sce
import scipy.sparse as sp
from statsmodels.nonparametric.smoothers_lowess import lowess
from sklearn.metrics import r2_score
from scipy.interpolate import interp1d
import seaborn as sns
import os
from copy import copy
import matplotlib as mpl
import torch
from sklearn import metrics
import multiprocessing as mp
from GraphST import GraphST
import distinctipy


import sys
sys.path.append('/fs/cbsuvlaminck5/workdir/jp2626/graphst/Systematic/ARI/')
from Systematic_fromadata import *

# --- scanpy settings ---
# verbosity levels: errors (0), warnings (1), info (2), hints (3)
sc.settings.verbosity = 2
sc.settings.n_jobs = 20
sc.settings.set_figure_params(dpi=100, facecolor="white", frameon=True, figsize=(5, 9))

# --- colormap ---
# Private copy of the Reds colormap (so the shared one is not modified) whose
# "under" color, used for values below the color scale's lower limit, is light gray.
reds = copy(mpl.cm.Reds)
reds.set_under("lightgray")

# --- pandas display ---
# Raise the row / column / width limits so wide tables are printed in full.
for _display_option in ('display.max_rows', 'display.max_columns', 'display.width'):
    pd.set_option(_display_option, 500)

# Find Valve subdomains with ARI and order them based on Plasticity score

### Normal heart

In [None]:
import matplotlib.pyplot as plt
from matplotlib import cm, colors
import scanpy as sc
import numpy as np
import os
import glob

# Valve sub-domain detection for the "*_S" samples.
# For every sample this cell:
#   1. subsets the whole-section AnnData to the valve domains listed in Valve_region,
#   2. runs the GraphST preprocessing and detect_domain() on that subset,
#   3. copies the resulting labels back onto the whole section as 'domain_sub_valve_ari',
#   4. orders the sub-domains by their mean Stagescore_FB (highest first),
#   5. writes two CSV tables and an updated ARI.h5ad into ./<sample>/.

# Define valve regions per sample (matched against obs['domain'] after casting it to int)
Valve_region = {"D6_S": [7,8,12] , "D7_S": [12], "D10_S": [12], "D12_S": [2,8]}

# Load sample files. glob returns matches in arbitrary order, so sort for reproducible runs.
adata_files = sorted(glob.glob("/fs/cbsuvlaminck5/workdir/jp2626/chickenheart/jy/graphst_res/noery/*_S/*mclust*.h5ad"))
adata_files = [f for f in adata_files if 'D4' not in f and 'D5' not in f]
# The sample name is the folder that directly contains the .h5ad file. Deriving it from the
# parent folder (instead of a hard-coded position in the path) keeps this list and the loop
# below in agreement.
sample_names = [os.path.basename(os.path.dirname(f)).replace('ST_', '') for f in adata_files]

# Settings shared by all samples
R_HOME = "/home/jp2626/miniconda3/envs/GraphST/lib/R"  # R installation handed to detect_domain
cutoff_point = 'Start'
tools = 'mclust'
# NOTE(review): threshold and device are not passed to anything in this cell; they are kept
# only because other cells may read them. Confirm and remove if truly unused.
threshold = 0.65
device = torch.device('cuda:3' if torch.cuda.is_available() else 'cpu')

domain_col = 'domain_sub_valve_ari'
ordered_col = domain_col + '_ordered'

for sample, adata_f in zip(sample_names, adata_files):
    # Look the sample up first so an unlisted sample fails before any heavy work is done
    valve_region = Valve_region[sample]

    # Per-sample output folder, relative to the notebook's working directory
    file_path = './' + sample
    os.makedirs(file_path, exist_ok=True)
    adata_f_new = os.path.join(file_path, 'ARI.h5ad')
    print(f'Start GraphST ARI: {sample} -> {adata_f_new}')

    adata_whole = sc.read_h5ad(adata_f)

    # Drop any existing 'domain_sub*' columns before recomputing them
    stale_cols = [col for col in adata_whole.obs.columns if 'domain_sub' in col]
    adata_whole.obs.drop(columns=stale_cols, inplace=True)

    # Filter for valve regions
    adata_whole.obs['domain'] = adata_whole.obs['domain'].astype(int)
    adata_valve = adata_whole[adata_whole.obs['domain'].isin(valve_region)].copy()

    # GraphST preprocessing
    sc.pp.highly_variable_genes(adata_valve, flavor="seurat", n_top_genes=3000)
    GraphST.construct_interaction_KNN(adata_valve)
    GraphST.add_contrastive_label(adata_valve)
    GraphST.get_feature(adata_valve)

    # detect_domain comes from Systematic_fromadata; the sub-domain labels are read back
    # from obs['domain'] of the object it returns.
    adata_valve, number_cluster = detect_domain(adata_valve, file_path, cutoff_point, R_HOME, tools=tools)

    # Merge back subcluster labels; spots outside the valve subset get 'sub_valve_nan' ...
    sub_labels = adata_valve.obs['domain'].astype(str)
    adata_whole.obs[domain_col] = 'sub_valve_' + sub_labels.reindex(adata_whole.obs.index).fillna('nan')

    # ... which is then turned into a real missing value
    mask = adata_whole.obs[domain_col].str.contains('_nan', na=False)
    adata_whole.obs.loc[mask, domain_col] = np.nan

    # Add Stage score of FB (aligned on the spot index; spots missing from the CSV stay NaN)
    FBscore = pd.read_csv(f'/fs/cbsuvlaminck2/workdir/jp2626/chicken/scVelo/netflow_Stagescore_FB_{sample}.csv', index_col='Unnamed: 0')
    FBscore = FBscore.loc[FBscore.index.intersection(adata_whole.obs.index)]
    adata_whole.obs['Stagescore_FB'] = FBscore['Stagescore_FB']

    # Save domain mean scores, highest mean first
    adata_valve = adata_whole[~pd.isna(adata_whole.obs[domain_col])].copy()
    grouped_means = (
        adata_valve.obs.groupby(domain_col)['Stagescore_FB']
        .mean()
        .reset_index()
        .sort_values(by='Stagescore_FB', ascending=False)
    )
    grouped_means.to_csv(os.path.join(file_path, 'netflow_plasticityscore_FB.csv'), index=False)

    # Assign ordered categorical (category order = descending mean score)
    domain_order = grouped_means[domain_col].tolist()
    adata_whole.obs[ordered_col] = pd.Categorical(
        adata_whole.obs[domain_col],
        categories=domain_order,
        ordered=True
    )

    # Create proportion crosstab: each row (sub-domain) sums to 1 across cell types
    dfcount_prop = pd.crosstab(
        adata_whole.obs[ordered_col],
        adata_whole.obs['max_pred_celltype'],
        normalize='index'
    )
    # Long format for bar plots: one row per (sub-domain, cell type) pair
    df_long = dfcount_prop.reset_index().melt(
        id_vars=ordered_col,
        var_name='max_pred_celltype',
        value_name='proportion'
    )
    df_long.columns = ['Domain', 'max_pred_celltype', 'proportion']
    df_long.to_csv(os.path.join(file_path, 'Valve_proportion_forbarplot.csv'), index=False)

    # Write the whole section, with the new obs columns, to the sample's output folder
    adata_whole.write_h5ad(adata_f_new)



### LAL heart

In [None]:
import matplotlib.pyplot as plt
from matplotlib import cm, colors
import scanpy as sc
import numpy as np
import os
import glob

# Valve sub-domain detection for the "*_L" samples.
# For every sample this cell:
#   1. subsets the whole-section AnnData to the valve domains listed in Valve_region,
#   2. runs the GraphST preprocessing and detect_domain() on that subset,
#   3. copies the resulting labels back onto the whole section as 'domain_sub_valve_ari',
#   4. orders the sub-domains by their mean Stagescore_FB (highest first),
#   5. writes two CSV tables and an updated ARI.h5ad into ./<sample>/.

# Define valve regions per sample (matched against obs['domain'] after casting it to int)
Valve_region = {"D6_L": [11, 8] , "D7_L": [5,10,11], "D10_L": [5], "D12_L": [3]}

# Load sample files. glob returns matches in arbitrary order, so sort for reproducible runs.
adata_files = sorted(glob.glob("/fs/cbsuvlaminck5/workdir/jp2626/chickenheart/jy/graphst_res/noery/*_L/*mclust*.h5ad"))
adata_files = [f for f in adata_files if 'D4' not in f and 'D5' not in f]
# The sample name is the folder that directly contains the .h5ad file. Deriving it from the
# parent folder (instead of a hard-coded position in the path) keeps this list and the loop
# below in agreement.
sample_names = [os.path.basename(os.path.dirname(f)).replace('ST_', '') for f in adata_files]

# Settings shared by all samples
R_HOME = "/home/jp2626/miniconda3/envs/GraphST/lib/R"  # R installation handed to detect_domain
cutoff_point = 'Start'
tools = 'mclust'
# NOTE(review): threshold and device are not passed to anything in this cell; they are kept
# only because other cells may read them. Confirm and remove if truly unused.
threshold = 0.65
device = torch.device('cuda:3' if torch.cuda.is_available() else 'cpu')

domain_col = 'domain_sub_valve_ari'
ordered_col = domain_col + '_ordered'

for sample, adata_f in zip(sample_names, adata_files):
    # Look the sample up first so an unlisted sample fails before any heavy work is done
    valve_region = Valve_region[sample]

    # Per-sample output folder, relative to the notebook's working directory
    file_path = './' + sample
    os.makedirs(file_path, exist_ok=True)
    adata_f_new = os.path.join(file_path, 'ARI.h5ad')
    print(f'Start GraphST ARI: {sample} -> {adata_f_new}')

    adata_whole = sc.read_h5ad(adata_f)

    # Drop any existing 'domain_sub*' columns before recomputing them
    stale_cols = [col for col in adata_whole.obs.columns if 'domain_sub' in col]
    adata_whole.obs.drop(columns=stale_cols, inplace=True)

    # Filter for valve regions
    adata_whole.obs['domain'] = adata_whole.obs['domain'].astype(int)
    adata_valve = adata_whole[adata_whole.obs['domain'].isin(valve_region)].copy()

    # GraphST preprocessing
    sc.pp.highly_variable_genes(adata_valve, flavor="seurat", n_top_genes=3000)
    GraphST.construct_interaction_KNN(adata_valve)
    GraphST.add_contrastive_label(adata_valve)
    GraphST.get_feature(adata_valve)

    # detect_domain comes from Systematic_fromadata; the sub-domain labels are read back
    # from obs['domain'] of the object it returns.
    adata_valve, number_cluster = detect_domain(adata_valve, file_path, cutoff_point, R_HOME, tools=tools)

    # Merge back subcluster labels; spots outside the valve subset get 'sub_valve_nan' ...
    sub_labels = adata_valve.obs['domain'].astype(str)
    adata_whole.obs[domain_col] = 'sub_valve_' + sub_labels.reindex(adata_whole.obs.index).fillna('nan')

    # ... which is then turned into a real missing value
    mask = adata_whole.obs[domain_col].str.contains('_nan', na=False)
    adata_whole.obs.loc[mask, domain_col] = np.nan

    # Add Stage score of FB (aligned on the spot index; spots missing from the CSV stay NaN)
    FBscore = pd.read_csv(f'/fs/cbsuvlaminck2/workdir/jp2626/chicken/scVelo/netflow_Stagescore_FB_{sample}.csv', index_col='Unnamed: 0')
    FBscore = FBscore.loc[FBscore.index.intersection(adata_whole.obs.index)]
    adata_whole.obs['Stagescore_FB'] = FBscore['Stagescore_FB']

    # Save domain mean scores, highest mean first
    adata_valve = adata_whole[~pd.isna(adata_whole.obs[domain_col])].copy()
    grouped_means = (
        adata_valve.obs.groupby(domain_col)['Stagescore_FB']
        .mean()
        .reset_index()
        .sort_values(by='Stagescore_FB', ascending=False)
    )
    grouped_means.to_csv(os.path.join(file_path, 'netflow_plasticityscore_FB.csv'), index=False)

    # Assign ordered categorical (category order = descending mean score)
    domain_order = grouped_means[domain_col].tolist()
    adata_whole.obs[ordered_col] = pd.Categorical(
        adata_whole.obs[domain_col],
        categories=domain_order,
        ordered=True
    )

    # Create proportion crosstab: each row (sub-domain) sums to 1 across cell types
    dfcount_prop = pd.crosstab(
        adata_whole.obs[ordered_col],
        adata_whole.obs['max_pred_celltype'],
        normalize='index'
    )
    # Long format for bar plots: one row per (sub-domain, cell type) pair
    df_long = dfcount_prop.reset_index().melt(
        id_vars=ordered_col,
        var_name='max_pred_celltype',
        value_name='proportion'
    )
    df_long.columns = ['Domain', 'max_pred_celltype', 'proportion']
    df_long.to_csv(os.path.join(file_path, 'Valve_proportion_forbarplot.csv'), index=False)

    # Write the whole section, with the new obs columns, to the sample's output folder.
    # Done last so the .h5ad is only written once every table has been produced.
    adata_whole.write_h5ad(adata_f_new)



# Figure generation

In [None]:
# Collect the per-sample ARI.h5ad outputs, skipping any path containing 'D4', 'D5' or '_R'.
# glob returns matches in arbitrary order, so sort to keep the sample order stable between runs.
adata_files = sorted(glob.glob("/workdir/jp2626/chicken/graphst_valve_ari/*/ARI.h5ad"))
adata_files = [f for f in adata_files if 'D4' not in f and 'D5' not in f and '_R' not in f]
# Sample name = name of the folder holding ARI.h5ad (robust to the base directory moving)
sample_names = [os.path.basename(os.path.dirname(path)).replace('ST_', '') for path in adata_files]

In [None]:
import matplotlib.pyplot as plt
from matplotlib import cm, colors
import scanpy as sc
import numpy as np
import pandas as pd
import glob

# One spatial panel per sample, colored by the ordered valve sub-domains. Every panel gets the
# same axis span (the largest x / y extent over all samples), centered on that sample's own
# bounding box, so the panels are drawn at a common scale.

VALVE_ARI_DIR = "/workdir/jp2626/chicken/graphst_valve_ari"


def _sample_order(path):
    """Sort key for an ARI.h5ad path: the number after 'D' in its folder name, then the name."""
    name = os.path.basename(os.path.dirname(path))
    day = re.search(r'D(\d+)', name)
    return (int(day.group(1)) if day else float('inf'), name)


adata_files = glob.glob(f"{VALVE_ARI_DIR}/*/ARI.h5ad")
adata_files = [f for f in adata_files if 'D4' not in f and 'D5' not in f and '_R' not in f]
# glob order is arbitrary; sort so the panel order is the same on every run
adata_files.sort(key=_sample_order)
# Sample name = name of the folder holding ARI.h5ad
sample_names = [os.path.basename(os.path.dirname(path)).replace('ST_', '') for path in adata_files]

if not adata_files:
    raise FileNotFoundError(f"No ARI.h5ad files found under {VALVE_ARI_DIR}/*/")

# Compute max range and centers
max_x_range, max_y_range = 0.0, 0.0
center_points = []

for adata_f in adata_files:
    adata = sc.read_h5ad(adata_f)
    coords = adata.obsm['spatial']
    x_max, x_min = coords[:, 0].max(), coords[:, 0].min()
    y_max, y_min = coords[:, 1].max(), coords[:, 1].min()
    max_x_range = max(max_x_range, x_max - x_min)
    max_y_range = max(max_y_range, y_max - y_min)
    center_points.append([(x_max + x_min) / 2, (y_max + y_min) / 2])

# Plotting. squeeze=False always returns a 2-D array of Axes, so indexing also works
# when only a single sample is found.
n_panels = len(adata_files)
fig, axs = plt.subplots(1, n_panels, figsize=(4 * n_panels, 4), gridspec_kw={'hspace': 0.4}, squeeze=False)
axs = axs.ravel()

domain_col = 'domain_sub_valve_ari_ordered'

for sample, adata_f, ax, (cx, cy) in zip(sample_names, adata_files, axs, center_points):
    print(sample)
    adata = sc.read_h5ad(adata_f)

    sc.pl.spatial(
        adata,
        color=domain_col,
        spot_size=30,
        ax=ax,
        show=False,
        frameon=False,
        legend_loc=None,
        title=""
    )

    ax.set_xlim(cx - max_x_range / 2, cx + max_x_range / 2)
    ax.set_ylim(cy + max_y_range / 2, cy - max_y_range / 2)  # flip Y for top-down

# Save and display
fig.tight_layout()
fig.savefig(f"{VALVE_ARI_DIR}/single_valve_ordered_all_Sham_LAL.pdf", format='pdf', bbox_inches='tight', dpi=300, pad_inches=0)
plt.show()
