In [None]:
import os
# import plotly.express as px
import scanpy as sc
import scimap as sm
import seaborn as sns
import anndata as ad
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import rcParams
%matplotlib inline
import warnings
from skimage.io import imread
warnings.filterwarnings("ignore")

In [None]:
# Root directory holding the .h5ad files, images and phenotype rules used below.
# Set the PANC_CODEX_DIR environment variable to run the notebook on another
# machine; when it is unset the original workstation path is used.
base_dir = os.environ.get("PANC_CODEX_DIR", "/home/smith6jt/panc_CODEX")

In [None]:
# Source object: the transfer cell further down copies selected .uns/.obsm/.obsp
# entries from it onto the gated object.
adata = ad.read_h5ad(os.path.join(base_dir, 'CODEX_panc_scvi_BioCov.h5ad'))


In [None]:
# Gated object (destination of the transfer).
# NOTE(review): a later cell writes a file with this exact name, so a fresh-kernel
# "Run All" only works if that file already exists on disk — confirm the intended
# execution order.
adata_gated = ad.read_h5ad(os.path.join(base_dir, 'CODEX_scvi_BioCov_gated.h5ad'))

In [None]:
adata

In [None]:
adata_gated

In [None]:
import copy
import scipy.sparse as sp

def _indexer_from_src_to_dst(src: ad.AnnData, dst: ad.AnnData):
    """Locate every cell of `dst` inside `src`.

    Returns an integer array with one entry per label in `dst.obs_names`, giving
    that label's row position in `src.obs_names`.  Indexing `src` rows with the
    result therefore yields them in `dst` order.

    Raises ValueError if any `dst` label is absent from `src`.
    """
    positions = src.obs_names.get_indexer(dst.obs_names)
    not_found = positions < 0
    if not_found.any():
        # Report only a handful of offending labels to keep the message readable.
        missing = list(dst.obs_names[np.flatnonzero(not_found)][:10])
        raise ValueError(
            f"Destination has cells not in source (showing up to 10): {missing}. "
            "Ensure dst was derived from src or set strict=False to allow intersections."
        )
    return positions

def transfer_from_source(
    src: "ad.AnnData",
    dst: "ad.AnnData",
    uns_keys=None,
    obsm_keys=None,
    copy_obsp: bool = True,
    strict: bool = True,
):
    """
    Copy selected metadata and embeddings/graphs from `src` to `dst`, aligned on obs_names.

    `dst` is modified in place and also returned.

    Parameters
    ----------
    src, dst
        Objects exposing `obs_names`, `n_obs`, `uns`, `obsm` and `obsp`.
    uns_keys
        `.uns` keys to deep-copy (e.g. ['neighbors', 'umap']). Keys missing from
        `src.uns` are skipped.
    obsm_keys
        `.obsm` keys to copy (e.g. ['X_umap', 'spatial']). Keys missing from
        `src.obsm` are skipped.
    copy_obsp
        If True, copy every `.obsp` entry (e.g. 'connectivities', 'distances').
    strict
        If True, every cell of `dst` must exist in `src`; rows are taken from `src`
        in `dst` order. If False, only cells present in both objects are copied:
        `.obsm` rows of cells missing from `src` become NaN (arrays are cast to
        float) and `.obsp` rows/columns of such cells stay empty (zero).

    Raises
    ------
    ValueError
        If `strict` and `dst` has cells absent from `src`, if not `strict` and the
        two objects share no cells, or if an array in `.obsm` does not have
        `src.n_obs` rows.
    """
    if uns_keys is None:
        uns_keys = []
    if obsm_keys is None:
        obsm_keys = []

    # Build the row mapping once; every section below reuses it.
    if strict:
        # idx[i] is the src row holding dst cell i.
        idx = _indexer_from_src_to_dst(src, dst)
    else:
        common = src.obs_names.intersection(dst.obs_names)
        if len(common) == 0:
            raise ValueError("No overlapping cells between src and dst.")
        # Positions of the shared cells in each object, in the same (common) order,
        # so src row src_pos[j] corresponds to dst row dst_pos[j].
        src_pos = src.obs_names.get_indexer(common)
        dst_pos = dst.obs_names.get_indexer(common)

    # 1) Copy selected .uns (deepcopy to avoid shared references)
    for k in uns_keys:
        if k in src.uns:
            dst.uns[k] = copy.deepcopy(src.uns[k])

    # 2) Copy selected .obsm with alignment
    for k in obsm_keys:
        if k not in src.obsm:
            continue
        val = src.obsm[k]
        if isinstance(val, pd.DataFrame):
            if strict:
                if not val.index.equals(src.obs_names):
                    try:
                        # Put the frame into src row order before positional selection.
                        val = val.reindex(src.obs_names)
                    except ValueError:
                        # Raised for duplicate index labels; keep the frame as-is and
                        # fall back to purely positional selection (best effort).
                        pass
                dst.obsm[k] = val.iloc[idx]
            else:
                shared = val.index.intersection(dst.obs_names)
                # Reindex to dst order; rows for cells without data become NaN.
                dst.obsm[k] = val.loc[shared].reindex(dst.obs_names)
        else:
            # Array-like whose first axis is the cell axis.
            arr = np.asarray(val)
            if arr.shape[0] != src.n_obs:
                raise ValueError(f".obsm['{k}'] has shape {arr.shape} not matching src.n_obs={src.n_obs}.")
            if strict:
                dst.obsm[k] = arr[idx]
            else:
                # NaN cannot be stored in integer arrays, so the result is float.
                out = np.full((dst.n_obs,) + arr.shape[1:], np.nan, dtype=float)
                out[dst_pos] = arr[src_pos]
                dst.obsm[k] = out

    # 3) Copy all .obsp with alignment
    if copy_obsp and len(src.obsp.keys()) > 0:
        for k, mat in src.obsp.items():
            if strict:
                if sp.issparse(mat):
                    dst.obsp[k] = mat[idx][:, idx]
                else:
                    dst.obsp[k] = np.asarray(mat)[np.ix_(idx, idx)]
            elif sp.issparse(mat):
                # Take the block of shared cells, then relabel its coordinates from
                # "position in common" to "position in dst". Cells of dst that are
                # not in src keep empty rows/columns (no edges).
                sub = mat.tocsr()[src_pos][:, src_pos].tocoo()
                n = dst.n_obs
                dst.obsp[k] = sp.coo_matrix(
                    (sub.data, (dst_pos[sub.row], dst_pos[sub.col])),
                    shape=(n, n),
                ).tocsr()
            else:
                sub = np.asarray(mat)[np.ix_(src_pos, src_pos)]
                n = dst.n_obs
                out = np.zeros((n, n), dtype=sub.dtype)
                out[np.ix_(dst_pos, dst_pos)] = sub
                dst.obsp[k] = out

    return dst


In [None]:
# Keys to carry over from the source object onto the gated object.
uns_to_copy = ['_scvi_manager_uuid', '_scvi_uuid', 'donor_id_colors', 'neighbors', 'umap']
obsm_to_copy = ['X_scVI', 'X_umap', '_scvi_extra_categorical_covs']                      

# NOTE(review): transfer_from_source returns `dst`, so after this cell `adata` is the
# same object as `adata_gated` and the original source object is no longer bound to
# `adata`. Re-running the cell would then use the gated object as its own source.
adata = transfer_from_source(
    src=adata,
    dst=adata_gated,
    uns_keys=uns_to_copy,
    obsm_keys=obsm_to_copy,
    copy_obsp=True,
    strict=True,
)

In [None]:
# Rebuild a minimal AnnData whose X is a copy of the 'raw_mfi' layer, keeping a fixed
# list of obs columns and the full var table.
# NOTE(review): only X, obs and var are passed to the constructor, so any .uns/.obsm/
# .obsp/.layers on the previous `adata` are not carried into the new object — confirm
# this is intended, since later cells call sc.pl.umap.
raw = adata.layers['raw_mfi']
adata = ad.AnnData(
X=raw.copy(),
obs=adata.obs[['imageid', 'Object ID', 'Object type', 'Classification', 'Parent', 'X_centroid', 'Y_centroid', 'Nucleus Area', 'Nucleus Length', 'Nucleus Circularity', 'Nucleus Solidity', 'Nucleus: Max diameter µm', 'Nucleus: Min diameter µm', 'Cell Area', 'Cell Length', 'Cell Circularity', 'Cell Solidity', 'Cell: Max diameter µm', 'Cell: Min diameter µm', 'Dist to Closest Peri-Islet', 'Dist to Closest Tissue', 'Dist to Closest Capillary', 'Dist to Closest Lymphatic', 'Dist to Closest Nerve', 'Dist to Closest Islet', 'Donor Status', 'Age', 'Gender', 'GADA', 'ZnT8A', 'IA2A', 'mIAA', 'None', 'n_genes_by_counts', 'total_counts', 'n_genes', '_scvi_batch', '_scvi_labels', 'donor_id']].copy(),
var=adata.var.copy()
)
# Marker names stored alongside the data under .uns['all_markers'].
adata.uns["all_markers"] = ['DAPI','CD31', 'CD8a', 'CD3e', 'SMA', 'Ki67','CD4', 'CD34', 'HLADR', 'PDPN', 'panCK',
       'ECAD', 'CD163', 'SST', 'ColIV', 'VIM', 'CD20', 'LGALS3', 'B3TUBB',
       'GCG', 'KRT14', 'GAP43', 'CD35', 'CHGA', 'PGP9.5', 'INS', 'CD44',
       'CD45', 'NaKATPase', 'BCatenin', 'CD68', 'BActin', 'CK19', 'epCAM', 'KRT8-18']

In [None]:
# Store the current (raw-MFI) object under .raw before the log transform is applied.
adata.raw = adata
# NOTE(review): later cells read a 'log' layer — presumably created here; confirm.
adata = sm.pp.log1p(adata)

In [None]:
# OME-TIFF passed to the gating viewer below.
image_path = os.path.join(base_dir, 'local_images/Aab_6450.ome.tiff')

In [None]:
# Enable Qt event-loop integration in the kernel for the GUI call in the next cell.
%gui qt

In [None]:
# Interactive gating on the image for subset "6450".
# NOTE(review): the following cells read adata.uns['gates'], presumably written by this
# call from values chosen manually in the viewer — the result is therefore not
# reproducible from code alone; confirm and consider persisting the chosen gates.
sm.pl.napariGater(
    image_path,
    adata,
    subset="6450",
    centroid_units='microns', 
    # calculate_contrast=False, 
    flip_y=False,              
    verbose=True,
    point_size=15,
    layer='raw',
    log=True
)

In [None]:
# Display the gates currently stored on the object.
adata.uns['gates']

In [None]:
# Make every dict key and every DataFrame column label found under adata.uns['gates']
# a string; the stored values themselves are not converted.
_gates = adata.uns.get('gates')
if _gates is not None:
    import pandas as pd

    def _stringify(node):
        """Return `node` with string dict keys and string DataFrame column labels."""
        # Mappings are rebuilt with stringified keys and recursively processed values.
        if isinstance(node, dict):
            return {str(key): _stringify(item) for key, item in node.items()}
        # Lists/tuples are rebuilt as the same container type, order preserved.
        if isinstance(node, (list, tuple)):
            return type(node)(_stringify(item) for item in node)
        # DataFrames are relabelled in place (MultiIndex labels included) and returned.
        if isinstance(node, pd.DataFrame):
            try:
                node.columns = node.columns.map(lambda label: str(label))
            except Exception:
                node.columns = [str(label) for label in node.columns]
            return node
        # Everything else (numeric arrays, Series, scalars, ...) passes through untouched.
        return node

    adata.uns['gates'] = _stringify(_gates)


In [None]:
# Rename the gate column so it is called 'gates' instead of the image id:
# - a one-column DataFrame gets its only column renamed unconditionally;
# - otherwise only a column labelled '6450' (string or int) is renamed.
# Nothing happens when adata.uns['gates'] is missing or is not a DataFrame.
import pandas as pd
_g = adata.uns.get('gates')
if isinstance(_g, pd.DataFrame):
    if _g.shape[1] == 1:
        _g.columns = ['gates']
    else:
        # In-place rename: this mutates the DataFrame object already stored in .uns.
        _g.rename(columns={'6450': 'gates', 6450: 'gates'}, inplace=True)
    adata.uns['gates'] = _g


In [None]:
# Persist the gated object. NOTE(review): this is the same file name that an earlier
# cell reads into `adata_gated`.
adata.write_h5ad(os.path.join(base_dir, "CODEX_scvi_BioCov_gated.h5ad"))

In [None]:
# Two-marker density plot (CD3e vs CD68) drawn from the 'log' layer.
sm.pl.densityPlot2D(adata, markerA='CD3e', markerB='CD68', layer='log')

In [None]:
# Rescale the data using the gates stored in adata.uns['gates'].
manual_gate = adata.uns['gates']
adata = sm.pp.rescale(adata, gate=manual_gate)

In [None]:
# Assign phenotypes from the rules in phenotype_rules.csv; labels are written to
# adata.obs['phenotype'] (read by the next cell).
phenotype = pd.read_csv(os.path.join(base_dir, 'phenotype_rules.csv'))
adata = sm.tl.phenotype_cells(adata, phenotype=phenotype, label="phenotype")

In [None]:
# Number of cells per phenotype label.
adata.obs['phenotype'].value_counts()

In [None]:
# Persist the phenotyped object.
adata.write_h5ad(os.path.join(base_dir, "CODEX_scvi_BioCov_phenotyped_newDuctal.h5ad"))

In [None]:
# Column (marker) order for the heatmap below.
# NOTE(review): this lists 30 markers while adata.uns['all_markers'] holds 35
# (DAPI, Ki67, ECAD, epCAM and KRT8-18 are absent here) — confirm the omission is intended.
marker_ordered = ['GCG', 'INS', 'SST', 'CHGA','PGP9.5','B3TUBB','GAP43','CD44','LGALS3','BCatenin','panCK', 'NaKATPase','CK19','PDPN','CD31', 'CD34','CD20', 'CD8a', 'CD4','CD3e','HLADR', 'CD35',  'CD163','CD68','CD45', 'BActin', 'SMA', 'ColIV', 'VIM', 'KRT14']

In [None]:
# Row (phenotype) order for the heatmap below; 'Exocrine' is deliberately commented out.
row_ordered = ['Alpha cell', 'Beta cell', 'Delta cell', 'Endocrine', 'Neural', 'Acinar', 'Ductal', #'Exocrine', 
               'Lymphatic', 'Blood Vessel', 'Endothelial', 'B cell', 'CD8a Tcell', 'CD4 Tcell', 'T cell', 'APCs', 'Macrophage', 'Immune', 'ECAD+', 'SMA+', 'Structural', 'Unknown']

In [None]:
# Marker-by-phenotype heatmap in the fixed row/column order above (clustering disabled).
sm.pl.heatmap(adata, groupBy='phenotype', standardScale='column',  figsize=(15,8), showPrevalence=True, vmin=-2, vmax=2.5, orderRow=row_ordered, orderColumn=marker_ordered, clusterRows=False, clusterColumns=False)

In [None]:
# Marker-marker correlation plot.
sm.pl.markerCorrelation(adata, figsize=(18,14))

In [None]:

import seaborn as sns
from matplotlib.colors import to_hex

# Build a fixed phenotype -> hex colour mapping so every plot colours a phenotype
# the same way.
if 'phenotype' not in adata.obs:
    raise KeyError("'phenotype' not found in adata.obs")

_phenotype_values = adata.obs['phenotype']
if isinstance(_phenotype_values.dtype, pd.CategoricalDtype):
    # Keep the categorical's own category order.
    categories = list(_phenotype_values.cat.categories)
else:
    categories = sorted(_phenotype_values.astype(str).unique())

# 'tab20' only holds 20 colours and seaborn cycles it when asked for more, which
# would give two phenotypes the same colour; switch to evenly spaced 'husl' hues
# when there are more than 20 categories.
_palette_name = 'tab20' if len(categories) <= 20 else 'husl'
colors = sns.color_palette(_palette_name, n_colors=len(categories))
phenotype_palette = {cat: to_hex(col) for cat, col in zip(categories, colors)}


In [None]:
# Global figure settings for the UMAP panels below; legend markers are drawn 4x larger.
sc.set_figure_params(dpi=200, dpi_save=300, fontsize=28)
plt.rcParams['legend.markerscale'] = 4

In [None]:
# Cast to categorical. NOTE(review): presumably so these columns are plotted as
# discrete groups rather than continuous values — confirm.
adata.obs['imageid'] = adata.obs['imageid'].astype('category')
adata.obs['Age'] = adata.obs['Age'].astype('category')

In [None]:
# Subsample of 55000 cells (rng=42) returned as a copy; used for the UMAP figures.
adata_sampled = sc.pp.sample(adata, n=55000, rng=42, copy=True)

In [None]:
# 2x2 grid of UMAPs of the subsample, one obs column per panel.
fig, axes = plt.subplots(2, 2, figsize=(25,18))

# (grid position, obs column to colour by, extra keyword arguments) for each panel.
# Only the phenotype panel uses the fixed phenotype palette.
_overview_panels = [
    ((0, 0), 'Donor Status', {}),
    ((0, 1), 'imageid', {}),
    ((1, 0), 'phenotype', {'palette': phenotype_palette}),
    ((1, 1), 'Gender', {}),
]

for (_grid_row, _grid_col), _obs_key, _extra_kwargs in _overview_panels:
    sc.pl.umap(
        adata_sampled,
        color=_obs_key,
        ax=axes[_grid_row, _grid_col],
        show=False,  # draw onto the shared figure; displayed once at the end
        frameon=False,
        size=40,
        legend_loc='right margin',
        **_extra_kwargs,
    )

plt.tight_layout()
plt.show()

In [None]:
sc.set_figure_params(dpi=200, dpi_save=300, fontsize=26)
plt.rcParams['legend.markerscale'] = 3.5

obs_col = 'phenotype'  # column in adata.obs holding the phenotype labels

# Each panel highlights one family of phenotypes; every other cell is recoded to
# 'Other' and drawn in light grey.

# Endocrine panel: keep the 4 endocrine targets.
targets = ['Alpha cell', 'Beta cell', 'Delta cell', 'Endocrine']
palette = {
    'Alpha cell': '#e41a1c',
    'Beta cell': '#377eb8',
    'Delta cell': '#4daf4a',
    'Endocrine': '#984ea3',
    'Other': '#d3d3d3'  # light grey for everything else
}

# Immune panel. The antigen-presenting-cell label is spelled 'APCs' to match
# row_ordered (used for the heatmap); with 'APC' those cells were recoded to 'Other'.
# NOTE(review): confirm against the labels in phenotype_rules.csv.
targets2 = ['CD8a Tcell', 'CD4 Tcell', 'T cell', 'Macrophage', 'B cell', 'APCs', 'Immune']
palette2 = {
    'CD8a Tcell': '#e41a1c',
    'CD4 Tcell': '#377eb8',
    'T cell': '#4daf4a',
    'Macrophage': '#984ea3',
    'B cell': '#ff7f00',
    'APCs': '#ffff33',
    'Immune': '#a65628',
    'Other': '#d3d3d3'
}

# Exocrine panel.
targets3 = ['Ductal', 'Acinar', 'Structural']
palette3 = {
    'Ductal': '#e41a1c',
    'Acinar': '#377eb8',
    'Structural': '#4daf4a',
    'Other': '#d3d3d3'
}

# Vascular / neural panel.
targets4 = ['Lymphatic', 'Blood Vessel', 'SMA+', 'Endothelial', 'Neural']
palette4 = {
    'Lymphatic': '#e41a1c',
    'Blood Vessel': '#377eb8',
    'SMA+': '#4daf4a',
    'Endothelial': '#984ea3',
    'Neural': '#ff7f00',
    'Other': '#d3d3d3'
}

# (new obs column, phenotypes to keep, colours) per panel, in row-major grid order.
_highlight_panels = [
    ('Endocrine Plot', targets, palette),
    ('Immune Plot', targets2, palette2),
    ('Exocrine Plot', targets3, palette3),
    ('Vascular/Neural Plot', targets4, palette4),
]

fig, axes = plt.subplots(2, 2, figsize=(20,14))

for _panel_ax, (_plot_col, _kept_labels, _label_colors) in zip(axes.flat, _highlight_panels):
    # Recode: keep the panel's targets, send everything else to 'Other'.
    adata_sampled.obs[_plot_col] = adata_sampled.obs[obs_col].astype(str).where(
        adata_sampled.obs[obs_col].isin(_kept_labels), 'Other'
    )
    sc.pl.umap(
        adata_sampled,
        color=_plot_col,
        ax=_panel_ax,
        show=False,  # draw onto the shared figure; displayed once at the end
        palette=_label_colors,
        groups=_kept_labels + ['Other'],
        frameon=False,
        size=30,
        legend_loc='right margin'
    )

plt.tight_layout()
plt.show()

In [None]:
# Persist the full object. NOTE(review): this writes `adata`, not `adata_sampled`, so
# the '... Plot' columns added to the subsample above are not part of this file.
adata.write_h5ad(os.path.join(base_dir, "CODEX_scvi_BioCov_phenotyped_UMAP.h5ad"))

In [None]:
# Per-image phenotype composition (percent), restricted to endocrine and T-cell
# phenotypes, with images in a fixed order.
sm.pl.stacked_barplot (adata,
                       x_axis='imageid',
                       y_axis='phenotype',
                       method='percent',
                       subset_yaxis=['Alpha cell', 'Beta cell', 'Delta cell', 'Endocrine', 
               'CD8a Tcell', 'CD4 Tcell', 'T cell'],
               order_xaxis=['112', '6356', '6479', '6516', '6548', '6450', '6505', '6521', '6538', '6549', '6533', '6534', '6550', '6551', '6563'],
                       figsize=(18,12)
                )

In [None]:
# Phenotype composition (percent) per donor status for neural, ductal and vascular
# phenotypes, ordered ND -> Aab+ -> T1D.
sm.pl.stacked_barplot(adata,
                       x_axis='Donor Status',
                       y_axis='phenotype',
                       figsize=(11,11),
                       subset_yaxis=['Neural', 'Ductal', 
               'Lymphatic', 'Blood Vessel', 'Endothelial'],
               order_xaxis=['ND', 'Aab+', 'T1D'],
                       method='percent')