In [None]:
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator
from matplotlib.colors import LinearSegmentedColormap
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.axes_grid1 import make_axes_locatable
import random
import os
import sys
import time
import csv
import re
import pandas as pd
import scanpy as sc
import numpy as np
import scipy.spatial as scisp
from scipy.sparse import coo_matrix, csr_matrix, csc_matrix
import math
import anndata as ad
import igraph as ig
import plotly.graph_objects as go
import scanpy.external as sce
import scipy.sparse as sp
from statsmodels.nonparametric.smoothers_lowess import lowess
from sklearn.metrics import r2_score
from scipy.interpolate import interp1d
import seaborn as sns
from copy import copy
import matplotlib as mpl
# Work on a private copy of the "Reds" colormap so the shared matplotlib
# instance is left untouched, then set its "under" colour (used for values
# below the colour-scale minimum) to light gray.
reds = copy(cm.Reds)
reds.set_under(color="lightgray")

# Preprocess

In [None]:
# Load the raw (unfiltered) expression matrix.
adata = sc.read_h5ad('/fs/cbsuvlaminck2/workdir/ra574/slideseq/Parse_Chick_Heart/DGE_unfiltered.h5ad')

# Doublet removal: score cells with Scrublet, inspect the score distribution,
# and keep only cells scoring below 0.25.
sce.pp.scrublet(adata, n_prin_comps=6)
sce.pl.scrublet_score_distribution(adata)
# .copy() turns the subset view into a real AnnData, so the column assignment
# below does not have to trigger an implicit copy of a view.
adata = adata[adata.obs['doublet_score'] < 0.25].copy()

# QC metrics: flag the listed mitochondrial genes and compute per-cell metrics.
mito_genes = ("ND1", "ND2", "COX1", "COII", "ATP8", "ATP6", "COX3", "ND3", "ND4L", "ND4", "ND5", "CYTB", "ND6")
adata.var['mt'] = adata.var_names.isin(mito_genes)
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)

# Keep cells with 300 < detected genes < 10000 and < 10 % mitochondrial counts.
# Both filters use metrics computed above, so one combined mask selects the
# same cells as filtering twice in sequence.
passes_qc = (
    (adata.obs.n_genes_by_counts < 10000)
    & (adata.obs.n_genes_by_counts > 300)
    & (adata.obs.pct_counts_mt < 10)
)
adata = adata[passes_qc, :].copy()

# Add raw count layer.
# Store a copy: the normalisation steps below update adata.X, and a layer that
# merely references the same matrix would not be a reliable snapshot of the counts.
adata.layers['count'] = adata.X.copy()

# Normalization: scale each cell to 1e4 total counts, then log1p-transform.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
# Copy again so later in-place transforms of adata.X (e.g. scaling) cannot
# alter the stored log-normalised values.
adata.layers['lognorm'] = adata.X.copy()

# Add metadata

In [None]:
# Derive per-cell 'site' and 'day' annotations from the sample name.
sample_names = adata.obs['sample']

# Missing sample names count as "no match" (na=False).
is_ral = sample_names.str.contains('RAL', na=False)
is_lal = sample_names.str.contains('LAL', na=False)

# RAL takes precedence over LAL; anything matching neither is labelled Sham.
lal_or_sham = np.where(is_lal, 'LAL', 'Sham')
adata.obs['site'] = np.where(is_ral, 'RAL', lal_or_sham)

# Strip the site suffix from the sample name; what remains is stored as 'day'.
adata.obs['day'] = sample_names.str.replace('_RAL|_LAL|_Sham', '', regex=True)

# PCA

In [None]:
# Number of highly variable genes to flag, and number of principal
# components to show in the variance-ratio plot.
n_hvg = 2000
n_pcs_shown = 50

# Flag the top variable genes, scale the expression matrix, and run PCA.
sc.pp.highly_variable_genes(adata, n_top_genes=n_hvg)
sc.pp.scale(adata)
sc.tl.pca(adata)

# Variance explained per component (log scale) to help pick how many PCs to keep.
sc.pl.pca_variance_ratio(adata, log=True, n_pcs=n_pcs_shown)

# Clustering

In [None]:
# Column in adata.obs that receives the Leiden cluster labels.
leiden_key = 'leiden_0.7'

# Neighbourhood graph on the first 30 PCs, then the UMAP embedding.
sc.pp.neighbors(adata, n_pcs=30)
sc.tl.umap(adata)

# Leiden clustering at resolution 0.7.
sc.tl.leiden(adata, key_added=leiden_key, resolution=0.7)

# Global Annotation

In [None]:
import os
import scanpy as sc
import matplotlib.pyplot as plt

# Marker genes: one row of the figure per cell type, one panel per gene.
# `None` entries (and rows shorter than `ncols`) mark panels that stay empty.
cell_types_markers = [
    # Epicardial cells
    ['TBX18', 'ALDH1A2', 'WT1', 'TGFB2', 'TCF21', None],
    # Mural cells
    ['RGS5','MYH11','TAGLN','ACTA2',None,None],
    # Cardiomyocytes cells
    ['TNNT2', 'MYH7', 'ACTN2', 'TTN', 'ACTC1', None],
    # Endothelial cells
    ['PECAM1', 'CD34', 'CDH5', 'TEK', 'KDR', 'ACE'],
    # Valve cells
    ['MSX1','SOX9'],
    # Erythrocytes
    ['HBBA', 'HBAD', 'HBA1', None, None, None],
    # Macrophages
    ['MERTK', 'CSF1R', 'PTPRC', None, None, None],
    # Fibroblasts
    ['COL1A1', 'PDGFRA', 'COL3A1', None, None, None],
    # Myocardial
    ['TNNC1','TNNT2',None,None,None,None]
]

# Figure params
# NOTE: this rebinds `reds` to the name of the stock colormap, replacing the
# customised colormap object created right after the imports.
reds = "Reds"
ncols = 6
nrows = len(cell_types_markers)  # one row per cell type (9), kept in sync with the list
figsize = 5
wspace = 0

fig, axs = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * figsize + figsize * wspace * (ncols - 1), nrows * figsize),
)

plt.subplots_adjust(wspace=wspace)

# Plot marker gene exp
for i, markers in enumerate(cell_types_markers):
    for j in range(ncols):
        # Rows may be shorter than ncols; treat the missing tail like None.
        marker = markers[j] if j < len(markers) else None
        if marker is None:
            # Remove the unused panel so no empty axes frame is drawn.
            # (The previous cleanup loop iterated over an empty range and
            # therefore never removed anything.)
            fig.delaxes(axs[i, j])
            continue
        # Plot each marker individually for the current cell type
        sc.pl.umap(
            adata,
            color=[marker],  # Plot each marker individually
            cmap=reds,
            legend_loc="on data",
            ax=axs[i, j],  # Specify the axis for each marker
            show=False
        )

plt.tight_layout()
plt.savefig('/workdir/jp2626/chicken/figures/chicken_sc_global_markergenexp.png', format='png', dpi=300)
plt.show()


In [None]:
# Leiden clusters (as string labels) assigned to each global cell type.
fibroblasts = {'3', '4', '5'}
endothelial = {'1','10','11'}
cardiomyocytes = {'0','2','7','12'}
macrophages = {'15', '20'}
parathyroid = {'19'}
neural = {'16','17'}
epi = {'6','8','9','14'}
mural = {'18'}

# Cluster labels as plain strings, computed once and reused for every test.
cluster_labels = adata.obs['leiden_0.7'].astype(str)

# (cell-type name, cluster set) pairs; order matters for np.select, which
# takes the first matching condition.
celltype_clusters = [
    ('Fibroblasts', fibroblasts),
    ('Endothelial cells', endothelial),
    ('Cardiomyocytes', cardiomyocytes),
    ('Macrophages', macrophages),
    ('Parathyroid cells', parathyroid),
    ('Neural cells', neural),
    ('Epicardial cells', epi),
    ('Mural cells', mural),
]
conditions = [cluster_labels.isin(members) for _, members in celltype_clusters]
choices = [label for label, _ in celltype_clusters]

# Any cluster not listed above falls through to 'Erythrocytes'.
adata.obs['global_celltype'] = np.select(conditions, choices, default='Erythrocytes')

# Subset Data

In [None]:
# Split the annotated object by global cell type and write each subset to its
# own .h5ad file. .copy() makes every subset an independent AnnData rather
# than a view of `adata`.
adata_fib = adata[adata.obs['global_celltype'] == 'Fibroblasts'].copy()
adata_fib.write_h5ad('./chicken_qc_processed_rohitadd_fib.h5ad')

adata_cardi = adata[adata.obs['global_celltype'] == 'Cardiomyocytes'].copy()
adata_cardi.write_h5ad('./chicken_qc_processed_rohitadd_cardi.h5ad')

adata_endo = adata[adata.obs['global_celltype'] == 'Endothelial cells'].copy()
adata_endo.write_h5ad('./chicken_qc_processed_rohitadd_endo.h5ad')

# The epicardial subset gets its own variable; it was previously assigned to
# `adata_endo`, which silently replaced the endothelial subset in memory.
adata_epi = adata[adata.obs['global_celltype'] == 'Epicardial cells'].copy()
adata_epi.write_h5ad('./chicken_qc_processed_rohitadd_epi.h5ad')

# Transfer subtype annotation

In [None]:
# Per-subset metadata tables, indexed by the CSV's unnamed first column
# (presumably cell barcodes matching adata.obs.index — verify).
meta_paths = (
    './epi/meta_fin.csv',
    './fib/meta_fin.csv',
    './endo/meta_fin.csv',
    './cardi/meta_fin.csv',
)
epi, fib, endo, cardi = (
    pd.read_csv(path, index_col='Unnamed: 0') for path in meta_paths
)

# Copy each table's subtype label onto the cells it shares with `adata`.
for meta in (epi, fib, endo, cardi):
    shared_cells = meta.index.intersection(adata.obs.index)
    subtype_labels = meta.loc[shared_cells, 'sub_celltype_v3_rename_fin'].astype(str)
    # Every literal '_0' occurrence is removed from the label before storing.
    adata.obs.loc[shared_cells, 'sub_celltype_fin'] = subtype_labels.str.replace('_0', '', regex=False)
