In [None]:
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator
from matplotlib.colors import LinearSegmentedColormap
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.axes_grid1 import make_axes_locatable
import random
import os
import sys
import time
import csv
import re
import pandas as pd
import scanpy as sc
import numpy as np
import scipy.spatial as scisp
from scipy.sparse import coo_matrix, csr_matrix, csc_matrix
import math
import anndata as ad
import igraph as ig
import plotly.graph_objects as go
import scanpy.external as sce
import scipy.sparse as sp
from statsmodels.nonparametric.smoothers_lowess import lowess
from sklearn.metrics import r2_score
from scipy.interpolate import interp1d
import seaborn as sns
from copy import copy
import matplotlib as mpl
# Work on a copy of the stock "Reds" colormap so the shared matplotlib object
# is left untouched, and give the copy a light-gray "under" colour.
reds = copy(cm.Reds)
reds.set_under("lightgray")

# Cardiomyocytes

## Preprocess + PCA

In [None]:
# Cardiomyocyte subset; the same path is overwritten at the end of this cell.
cardi_h5ad = 'chicken_qc_processed_rohitadd_cardi.h5ad'
adata = sc.read_h5ad(cardi_h5ad)

# Reset X from the stored 'count' layer and print its min/max as a quick check.
adata.X = adata.layers['count'].copy()
print(np.min(adata.X), np.max(adata.X))

# Normalise each cell to a total of 1e4, log1p-transform, then flag the
# 2000 most variable genes.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, n_top_genes=2000)

# Scale, run PCA, and plot the variance ratio of the first 50 PCs (log axis).
sc.pp.scale(adata)
sc.tl.pca(adata)
sc.pl.pca_variance_ratio(adata, n_pcs=50, log=True, save='_pca_cardi.png')

adata.write_h5ad(cardi_h5ad)

## Clustering

In [None]:
# Build the neighbourhood graph on the first 20 PCs and compute the UMAP embedding.
sc.pp.neighbors(adata, n_pcs=20)
sc.tl.umap(adata)

# The annotation cell below groups by 'leiden_0.5' in its first two rounds, but
# this cell previously only produced 'leiden_0.7', so a fresh run failed with a
# KeyError. Compute the 0.5 clustering here unless the loaded file already
# carries that column (an existing column is kept as-is).
if 'leiden_0.5' not in adata.obs.columns:
    sc.tl.leiden(adata, resolution=0.5, key_added='leiden_0.5')
sc.tl.leiden(adata, resolution=0.7, key_added='leiden_0.7')

## DEG and Annotation

In [None]:
# First round
# Top-5 Wilcoxon markers per 'leiden_0.5' cluster, shown as a dot plot.
sc.tl.rank_genes_groups(adata, groupby="leiden_0.5", method="wilcoxon", n_genes=5)
rank_genes_df = sc.get.rank_genes_groups_df(
    adata, group=adata.obs['leiden_0.5'].unique()
)
cell_types_markers = rank_genes_df['names'].tolist()
sc.pl.dotplot(adata, cell_types_markers, groupby='leiden_0.5', dendrogram=False)

# Coarse label -> member 'leiden_0.5' clusters (insertion order is the
# priority order handed to np.select).
cardi_groups = {
    'Cardi1': {'8', '1', '10', '5', '6'},
    'Cardi2': {'3', '11', '7'},
    'Cardi3': {'2'},
    'Cardi4': {'4'},
    'Cardi5': {'0'},
    'Cardi6': {'9'},
}
leiden_05 = adata.obs['leiden_0.5'].astype(str)
conditions = [leiden_05.isin(members) for members in cardi_groups.values()]
choices = list(cardi_groups)
# Any cluster not listed above falls back to the plain 'Cardi' label.
adata.obs['sub_celltype_v1'] = np.select(conditions, choices, default='Cardi')

# Second round
# Split the Cardi1 clusters into sub-labels; clusters '1' and '10' share 'Cardi1_2'.
cardi1_sublabels = {
    '8': 'Cardi1_1',
    '1': 'Cardi1_2',
    '10': 'Cardi1_2',
    '5': 'Cardi1_3',
    '6': 'Cardi1_4',
}
leiden_str = adata.obs['leiden_0.5'].astype(str)
conditions = [leiden_str == cluster_id for cluster_id in cardi1_sublabels]
choices = list(cardi1_sublabels.values())
# Cells in any other cluster keep their first-round label unchanged.
adata.obs['sub_celltype_v2'] = np.select(
    conditions,
    choices,
    default=adata.obs['sub_celltype_v1'].astype(str),
)

# Third round
# Cluster at resolution 0.7 and plot the top-5 Wilcoxon markers per cluster.
sc.tl.leiden(adata, resolution=0.7, key_added='leiden_0.7')
sc.tl.rank_genes_groups(adata, groupby="leiden_0.7", method="wilcoxon", n_genes=5)
rank_genes_df = sc.get.rank_genes_groups_df(
    adata, group=adata.obs['leiden_0.7'].unique()
)
cell_types_markers = rank_genes_df['names'].tolist()
sc.pl.dotplot(adata, cell_types_markers, groupby='leiden_0.7', dendrogram=False)

# Two 'leiden_0.7' clusters override the second-round label; every other cell
# keeps its second-round label as-is.
leiden_str = adata.obs['leiden_0.7'].astype(str)
refined_labels = {'5': 'Cardi3_2', '11': 'Cardi4_2'}
conditions = [leiden_str == cluster_id for cluster_id in refined_labels]
choices = list(refined_labels.values())
adata.obs['sub_celltype_v3'] = np.select(
    conditions,
    choices,
    default=adata.obs['sub_celltype_v2'].astype(str),
)

# Cells still carrying the bare parent label become the "_1" sub-label.
for parent_label, first_sublabel in (('Cardi3', 'Cardi3_1'), ('Cardi4', 'Cardi4_1')):
    is_parent = adata.obs['sub_celltype_v3'] == parent_label
    adata.obs.loc[is_parent, 'sub_celltype_v3'] = first_sublabel

# Final annotation
# Translate the round-three sub-labels into the published cell-type names.
# NOTE(review): 'vCM_pro' uses an underscore while its siblings use a hyphen —
# confirm whether that is intended before changing it.
mapping = {'Cardi1_1': 'vCM-LAL',
 'Cardi1_2': 'vCM-compact',
 'Cardi1_3': 'vCM-inner',
 'Cardi1_4': 'vCM_pro',
 'Cardi2': 'vCM-immature',
 'Cardi3_1': 'aCM-immature',
 'Cardi3_2': 'aCM-mature',
 'Cardi4_1': 'CM1-1',
 'Cardi4_2': 'CM1-2',
 'Cardi5': 'CM2',
 'Cardi6': 'AVC'}
final_labels = adata.obs['sub_celltype_v3'].map(mapping)

# Series.map leaves NaN for any label missing from `mapping` (e.g. the
# round-one fallback 'Cardi'); report those instead of dropping them silently.
unmapped_labels = sorted(
    adata.obs['sub_celltype_v3'][final_labels.isna()].astype(str).unique()
)
if unmapped_labels:
    print(f"WARNING: sub_celltype_v3 labels without a final annotation (left as NaN): {unmapped_labels}")

adata.obs['sub_celltype_v3_fin'] = final_labels

# Fibroblasts

## Preprocess + PCA

In [None]:
# Fibroblast subset; the same path is overwritten at the end of this cell.
fib_h5ad = 'chicken_qc_processed_rohitadd_fib.h5ad'
adata = sc.read_h5ad(fib_h5ad)

# Reset X from the stored 'count' layer and print its min/max as a quick check.
adata.X = adata.layers['count'].copy()
print(np.min(adata.X), np.max(adata.X))

# Normalise each cell to a total of 1e4, log1p-transform, then flag the
# 2000 most variable genes.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, n_top_genes=2000)

# Scale, run PCA, and plot the variance ratio of the first 50 PCs (log axis).
sc.pp.scale(adata)
sc.tl.pca(adata)
sc.pl.pca_variance_ratio(adata, n_pcs=50, log=True, save='_pca_fib.png')

adata.write_h5ad(fib_h5ad)

## Clustering

In [None]:
# Build the neighbourhood graph on the first 20 PCs and compute the UMAP embedding.
sc.pp.neighbors(adata, n_pcs=20)
sc.tl.umap(adata)

# The key name encodes the resolution everywhere else in this notebook
# ('leiden_0.5', 'leiden_0.6', 'leiden_0.7'); the original call stored a
# resolution-0.7 clustering under 'leiden_0.3'. Use the resolution the key names.
# NOTE(review): the hard-coded cluster IDs in the annotation cell below depend
# on this clustering — confirm they were derived at resolution 0.3.
sc.tl.leiden(adata, resolution=0.3, key_added='leiden_0.3')

## DEG and Annotation

In [None]:
# First round
# Group 'leiden_0.3' clusters into coarse fibroblast labels; every cluster
# not listed falls back to 'FB3'.
fb1 = {'2','3'}
fb2 = {'4','1'}
conditions = [
    adata.obs['leiden_0.3'].astype(str).isin(fb1),
    adata.obs['leiden_0.3'].astype(str).isin(fb2)
]
choices = ['FB1', 'FB2']
adata.obs['sub_celltype_v1'] = np.select(conditions, choices, default='FB3')

# Top-5 Wilcoxon markers per coarse label, exported as CSV.
sc.tl.rank_genes_groups(adata, groupby="sub_celltype_v1", method="wilcoxon",n_genes=5)
rank_genes_df = sc.get.rank_genes_groups_df(adata, group = adata.obs['sub_celltype_v1'].unique())
# to_csv does not create missing parent directories, so make sure 'fib/' exists
# before writing; exist_ok keeps re-runs from failing.
os.makedirs('fib', exist_ok=True)
rank_genes_df.to_csv('fib/chicken_qc_processed_rohitadd_fib_deg_top5_subv1.csv')

# Second round
# Give each FB1/FB2 cluster its own sub-label.
fb_sublabels = {
    '3': 'FB1_1',
    '2': 'FB1_2',
    '1': 'FB2_2',
    '4': 'FB2_1',
}
leiden_str = adata.obs['leiden_0.3'].astype(str)
conditions = [leiden_str == cluster_id for cluster_id in fb_sublabels]
choices = list(fb_sublabels.values())
# Cells in any other cluster keep their first-round label with '_0' appended.
adata.obs['sub_celltype_v2'] = np.select(
    conditions,
    choices,
    default=adata.obs['sub_celltype_v1'].astype(str) + '_0',
)

# Third round
# Cluster at resolution 0.6 and collect the top-5 Wilcoxon markers per cluster.
sc.tl.leiden(adata, resolution=0.6, key_added='leiden_0.6')
sc.tl.rank_genes_groups(adata, groupby="leiden_0.6", method="wilcoxon", n_genes=5)
rank_genes_df = sc.get.rank_genes_groups_df(
    adata, group=adata.obs['leiden_0.6'].unique()
)
cell_types_markers = rank_genes_df['names'].tolist()

# Two 'leiden_0.6' clusters override the second-round label; every other cell
# keeps its second-round label as-is.
leiden_str = adata.obs['leiden_0.6'].astype(str)
refined_labels = {'4': 'FB1_3', '0': 'FB1_2'}
conditions = [leiden_str == cluster_id for cluster_id in refined_labels]
choices = list(refined_labels.values())
adata.obs['sub_celltype_v3'] = np.select(
    conditions,
    choices,
    default=adata.obs['sub_celltype_v2'].astype(str),
)

# Final annotation
# Translate the round-three sub-labels into the final fibroblast names.
mapping = dict([
    ('FB1_1', 'FN1+ Valve'),
    ('FB1_2', 'Valve-mature'),
    ('FB1_3', 'Valve-immature'),
    ('FB2_1', 'myoFB-1'),
    ('FB2_2', 'myoFB-2'),
    ('FB3_0', 'FB'),
])
final_labels = adata.obs['sub_celltype_v3'].map(mapping)
adata.obs['sub_celltype_v3_fin'] = final_labels

# Epithelial 

## Preprocess + PCA

In [None]:
# Epithelial subset; the same path is overwritten at the end of this cell.
epi_h5ad = 'chicken_qc_processed_rohitadd_epi.h5ad'
adata = sc.read_h5ad(epi_h5ad)

# Reset X from the stored 'count' layer and print its min/max as a quick check.
adata.X = adata.layers['count'].copy()
print(np.min(adata.X), np.max(adata.X))

# Normalise each cell to a total of 1e4, log1p-transform, then flag the
# 2000 most variable genes.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, n_top_genes=2000)

# Scale, run PCA, and plot the variance ratio of the first 50 PCs (log axis).
sc.pp.scale(adata)
sc.tl.pca(adata)
sc.pl.pca_variance_ratio(adata, n_pcs=50, log=True, save='_pca_epi.png')

adata.write_h5ad(epi_h5ad)

## Clustering 

In [None]:
# Neighbourhood graph on the first 20 PCs, UMAP embedding, then Leiden
# clustering stored under a key that names its resolution ('leiden_0.5').
epi_resolution = 0.5
sc.pp.neighbors(adata, n_pcs=20)
sc.tl.umap(adata)
sc.tl.leiden(adata, resolution=epi_resolution, key_added=f'leiden_{epi_resolution}')

## DEG + Annotation

In [None]:
# First round
# Coarse label -> member 'leiden_0.5' clusters (insertion order is the
# priority order handed to np.select).
epi_groups = {
    'Epi2': {'7', '1'},
    'Epi4': {'4', '2'},
    'Epi5': {'0', '5'},
    'Epi1': {'6'},
    'Erythrocytes': {'8'},
}
leiden_05 = adata.obs['leiden_0.5'].astype(str)
conditions = [leiden_05.isin(members) for members in epi_groups.values()]
choices = list(epi_groups)
# Any cluster not listed above is labelled 'Epi3'.
adata.obs['sub_celltype_v1'] = np.select(conditions, choices, default='Epi3')

# Top-5 Wilcoxon markers per coarse label.
sc.tl.rank_genes_groups(adata, groupby="sub_celltype_v1", method="wilcoxon", n_genes=5)
rank_genes_df = sc.get.rank_genes_groups_df(
    adata, group=adata.obs['sub_celltype_v1'].unique()
)

# Second round
# Give each Epi2/Epi5 cluster its own sub-label.
epi_sublabels = {
    '7': 'Epi2_1',
    '1': 'Epi2_2',
    '0': 'Epi5_2',
    '5': 'Epi5_1',
}
leiden_str = adata.obs['leiden_0.5'].astype(str)
conditions = [leiden_str == cluster_id for cluster_id in epi_sublabels]
choices = list(epi_sublabels.values())
# Cells in any other cluster keep their first-round label with '_0' appended.
adata.obs['sub_celltype_v2'] = np.select(
    conditions,
    choices,
    default=adata.obs['sub_celltype_v1'].astype(str) + '_0',
)

# Third round
# No further refinement for this lineage: v3 simply mirrors v2.
adata.obs['sub_celltype_v3'] = adata.obs['sub_celltype_v2']

# Final annotation
# Translate the sub-labels into the final epithelial names.
# NOTE(review): 'Erythrocytes_0' maps to itself, so the internal '_0' suffix
# ends up in the final labels — confirm whether 'Erythrocytes' was intended.
mapping = dict([
    ('Epi1_0', 'Epi-mature'),
    ('Epi2_1', 'KRT7+ EpiCM'),
    ('Epi2_2', 'Epi-immature'),
    ('Epi3_0', 'KRT7- EpiCM'),
    ('Epi4_0', 'Pericytes'),
    ('Epi5_1', 'EpiM-1'),
    ('Epi5_2', 'EpiM-2'),
    ('Erythrocytes_0', 'Erythrocytes_0'),
])
final_labels = adata.obs['sub_celltype_v3'].map(mapping)
adata.obs['sub_celltype_v3_fin'] = final_labels

# Endothelial cells

## Preprocess + PCA

In [None]:
# Endothelial subset; the same path is overwritten at the end of this cell.
endo_h5ad = 'chicken_qc_processed_rohitadd_endo.h5ad'
adata = sc.read_h5ad(endo_h5ad)

# Reset X from the stored 'count' layer and print its min/max as a quick check.
adata.X = adata.layers['count'].copy()
print(np.min(adata.X), np.max(adata.X))

# Normalise each cell to a total of 1e4, log1p-transform, then flag the
# 2000 most variable genes.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, n_top_genes=2000)

# Scale, run PCA, and plot the variance ratio of the first 50 PCs (log axis).
sc.pp.scale(adata)
sc.tl.pca(adata)
sc.pl.pca_variance_ratio(adata, n_pcs=50, log=True, save='_pca_endo.png')

adata.write_h5ad(endo_h5ad)

## Clustering

In [None]:
# Neighbourhood graph on the first 20 PCs, UMAP embedding, then Leiden
# clustering stored under a key that names its resolution ('leiden_0.5').
endo_resolution = 0.5
sc.pp.neighbors(adata, n_pcs=20)
sc.tl.umap(adata)
sc.tl.leiden(adata, resolution=endo_resolution, key_added=f'leiden_{endo_resolution}')

## DEG + Annotation

In [None]:
# First round
# Top-5 Wilcoxon markers per 'leiden_0.5' cluster.
sc.tl.rank_genes_groups(adata, groupby="leiden_0.5", method="wilcoxon", n_genes=5)
rank_genes_df = sc.get.rank_genes_groups_df(
    adata, group=adata.obs['leiden_0.5'].unique()
)

# Coarse label -> member 'leiden_0.5' clusters (insertion order is the
# priority order handed to np.select).
endo_groups = {
    'Endo1': {'3', '7', '8', '10', '12'},
    'Endo2': {'2', '11', '5', '9'},
}
leiden_05 = adata.obs['leiden_0.5'].astype(str)
conditions = [leiden_05.isin(members) for members in endo_groups.values()]
choices = list(endo_groups)
# Any cluster not listed above is labelled 'Endo3'.
adata.obs['sub_celltype_v1'] = np.select(conditions, choices, default='Endo3')

# Second round
leiden_str = adata.obs['leiden_0.5'].astype(str)

# (condition, sub-label) pairs, evaluated in this order by np.select.
# NOTE(review): cluster '12' is grouped under Endo1 in the first round but is
# relabelled 'Endo2_2' here — confirm this is intended.
relabel_rules = [
    (leiden_str == '3', 'Endo1_1'),
    (leiden_str == '8', 'Endo1_2'),
    (leiden_str == '7', 'Endo1_3'),
    (leiden_str == '10', 'Endo1_4'),
    (leiden_str == '5', 'Endo2_1'),
    (leiden_str == '6', 'Endo3_1'),
    (leiden_str.isin(['9', '2', '11', '12']), 'Endo2_2'),
]
conditions = [rule_mask for rule_mask, _ in relabel_rules]
choices = [rule_label for _, rule_label in relabel_rules]
# Cells matching no rule keep their first-round label with '_0' appended.
adata.obs['sub_celltype_v2'] = np.select(
    conditions,
    choices,
    default=adata.obs['sub_celltype_v1'].astype(str) + '_0',
)

# Third round
# This round keys on 'leiden_0.7', but the endothelial clustering cell above
# only computes 'leiden_0.5', so a fresh run failed with a KeyError. Compute
# the 0.7 clustering here unless the loaded file already carries that column
# (an existing column is kept as-is).
if 'leiden_0.7' not in adata.obs.columns:
    sc.tl.leiden(adata, resolution=0.7, key_added='leiden_0.7')

adata.obs['sub_celltype_v3'] = adata.obs['sub_celltype_v2'].copy()
leiden_str = adata.obs['leiden_0.7'].astype(str)
conditions = [
    leiden_str == '0',
    leiden_str == '1',
    leiden_str == '9',
    leiden_str == '2'
]
# NOTE(review): 'Endo3_1' is also assigned in the second round (leiden_0.5
# cluster '6'), and no rule here yields 'Endo3_2' — confirm the labels.
choices = ['Endo3_1', 'Endo_pro', 'Endo3_3','Endo3_4']
# Cells outside these four clusters keep their second-round label as-is.
adata.obs['sub_celltype_v3'] = np.select(
    conditions, 
    choices, 
    default=adata.obs['sub_celltype_v2'].astype(str)  
)

# Final annotation
# Translate the round-three sub-labels into the final endothelial names.
# NOTE(review): 'Endo3_2' is mapped here but no round above assigns it, while
# 'Endo3_0' (second-round fallback) can survive round three and has no entry.
mapping = {'Endo1_1': 'VEC-1',
 'Endo1_2': 'VEC-2',
 'Endo1_3': 'VEC-3',
 'Endo1_4': 'VEC-4',
 'Endo2_1': 'TBX20+ Endo-mature',
 'Endo2_2': 'TBX20+ Endo-immature',
 'Endo3_1': 'FN1+ Endo',
 'Endo3_2': 'FN1+ Endo-LAL',
 'Endo3_3': 'FN1+ Endo-immature',
 'Endo3_4': 'FN1+ Endo-mature',
 'Endo_pro': 'Endo-pro'}
final_labels = adata.obs['sub_celltype_v3'].map(mapping)

# Series.map leaves NaN for any label missing from `mapping`; report those
# instead of dropping them silently.
unmapped_labels = sorted(
    adata.obs['sub_celltype_v3'][final_labels.isna()].astype(str).unique()
)
if unmapped_labels:
    print(f"WARNING: sub_celltype_v3 labels without a final annotation (left as NaN): {unmapped_labels}")

adata.obs['sub_celltype_v3_fin'] = final_labels