In [None]:
#Title: Fowler and Zheng et al. Qiu embryo analysis
#Author: Sherry Li Zheng (slzheng@stanford.edu)
#last updated: 2024-01-17

In [None]:
import scanpy as sc
import pandas as pd

In [None]:
#Part 1: whole embryo analysis

In [41]:
#Read in data
# Available input files:
#   adata_JAX_dataset_1.h5ad -> THIS CONTAINS E8.5 to E10.75 (Fig S1)
#   adata_JAX_dataset_2.h5ad -> THIS CONTAINS E11.0 (Fig S1)
dataset_path = "adata_JAX_dataset_1.h5ad"  # swap in the other file name to analyse E11.0
adata = sc.read_h5ad(dataset_path)

In [42]:
#Annotation tables covering all the datasets, as provided by the original authors
#(Qiu et al.: https://www.biorxiv.org/content/10.1101/2023.04.05.535726v1)
# df_gene.csv is loaded first (gene table), then df_cell.csv (cell table).
gene_names, cell_anno = (
    pd.read_csv(csv_name) for csv_name in ("df_gene.csv", "df_cell.csv")
)

In [43]:
#Annotate the datasets
# Gene symbols are assigned by position.
# NOTE(review): this assumes df_gene.csv lists genes in the same order as adata.var -- confirm.
adata.var_names = gene_names['gene_short_name']
# Gene symbols are not guaranteed to be unique; de-duplicate them so that
# selecting or colouring by gene name is unambiguous.
adata.var_names_make_unique()
adata.obs_names = adata.obs['cell_id']
# Index the annotation table by cell_id once and reuse it for every mapped column
# instead of rebuilding the index per column.
cell_anno_by_id = cell_anno.set_index('cell_id')
# Cells whose id is absent from df_cell.csv receive NaN in the mapped columns.
adata.obs['cell_type'] = adata.obs_names.map(cell_anno_by_id['celltype_update'])
adata.obs['major_trajectory'] = adata.obs_names.map(cell_anno_by_id['major_trajectory'])

In [44]:
#Slicing adata object to only contain subsets of timepoints:
time_to_keep = ['E8.5'] #insert timepoint of interest here
# Indexing an AnnData returns a view onto the parent object. The preprocessing
# that follows modifies the data in place, so take an explicit copy here: this
# avoids the implicit copy (and its warning) on first modification and lets the
# full dataset be released once `adata` is rebound.
adata = adata[adata.obs['day'].isin(time_to_keep)].copy()
# Fail early with a readable message rather than deep inside scanpy.
if adata.n_obs == 0:
    raise ValueError(f"No cells found for timepoint(s) {time_to_keep}; check adata.obs['day'].")

In [None]:
#Generate whole-embryo UMAPs
# Preprocessing: each call below updates `adata` in place.
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(
    adata,
    min_mean=0.0125,
    max_mean=3,
    min_disp=0.5,
)

# Embedding: PCA -> neighbourhood graph -> UMAP.
sc.tl.pca(adata, svd_solver='arpack')
sc.pp.neighbors(
    adata,
    n_neighbors=10,
    n_pcs=40,
)
sc.tl.umap(adata)

# One figure per feature: first the cell-type annotation, then Gja5.
for feature in ('cell_type', 'Gja5'):
    sc.pl.umap(adata, color=[feature])

In [None]:
#import seaborn as sns

# Combine tab10 and tab20 palettes into a 30-entry palette (tab20 already contains the tab10 colors, so only 20 of them are distinct)
#combined_palette = sns.color_palette("tab10") + sns.color_palette("tab20")

# Repeat the combined palette until it has at least 112 entries (colors repeat, so they are not all distinct)
#full_palette = combined_palette * (112 // len(combined_palette) + 1)

# Slice the palette to get exactly 112 colors
#final_palette = full_palette[:112]

# Assuming adata is your AnnData object
#sc.pl.umap(adata, color='cell_type')

In [None]:
#Part 2: cluster analysis

In [None]:
#Read in data
# Available input files:
#   adata_JAX_dataset_1.h5ad -> THIS CONTAINS E8.5 to E10.75 (Fig S1)
#   adata_JAX_dataset_2.h5ad -> THIS CONTAINS E11.0 (Fig S1)
dataset_path = "adata_JAX_dataset_1.h5ad"  # swap in the other file name to analyse E11.0
adata = sc.read_h5ad(dataset_path)

In [None]:
#Annotation tables covering all the datasets, as provided by the original authors
#(Qiu et al.: https://www.biorxiv.org/content/10.1101/2023.04.05.535726v1)
# df_gene.csv is loaded first (gene table), then df_cell.csv (cell table).
gene_names, cell_anno = (
    pd.read_csv(csv_name) for csv_name in ("df_gene.csv", "df_cell.csv")
)

In [None]:
#Annotate the datasets
# Gene symbols are assigned by position.
# NOTE(review): this assumes df_gene.csv lists genes in the same order as adata.var -- confirm.
adata.var_names = gene_names['gene_short_name']
# Gene symbols are not guaranteed to be unique; de-duplicate them so that
# selecting or colouring by gene name is unambiguous.
adata.var_names_make_unique()
adata.obs_names = adata.obs['cell_id']
# Index the annotation table by cell_id once and reuse it for every mapped column
# instead of rebuilding the index per column.
cell_anno_by_id = cell_anno.set_index('cell_id')
# Cells whose id is absent from df_cell.csv receive NaN in the mapped columns.
adata.obs['cell_type'] = adata.obs_names.map(cell_anno_by_id['celltype_update'])
adata.obs['major_trajectory'] = adata.obs_names.map(cell_anno_by_id['major_trajectory'])

In [None]:
#Slicing adata object to only contain subsets of timepoints or celltypes:
time_to_keep = ['E8.5'] #insert timepoint of interest here
annotations_to_keep = ['Arterial endothelial cells'] #insert cluster(s) of interest here
# Apply both filters with a single boolean mask instead of slicing a view of a view.
cells_to_keep = (
    adata.obs['day'].isin(time_to_keep)
    & adata.obs['cell_type'].isin(annotations_to_keep)
)
# Indexing an AnnData returns a view onto the parent object. The preprocessing
# that follows modifies the data in place, so take an explicit copy here: this
# avoids the implicit copy (and its warning) on first modification and lets the
# full dataset be released once `adata` is rebound.
adata = adata[cells_to_keep].copy()
# Fail early with a readable message rather than deep inside scanpy.
if adata.n_obs == 0:
    raise ValueError(
        f"No cells found for timepoint(s) {time_to_keep} and cell type(s) {annotations_to_keep}; "
        "check adata.obs['day'] and adata.obs['cell_type']."
    )

In [None]:
#Generate UMAPs
# Preprocessing: each call below updates `adata` in place.
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(
    adata,
    min_mean=0.0125,
    max_mean=3,
    min_disp=0.5,
)

# Embedding: PCA -> neighbourhood graph -> UMAP.
sc.tl.pca(adata, svd_solver='arpack')
sc.pp.neighbors(
    adata,
    n_neighbors=10,
    n_pcs=40,
)
sc.tl.umap(adata)

# Colour the embedding by Gja5.
sc.pl.umap(adata, color=['Gja5'])