In [2]:
import scanpy as sc
import anndata as ad
import pandas as pd
import numpy as np

In [None]:
adata = sc.read_csv("/slurm/home/yrd/liaolab/wangtianhao/data/embryo/data_clean_sub_v2_mnn.csv").T
meta = pd.read_csv("/slurm/home/yrd/liaolab/wangtianhao/data/embryo/meta_clean_sub_v2_mnn.csv")
reduction = pd.read_csv("/slurm/home/yrd/liaolab/wangtianhao/data/embryo/umap_clean_sub_v2_mnn.csv")
umap = pd.read_csv("/slurm/home/yrd/liaolab/wangtianhao/data/embryo/reduction_clean_sub_v2_mnn.csv")

In [5]:
meta.set_index("Unnamed: 0", inplace=True)
umap.set_index("Unnamed: 0", inplace=True)
reduction.set_index("Unnamed: 0", inplace=True)

In [7]:
adata.obs = meta
adata.obsm["mnn"] = np.array(reduction)
adata.obsm["X_umap"] = np.array(umap)

In [12]:
sc.set_figure_params(dpi=300, color_map='viridis')  # low dpi (dots per inch) yields small inline figures
sc.settings.verbosity = 3  # verbosity: errors (0), warnings (1), info (2), hints (3)
#sc.logging.print_versions()

In [13]:
sc.pp.neighbors(adata, n_pcs=50, use_rep="mnn", knn=15)

computing neighbors


  from .autonotebook import tqdm as notebook_tqdm


    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:11)


In [None]:
adata.obs["stage"] = pd.Categorical(adata.obs["stage"])
adata.obs["stage"] = adata.obs["stage"].cat.reorder_categories(["Zygote","E1","E2","E3","E4","E5","E6","E7","E8","E9","E10","E12","E14","CS7","CS8_CytOrgin","CS9","CS10","CS11","CS12","CS13-14","CS15-16"])
adata.obs["stage_celltype"] = adata.obs["cell_types_final_ordered"].str.cat(adata.obs["stage"], sep="_")
adata.obs["stage_celltype"] = ["_".join(i.split(" ")) for i in adata.obs["stage_celltype"]]
adata.obs["stage_celltype"] = pd.Categorical(adata.obs["stage_celltype"])

In [14]:
sc.tl.paga(adata, groups='stage_celltype', model='v1.0')

running PAGA
    finished: added
    'paga/connectivities', connectivities adjacency (adata.uns)
    'paga/connectivities_tree', connectivities subtree (adata.uns) (0:00:00)


In [15]:
tree = pd.DataFrame(adata.uns["paga"]["connectivities"].todense())

In [16]:
tree.columns = adata.obs["stage_celltype"].cat.categories
tree.index = adata.obs["stage_celltype"].cat.categories

In [42]:
tree.to_csv("~/data/embryo/tree.csv")

In [None]:
import matplotlib as mpl
import numpy as np
import matplotlib.pyplot as plt
from matplotlib_venn import venn3, venn3_circles
import warnings

plt.rcParams['font.family'] = 'Arial'

warnings.filterwarnings("ignore")

In [None]:
features = ["Zygote","E1","E2","E3","E4","E5","E6","E7","E8","E9","E10","E12","E14","CS7","CS8_CytOrgin","CS9","CS10","CS11","CS12","CS13-14","CS15-16"]
lscmap = mpl.colormaps.get_cmap("RdBu")
features_hex_list = [mpl.colors.to_hex(lscmap(i)) for i in np.linspace(0, 1, len(features))]
features_colors = {i: j for i, j in zip(features, features_hex_list)}

In [3]:
from pycirclize import Circos
from pycirclize.utils import ColorCycler
import numpy as np
import pandas as pd
np.random.seed(0)

In [None]:
TF_data = pd.read_csv("/slurm/home/yrd/liaolab/wangtianhao/data/embryo/Morphogen_data_avg.csv", index_col=0)
#genes = ["GATA6", "GATA3", "NANOG", "SOX17", "TBXT", "EOMES", "FOXA2", "PAX6", "OTX2", "SOX2"]
#genes.reverse() # for morphogens
genes = ['BMP2', 'BMP4', 'BMP7', 'NODAL', 'WNT3', 'WNT6', 'FGF2', 'FGF8', 'SHH', 'ALDH1A2']
celltypes = TF_data["celltype"]
groups = TF_data["group"].unique().tolist()

mean_exp_datas, qv_datas = [], []
for group in groups:
    group_data = TF_data[TF_data["group"] == group]
    group_exp = group_data[genes]
    group_exp.index = group_data["celltype"]
    mean_exp_datas.append(group_exp)
    #
    #qv_datas.append(group_qvals)group_qvals = group_data["qval"].values
print("mean_exp_datas shapes:", [data.shape for data in mean_exp_datas])
#print("qv_datas lengths:", [len(qvs) for qvs in qv_datas])

mean_exp_datas shapes: [(1, 10), (1, 10), (1, 10), (1, 10), (3, 10), (6, 10), (4, 10), (4, 10), (5, 10), (4, 10), (4, 10), (6, 10), (6, 10), (11, 10), (13, 10), (16, 10), (18, 10), (30, 10), (21, 10), (22, 10), (21, 10)]


In [None]:
sectors = {"Zygote": mean_exp_datas[0].shape[0],
           "E1": mean_exp_datas[1].shape[0],
           "E2": mean_exp_datas[2].shape[0],
           "E3": mean_exp_datas[3].shape[0],
           "E4": mean_exp_datas[4].shape[0],
           "E5": mean_exp_datas[5].shape[0],
           "E6": mean_exp_datas[6].shape[0],
           "E7": mean_exp_datas[7].shape[0],
           "E8": mean_exp_datas[8].shape[0],
           "E9": mean_exp_datas[9].shape[0],
           "E10": mean_exp_datas[10].shape[0],
           "E12": mean_exp_datas[11].shape[0],
           "E14": mean_exp_datas[12].shape[0],
           "CS7": mean_exp_datas[13].shape[0],
           "CS8_CytOrgin": mean_exp_datas[14].shape[0],
           "CS9": mean_exp_datas[15].shape[0],
           "CS10": mean_exp_datas[16].shape[0],
           "CS11": mean_exp_datas[17].shape[0],
           "CS12": mean_exp_datas[18].shape[0],
           "CS13-14": mean_exp_datas[19].shape[0],
           "CS15-16": mean_exp_datas[20].shape[0]}
circos = Circos(sectors, space=2, start=30, end=360, endspace=False)
circos.textprops = {
    "color": "black",
    "weight": "normal",
    #"family": "monospace",
    "ha": "center",
    "va": "center",
    "fontsize": 2}

#vmin_qv, vmax_qv = 1e-20, 1e-10
vmin_exp, vmax_exp = 0, 1.2

for i, sector in enumerate(circos.sectors):
    start_col = 205
    track_exp = sector.add_track((start_col-25, start_col))
    track_exp.axis()
    sub_exp_data = mean_exp_datas[i].T
    sub_exp_data[sub_exp_data>=1] = 1
    track_exp.heatmap(sub_exp_data, vmin=vmin_exp, vmax=vmax_exp, cmap="OrRd", text_kws=dict(size=10), rect_kws=dict(ec="white", lw=0)) # GnBu OrRd
    x = np.linspace(1, int(track_exp.size), int(track_exp.size)) - 0.5
    xlabels = list(sub_exp_data.columns)
    track_exp.xticks(x, xlabels, outer=True, label_size=10, label_orientation="vertical")
    if i + 1 == len(sectors):
        track_exp.yticks([9.5, 8.5, 7.5, 6.5, 5.5, 4.5, 3.5, 2.5, 1.5, 0.5], genes, label_size=10, vmin=0, vmax=10)
        
    
    
    #track_qv = sector.add_track((start_col-25, start_col-22.5))
    #x = np.linspace(1, int(track_qv.size), int(track_qv.size)) - 0.5
    #y = np.ones(shape=(int(track_qv.size),)) - 0.5
    #c = np.asarray(qv_datas[i])
    #track_qv.scatter(x, y, s=15, marker="D", cmap="gist_heat_r")
    #if i + 1 == len(sectors):
        #track_qv.yticks([1], ["qval"], vmin=0, vmax=1)
    track_label = sector.add_track((start_col-30, start_col-25))
    track_label.axis(fc=features_colors[sector.name], alpha=0.4)
    track_label.text(sector.name, color="black", size=10, r=start_col-27.5)
circos.colorbar(bounds=(0.9, 0.6, 0.02, 0.2), label="Expression\nfraction", label_kws={"labelpad": -60,"size": 10,"ha": "center", "va": "top"}, vmin=vmin_exp, vmax=vmax_exp, orientation="vertical", cmap="RdBu_r")
#circos.colorbar(bounds=(0.9, 0.2, 0.02, 0.2), label="qval", label_kws={"labelpad": -50,"size": 10,"ha": "left"}, vmin=vmin_qv, vmax=vmax_qv, orientation="vertical", cmap="gist_heat_r")
fig = circos.plotfig()
plt.show()

In [None]:
fig.savefig("/slurm/home/yrd/liaolab/wangtianhao/data/embryo/circos_plot_outer.pdf",
            bbox_inches="tight",
            dpi=100,
            facecolor="white")
plt.close()

In [13]:
data = pd.read_csv("/slurm/home/yrd/liaolab/wangtianhao/data/embryo/ratio_data.csv", index_col=0)
data = data

In [None]:
batches = data.columns.to_list()[:-2]
celltypes = data["celltype"]
groups = data["group"].unique().tolist()

mean_exp_datas, qv_datas =[], []
for group in groups:
    group_data = data[data["group"] == group]
    group_exp = np.array(group_data[batches])
    mean_exp_datas.append(group_exp)
    #
    #qv_datas.append(group_qvals)group_qvals = group_data["qval"].values
print("mean_exp_datas shapes:", [len(data) for data in mean_exp_datas])
#print("qv_datas lengths:", [len(qvs) for qvs in qv_datas])

In [None]:
sectors = {"Zygote": mean_exp_datas[0].shape[0],
           "E1": mean_exp_datas[1].shape[0],
           "E2": mean_exp_datas[2].shape[0],
           "E3": mean_exp_datas[3].shape[0],
           "E4": mean_exp_datas[4].shape[0],
           "E5": mean_exp_datas[5].shape[0],
           "E6": mean_exp_datas[6].shape[0],
           "E7": mean_exp_datas[7].shape[0],
           "E8": mean_exp_datas[8].shape[0],
           "E9": mean_exp_datas[9].shape[0],
           "E10": mean_exp_datas[10].shape[0],
           "E12": mean_exp_datas[11].shape[0],
           "E14": mean_exp_datas[12].shape[0],
           "CS7": mean_exp_datas[13].shape[0],
           "CS8_CytOrgin": mean_exp_datas[14].shape[0],
           "CS9": mean_exp_datas[15].shape[0],
           "CS10": mean_exp_datas[16].shape[0],
           "CS11": mean_exp_datas[17].shape[0],
           "CS12": mean_exp_datas[18].shape[0],
           "CS13-14": mean_exp_datas[19].shape[0],
           "CS15-16": mean_exp_datas[20].shape[0]}
circos = Circos(sectors, space=2, start=30, end=360, endspace=False)
circos.textprops = {
    "color": "black",
    "weight": "normal",
    #"family": "monospace",
    "ha": "center",
    "va": "center",
    "fontsize": 2}
vmin, vmax = 0, 1
number_of_batch = 55
for i, sector in enumerate(circos.sectors):
    start_col = 100
    
    sub_exp_data = mean_exp_datas[i].T
    x = np.linspace(sector.start + 0.5, sector.end - 0.5, int(sector.size))
    
    # Plot stacked bar with user-specified params
    track2 = sector.add_track((95, 100))
    track2.axis()
    #track2.xticks_by_interval(1, outer=False)

    ColorCycler.set_cmap("tab20c")
    tab10_colors = [ColorCycler() for _ in range(number_of_batch)]
    bottom = np.zeros(len(x))
    for j in range(number_of_batch):
        y = sub_exp_data[j,:]
        track2.bar(x, y, bottom=bottom, width=1.0, color=tab10_colors[j], ec="white", lw=0.1, vmax=vmax)
        bottom += y

fig = circos.plotfig()
plt.show()

In [16]:
fig.savefig("/slurm/home/yrd/liaolab/wangtianhao/data/embryo/circos_plot_inner.pdf",
            bbox_inches="tight",
            dpi=100,
            facecolor="white")
plt.close()

In [17]:
data = pd.read_csv("/slurm/home/yrd/liaolab/wangtianhao/data/embryo/stage_ct_ratio_data.csv", index_col=0)

In [None]:
batches = data.columns.to_list()[:-2]
celltypes = data["celltype"]
groups = data["group"].unique().tolist()

mean_exp_datas, qv_datas =[], []
for group in groups:
    group_data = data.loc[data["group"] == group, group].to_list()
    mean_exp_datas.append(group_data)
    #
    #qv_datas.append(group_qvals)group_qvals = group_data["qval"].values
print("mean_exp_datas shapes:", [len(data) for data in mean_exp_datas])
#print("qv_datas lengths:", [len(qvs) for qvs in qv_datas])

In [None]:
# celltype ratio of each celltype in the corresponding stage
sectors = {"Zygote": len(mean_exp_datas[0]),
           "E1": len(mean_exp_datas[1]),
           "E2": len(mean_exp_datas[2]),
           "E3": len(mean_exp_datas[3]),
           "E4": len(mean_exp_datas[4]),
           "E5": len(mean_exp_datas[5]),
           "E6": len(mean_exp_datas[6]),
           "E7": len(mean_exp_datas[7]),
           "E8": len(mean_exp_datas[8]),
           "E9": len(mean_exp_datas[9]),
           "E10": len(mean_exp_datas[10]),
           "E12": len(mean_exp_datas[11]),
           "E14": len(mean_exp_datas[12]),
           "CS7": len(mean_exp_datas[13]),
           "CS8_CytOrgin": len(mean_exp_datas[14]),
           "CS9": len(mean_exp_datas[15]),
           "CS10": len(mean_exp_datas[16]),
           "CS11": len(mean_exp_datas[17]),
           "CS12": len(mean_exp_datas[18]),
           "CS13-14": len(mean_exp_datas[19]),
           "CS15-16": len(mean_exp_datas[20])}
circos = Circos(sectors, space=2, start=30, end=360, endspace=False)
circos.textprops = {
    "color": "black",
    "weight": "normal",
    #"family": "monospace",
    "ha": "center",
    "va": "center",
    "fontsize": 2}
vmin, vmax = 0, 1
for i, sector in enumerate(circos.sectors):
    start_col = 100
    
    y = mean_exp_datas[i]
    x = np.linspace(sector.start + 0.5, sector.end - 0.5, int(sector.size))
    
    # Plot stacked bar with user-specified params
    track2 = sector.add_track((92, 100))
    track2.axis()
    #track2.xticks_by_interval(1, outer=False)

    track2.bar(x, y, width=1.0, color="#cccccc", ec="white", lw=0.1, vmax=vmax)

fig = circos.plotfig()
plt.show()

In [20]:
fig.savefig("/slurm/home/yrd/liaolab/wangtianhao/data/embryo/circos_plot_cellratio.pdf",
            bbox_inches="tight",
            dpi=100,
            facecolor="white")
plt.close()