In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import numpy as np
import scanpy.external as sce

In [None]:
# Scanpy log verbosity: errors (0), warnings (1), info (2), hints (3).
sc.settings.verbosity = 3
# Print the versions of the packages in use.
sc.logging.print_versions()
# Default figure appearance for the plots that follow.
sc.settings.set_figure_params(
    dpi=80,
    figsize=(5, 5),
    frameon=True,
    facecolor='white',
)

In [None]:
# Time point label -> sample identifier for every sample used in the combined analysis.
# An earlier version of this mapping also listed replicates ("4hr": "A0008_045",
# "8hr_replicate": "A0021_038") and labelled "A0021_042" as "4hr_replicate";
# here "A0021_042" is the "4hr" sample.
samples_dict = {
    "Immature": "A0021_043",
    "0hr": "A0008_041",
    "1hr": "A0021_044",
    "4hr": "A0021_042",
    "6hr": "A0021_045",
    "8hr": "A0008_044",
    "11hr": "A0008_046",
    "12hr": "A0021_046",
}

In [None]:
# Cell-cycle gene lists (S and G2/M), taken from column "x" of each file;
# they are passed to sc.tl.score_genes_cell_cycle further down.
s_genes = list(pd.read_csv("./../../../../MouseInfection/s.genes.txt")["x"])
g2m_genes = list(pd.read_csv("./../../../../MouseInfection/g2m.genes.txt")["x"])
print(s_genes, g2m_genes, sep="\n")

In [None]:
# Load the processed AnnData object of every sample, keyed by its time point label.
adata_dict = {}
for sample in samples_dict:
    adata_dict[sample] = sc.read_h5ad(
        f"./../../pyobjs_github//adata_{sample}_with_image_processed_undertissue_filtered_reprocessed_refiltered_processed.h5ad"
    )
adata_dict

In [None]:
# Stack the .raw matrices of all samples into one AnnData; the "Sample" column
# in .obs records which sample each observation came from.
adata_ovary_combined = (
    adata_dict[list(samples_dict)[0]]
    .raw.to_adata()
    .concatenate(
        [adata_dict[name].raw.to_adata() for name in list(samples_dict)[1:]],
        batch_key="Sample",
        batch_categories=list(samples_dict),
    )
)
adata_ovary_combined
# The per-sample objects are not used again in this section.
del adata_dict

In [None]:
# Prefix the clustering columns that came with the per-sample objects so they are
# not confused with the "leiden_*" columns computed on the combined object later.
adata_ovary_combined.obs.rename(
    columns={f"leiden_{res}": f"individual_leiden_{res}" for res in ("1.0", "1.2", "1.5")},
    inplace=True,
)
adata_ovary_combined

In [None]:
# Unintegrated baseline: HVG selection -> cell-cycle scoring -> scaling -> PCA -> neighbours -> UMAP.
# Highly variable genes are computed with "Sample" as the batch key.
sc.pp.highly_variable_genes(adata_ovary_combined, max_mean=3, batch_key = "Sample")
sc.pl.highly_variable_genes(adata_ovary_combined)
# Cell-cycle scoring with the S / G2M gene lists loaded above
# (presumably the source of the "phase" column plotted later).
sc.tl.score_genes_cell_cycle(adata_ovary_combined, s_genes=s_genes, g2m_genes=g2m_genes)
# Keep the full gene set in .raw before restricting the object to the highly variable genes.
adata_ovary_combined.raw = adata_ovary_combined
adata_ovary_combined = adata_ovary_combined[:, adata_ovary_combined.var.highly_variable]
print(adata_ovary_combined.shape)
# Scale with values capped at 10, then PCA with a fixed random state.
sc.pp.scale(adata_ovary_combined, max_value=10)
sc.pp.pca(adata_ovary_combined, random_state= 0)
sc.pl.pca_variance_ratio(adata_ovary_combined, n_pcs = 50)
# Neighbour graph on 20 PCs (Harmony integration is only applied in a later cell), then UMAP.
sc.pp.neighbors(adata_ovary_combined, n_pcs=20)
sc.tl.umap(adata_ovary_combined)

In [None]:
# Small panels with PDF as the default save format.
sc.settings.set_figure_params(
    dpi=200,
    dpi_save=300,
    fontsize=8,
    figsize=(2.0, 2.0),
    frameon=True,
    facecolor='white',
    transparent=True,
    vector_friendly=False,
    format="pdf",
)
# UMAP of the unintegrated data, coloured by sample and by per-sample clustering.
sc.pl.umap(
    adata_ovary_combined,
    color=["Sample", "individual_leiden_1.2"],
    wspace=0.5,
)

In [None]:
# NOTE: this cell repeats the preceding plotting cell verbatim.
sc.settings.set_figure_params(
    dpi=200,
    dpi_save=300,
    fontsize=8,
    figsize=(2.0, 2.0),
    frameon=True,
    facecolor='white',
    transparent=True,
    vector_friendly=False,
    format="pdf",
)
sc.pl.umap(
    adata_ovary_combined,
    color=["Sample", "individual_leiden_1.2"],
    wspace=0.5,
)

In [None]:
# Harmony batch correction across samples; the next cell builds its neighbour
# graph on the "X_pca_harmony" representation.
sce.pp.harmony_integrate(adata_ovary_combined, key='Sample')

In [None]:
# Neighbour graph on the Harmony-corrected embedding, stored under its own key
# so the unintegrated graph is left untouched; the UMAP is recomputed from it.
sc.pp.neighbors(
    adata_ovary_combined,
    n_pcs=50,
    use_rep="X_pca_harmony",
    key_added="harmony_nn",
)
sc.tl.umap(adata_ovary_combined, neighbors_key="harmony_nn")

In [None]:
# Leiden clustering on the integrated graph at two resolutions
# (results stored as "leiden_1.0" and "leiden_1.5").
for resolution in (1.0, 1.5):
    sc.tl.leiden(
        adata_ovary_combined,
        resolution=resolution,
        neighbors_key="harmony_nn",
        key_added=f"leiden_{resolution}",
    )

In [None]:
sc.settings.set_figure_params(
    dpi=200,
    dpi_save=300,
    fontsize=8,
    figsize=(2.0, 2.0),
    frameon=True,
    facecolor='white',
    transparent=True,
    vector_friendly=False,
    format="pdf",
)
# Integrated UMAP coloured by the per-spot QC metrics.
sc.pl.umap(
    adata_ovary_combined,
    color=['n_genes_by_counts', 'total_counts'],
    palette="tab20",
    wspace=0.4,
)

In [None]:
sc.settings.set_figure_params(
    dpi=200,
    dpi_save=300,
    fontsize=8,
    figsize=(2.0, 2.0),
    frameon=True,
    facecolor='white',
    transparent=True,
    vector_friendly=False,
    format="pdf",
)
# Integrated UMAP coloured by sample, cell-cycle phase and integrated clustering.
sc.pl.umap(
    adata_ovary_combined,
    color=["Sample", "phase", "leiden_1.5"],
    palette="tab20",
    wspace=0.6,
)

In [None]:
# Spatial plot of the per-sample clustering, one panel per sample on a 2x4 grid,
# with a single shared legend taken from the second sample's panel.
sc.settings.set_figure_params(dpi=300, dpi_save= 300, fontsize=10, facecolor='white', frameon=False, figsize=(2.0, 2.0), vector_friendly = False, transparent=True, format="pdf")
grouping_var = "individual_leiden_1.5"
fig, axs = plt.subplots(2,4, figsize = (20,8))
for i, sample in enumerate(samples_dict.keys()):
    # One axes handle per panel. The original indexed the legend axes with
    # i/5, i%5 but plotted on i/4, i%4, which only coincided because i == 1.
    ax = axs[divmod(i, 4)]
    subset = adata_ovary_combined[adata_ovary_combined.obs["Sample"] == sample]
    if i == 1:
        # Draw this panel with its legend, capture the legend entries for the
        # figure-level legend, then remove the per-panel legend.
        sc.pl.spatial(subset, color = [grouping_var], wspace= 0.0, hspace = 0.0, spot_size = 30, frameon=False, title=sample, show=False, ax=ax, cmap="tab20")
        handles, labels = ax.get_legend_handles_labels()
        ax.get_legend().remove()
    else:
        sc.pl.spatial(subset, color = [grouping_var], wspace= 0.0, hspace = 0.0, spot_size = 30, frameon=False, title=sample, show=False, legend_loc="none", ax=ax, cmap="tab20")
fig.legend(handles, labels, loc='right', bbox_to_anchor = (0.98,0.5))
plt.subplots_adjust(wspace=0, hspace=0)

In [None]:
# Checkpoint: save the integrated and clustered combined object.
adata_ovary_combined.write_h5ad("./../../pyobjs_github/adata_ovary_combined_processed0.h5ad")

In [None]:
# Reload the checkpoint written above (lets the notebook be resumed from here) and show its summary.
adata_ovary_combined = sc.read_h5ad("./../../pyobjs_github/adata_ovary_combined_processed0.h5ad")
adata_ovary_combined

In [None]:
# Number of observations contributed by each sample.
adata_ovary_combined.obs["Sample"].value_counts()

In [None]:
# Spatial expression of Kctd14 in every sample on a 2x4 grid.
fig, axs = plt.subplots(2,4, figsize = (20,8))
# Figure settings and the colormap do not depend on the sample, so they are set
# once here rather than on every loop iteration. They are still applied after
# the grid is created, as in the original cell.
sc.settings.set_figure_params(dpi=200, dpi_save= 300, fontsize=10, facecolor='white', frameon=False, figsize=(2.0, 2.0), vector_friendly = False, transparent=True, format="pdf")
kctd14_cmap = sns.blend_palette(["lightgrey", sns.xkcd_rgb["crimson"]], as_cmap=True)
for i, sample in enumerate(samples_dict.keys()):
    sc.pl.spatial(adata_ovary_combined[adata_ovary_combined.obs["Sample"] == sample], color = ["Kctd14"], wspace= 0.5, spot_size = 30, frameon=False, title=sample, show=False, ax=axs[divmod(i, 4)], cmap=kctd14_cmap, vmax='p99.2')

In [None]:
# Marker genes grouped by the temporal expression pattern they are expected to
# show; each key becomes a gene-group label in the dot plot of the next cell.
validation_markers = dict([
    ("Increase from immature to 0h and then decrease", ["Cyp19a1", "Hsd3b1"]),
    ("Continuously increase from immature to 12h", ["Vegfa", "Cebpb"]),
    ("Increase from immature to 4h and then decrease", ["Lhcgr", "Star"]),
    ("Gene peaks at 12h", ["Edn2"]),
    ("Gene peaks at 4hr", ["Ptgs2"]),
    ("Gene peaks at 8hr", ["Kdr"]),
])

In [None]:
sc.settings.set_figure_params(
    dpi=200,
    dpi_save=300,
    fontsize=10,
    figsize=(5.0, 5.0),
    frameon=False,
    facecolor='white',
    transparent=True,
    vector_friendly=False,
    format="pdf",
)
# Dot plot of the validation markers per sample, to check the expected temporal patterns.
sc.pl.dotplot(
    adata_ovary_combined,
    validation_markers,
    groupby="Sample",
    var_group_rotation=90,
    cmap="Blues",
)

In [None]:
# Marker genes per integrated Leiden cluster, stored under a dedicated .uns key.
sc.tl.rank_genes_groups(
    adata_ovary_combined,
    groupby='leiden_1.5',
    key_added='rank_genes_groups_leiden_1.5',
)

In [None]:
# Table of the top-ranked gene names per cluster: one "<cluster>_names" column
# per group, truncated to 100 rows; the first 30 rows are printed.
result = adata_ovary_combined.uns['rank_genes_groups_leiden_1.5']
groups = result['names'].dtype.names
dge_results = pd.DataFrame(
    {f"{group}_names": result['names'][group] for group in groups}
).head(100)
print(dge_results.head(30))

In [None]:
sc.settings.set_figure_params(
    dpi=200,
    dpi_save=300,
    fontsize=20,
    figsize=(2.0, 2.0),
    frameon=True,
    facecolor='white',
    transparent=True,
    vector_friendly=False,
    format="pdf",
)
# Top 5 genes per cluster, coloured by log fold change on a symmetric -7..7 scale.
sc.pl.rank_genes_groups_dotplot(
    adata_ovary_combined,
    key="rank_genes_groups_leiden_1.5",
    n_genes=5,
    values_to_plot='logfoldchanges',
    min_logfoldchange=1,
    vmin=-7,
    vmax=7,
    cmap='bwr',
)

In [None]:
# Display a summary of the combined object.
adata_ovary_combined

In [None]:
# Overwrite the checkpoint so it also contains the marker-gene results computed above.
adata_ovary_combined.write_h5ad("./../../pyobjs_github/adata_ovary_combined_processed0.h5ad")

In [None]:
# Reload the checkpoint before attaching the deconvolution results, and show its summary.
adata_ovary_combined = sc.read_h5ad("./../../pyobjs_github/adata_ovary_combined_processed0.h5ad")
adata_ovary_combined

In [None]:
# Attach the level-1 (fine) cell2location results to the combined object.
# The result file is read once (the original read it twice) and restricted to
# the observations present in the combined object.
level1_adata = sc.read_h5ad("./../pyobjs/ovary_cell2location_ss_combined_level1.h5ad")[adata_ovary_combined.obs_names,:]
Level1_obsm = dict(level1_adata.obsm)
Level1_uns = dict(level1_adata.uns)

# Convert the 5% quantile abundances into per-observation proportions (each row sums to 1).
Level1_obsm['q05_cell_proportions'] = Level1_obsm['q05_cell_abundance_w_sf'].div(Level1_obsm['q05_cell_abundance_w_sf'].sum(axis=1), axis=0)
# add 5% quantile, representing confident cell abundance, 'at least this amount is present',
# to adata.obs with nice names for plotting
adata_ovary_combined.obs["Level1_total_abundance"] = Level1_obsm['q05_cell_abundance_w_sf'].sum(axis = 1)
# One "Level1_<cell type>" column per factor of the cell2location model.
# NOTE(review): assumes the proportion columns are in the same order as
# 'factor_names' — confirm.
ct_list = list(Level1_uns['mod']['factor_names'])
ct_list = ["Level1_" + x for x in ct_list]
adata_ovary_combined.obs[ct_list] = Level1_obsm['q05_cell_proportions']
# Highest proportion per observation and the cell type column it belongs to.
adata_ovary_combined.obs["Level1_max_pred"] = adata_ovary_combined.obs[ct_list].max(axis=1)
adata_ovary_combined.obs["Level1_max_pred_celltype"] = adata_ovary_combined.obs[ct_list].idxmax(axis=1)
# Second "_"-separated token of the column name, e.g. "Level1_GC_Antral" -> "GC".
adata_ovary_combined.obs["Level1_max_pred_celltype_group"]  = [x.split("_")[1] for x in adata_ovary_combined.obs["Level1_max_pred_celltype"]]

In [None]:
# Attach the level-0 (coarse) cell2location results to the combined object.
# The result file is read once (the original read it twice) and restricted to
# the observations present in the combined object.
level0_adata = sc.read_h5ad("./../pyobjs/ovary_cell2location_ss_combined_level0.h5ad")[adata_ovary_combined.obs_names,:]
Level0_obsm = dict(level0_adata.obsm)
Level0_uns = dict(level0_adata.uns)

# Convert the 5% quantile abundances into per-observation proportions (each row sums to 1).
Level0_obsm['q05_cell_proportions'] = Level0_obsm['q05_cell_abundance_w_sf'].div(Level0_obsm['q05_cell_abundance_w_sf'].sum(axis=1), axis=0)
# add 5% quantile, representing confident cell abundance, 'at least this amount is present',
# to adata.obs with nice names for plotting
adata_ovary_combined.obs["Level0_total_abundance"] = Level0_obsm['q05_cell_abundance_w_sf'].sum(axis = 1)
# One "Level0_<cell type>" column per factor of the cell2location model.
# NOTE(review): assumes the proportion columns are in the same order as
# 'factor_names' — confirm.
ct_list = list(Level0_uns['mod']['factor_names'])
ct_list = ["Level0_" + x for x in ct_list]
adata_ovary_combined.obs[ct_list] = Level0_obsm['q05_cell_proportions']
# Highest proportion per observation and the cell type column it belongs to.
adata_ovary_combined.obs["Level0_max_pred"] = adata_ovary_combined.obs[ct_list].max(axis=1)
adata_ovary_combined.obs["Level0_max_pred_celltype"] = adata_ovary_combined.obs[ct_list].idxmax(axis=1)


In [None]:
# Checkpoint: save the combined object with the Level0/Level1 deconvolution columns.
adata_ovary_combined.write_h5ad("./../../pyobjs_github/adata_ovary_combined_processed_deconvoluted_newscaling.h5ad")

In [None]:
# Reload the deconvoluted checkpoint and show its summary.
adata_ovary_combined = sc.read_h5ad("./../../pyobjs_github/adata_ovary_combined_processed_deconvoluted_newscaling.h5ad")
adata_ovary_combined

In [None]:
# "Coarse celltype": the dominant level-0 cell type with the "Level0_" prefix
# stripped, stored as a categorical with a fixed category order. Labels outside
# this list become missing values.
adata_ovary_combined.obs["Coarse celltype"] = pd.Categorical(
    [label.split("Level0_")[1] for label in adata_ovary_combined.obs["Level0_max_pred_celltype"]],
    categories=['Endothelium', 'Epithelium', 'Granulosa', 'Mesenchyme', 'Immune', 'Oocyte'],
)

In [None]:
# Number of observations assigned to each coarse cell type.
adata_ovary_combined.obs["Coarse celltype"].value_counts()

In [None]:
# Spatial plot of the coarse cell type, one panel per sample on a 2x4 grid,
# with a single shared legend taken from the first sample's panel.
sc.settings.set_figure_params(dpi=200, dpi_save= 300, fontsize=6, facecolor='white', frameon=False, figsize=(2.0, 2.0), vector_friendly = False, transparent=True, format="pdf")
grouping_var = "Coarse celltype"
fig, axs = plt.subplots(2,4, figsize = (20,8))
for i, sample in enumerate(samples_dict.keys()):
    ax = axs[divmod(i, 4)]
    # Work on an explicit copy: the original assigned to .obs of a view, which
    # makes anndata turn the view into a copy implicitly and emit a warning.
    subset = adata_ovary_combined[adata_ovary_combined.obs["Sample"] == sample].copy()
    # Give every panel the full category list of the combined object, so
    # categories absent from one sample are still part of its categorical.
    subset.obs[grouping_var] = subset.obs[grouping_var].cat.set_categories(adata_ovary_combined.obs[grouping_var].cat.categories)
    print(subset.obs[grouping_var].cat.categories)
    if i == 0:
        # Draw with a legend, capture its entries for the shared legend, then drop it.
        sc.pl.spatial(subset, color = [grouping_var], wspace= 0.5, spot_size = 30, frameon=False, title=sample, show=False, ax=ax)
        handles, labels = ax.get_legend_handles_labels()
        ax.get_legend().remove()
    else:
        sc.pl.spatial(subset, color = [grouping_var], wspace= 0.5, spot_size = 30, frameon=False, title=sample, show=False, legend_loc="none", ax=ax)
fig.legend(handles, labels, ncol = 6, loc='lower center', bbox_to_anchor = (0.50,0.1), title = "Coarse celltype")
plt.subplots_adjust(wspace=0, hspace=0)

In [None]:
# "Fine celltype": the dominant level-1 cell type with the "Level1_" prefix
# stripped; every type whose group token is "I" is collapsed into "Immune".
adata_ovary_combined.obs["Level1_max_pred_celltype_group"]  = [x.split("_")[1] for x in adata_ovary_combined.obs["Level1_max_pred_celltype"]]
adata_ovary_combined.obs["Fine celltype"] = [x.split("Level1_")[1] for x in adata_ovary_combined.obs["Level1_max_pred_celltype"]]
adata_ovary_combined.obs["Fine celltype"] = adata_ovary_combined.obs["Fine celltype"].astype(str)
# Use a single .loc assignment. The original chained form obs[col][mask] = ...
# may write to a temporary copy and is a silent no-op under pandas Copy-on-Write.
adata_ovary_combined.obs.loc[adata_ovary_combined.obs["Level1_max_pred_celltype_group"] == "I", "Fine celltype"] = "Immune"
adata_ovary_combined.obs["Fine celltype"] = adata_ovary_combined.obs["Fine celltype"].astype("category")

In [None]:
# Fix the category order of "Fine celltype" (this is the order used in legends).
# Labels not listed here become missing values.
adata_ovary_combined.obs["Fine celltype"] = adata_ovary_combined.obs["Fine celltype"].cat.set_categories(
    [
        # "GC_" labels
        'GC_Preantral',
        'GC_Antral',
        'GC_Estrous',
        'GC_CL_Lytic',
        'GC_Atretic',
        'GC_CL_Active',
        'GC_Mitotic',
        'GC_Mural',
        # "M_" labels
        'M_Immature Theca',
        'M_Steroidogenic Theca',
        'M_Dividing Mesenchyme',
        'M_Smooth Muscle',
        'M_Pericyte',
        'M_Medullary Stroma',
        'M_Cortical Stroma',
        # remaining labels
        "Immune",
        'EN_Blood',
        'EN_Lymph',
        'Epithelium',
        'Oocyte',
    ]
)

In [None]:
# Spatial plot of the fine cell type, one panel per sample on a 2x4 grid,
# with a single shared legend taken from the second sample's panel.
sc.settings.set_figure_params(
    dpi=300,
    dpi_save=300,
    fontsize=10,
    figsize=(2.0, 2.0),
    frameon=False,
    facecolor='white',
    transparent=True,
    vector_friendly=False,
    format="pdf",
)
grouping_var = "Fine celltype"
fig, axs = plt.subplots(2, 4, figsize=(20, 8))
for idx, sample in enumerate(samples_dict):
    ax = axs[idx // 4, idx % 4]
    # NOTE(review): unlike the coarse plot, the categories of the subset are not
    # reset to those of the combined object here — confirm this is intended.
    subset = adata_ovary_combined[adata_ovary_combined.obs["Sample"] == sample]
    shared_kwargs = dict(
        color=[grouping_var],
        wspace=0.0,
        hspace=0.0,
        spot_size=30,
        frameon=False,
        title=sample,
        show=False,
        ax=ax,
    )
    if idx != 1:
        sc.pl.spatial(subset, legend_loc="none", **shared_kwargs)
        continue
    # Second panel: draw with a legend, keep its entries, then remove it.
    sc.pl.spatial(subset, **shared_kwargs)
    handles, labels = ax.get_legend_handles_labels()
    ax.get_legend().remove()
fig.legend(handles, labels, ncol=6, loc='lower center', bbox_to_anchor=(0.55, 0), title="Fine celltype")
plt.subplots_adjust(wspace=0, hspace=0)

In [None]:
# Reload the deconvoluted checkpoint and show its summary.
# NOTE(review): in the cells shown, this file is written before the
# "Coarse celltype" / "Fine celltype" columns are created, so reloading it here
# appears to discard them — confirm this is intended.
adata_ovary_combined = sc.read_h5ad("./../../pyobjs_github/adata_ovary_combined_processed_deconvoluted_newscaling.h5ad")
adata_ovary_combined

In [None]:
# Split the combined object back into one file per sample, printing each shape.
for sample in samples_dict:
    sample_adata = adata_ovary_combined[adata_ovary_combined.obs["Sample"] == sample]
    print(sample_adata.shape)
    sample_adata.write_h5ad(
        f"./../../pyobjs_github/adata_{sample}_with_image_processed_undertissue_filtered_reprocessed_refiltered_integrated.h5ad"
    )

In [None]:
# Inspect the observation names of the "4hr" sample.
# `adata_dict` was deleted after concatenation and is only rebuilt in the next
# cell, so the original `adata_dict["4hr"]` raises NameError on a top-to-bottom
# run. The "4hr" subset of the combined object is what was exported for that
# sample in the cell above, so it is used instead.
adata_ovary_combined[adata_ovary_combined.obs["Sample"] == "4hr"].obs_names

In [None]:
# Read the per-sample integrated files back in, keyed by time point label,
# printing each shape.
adata_dict = {}
for sample in samples_dict:
    adata_dict[sample] = sc.read_h5ad(
        f"./../../pyobjs_github/adata_{sample}_with_image_processed_undertissue_filtered_reprocessed_refiltered_integrated.h5ad"
    )
    print(adata_dict[sample].shape)

In [None]:
# Reload the deconvoluted checkpoint for the coarse cell type marker analysis.
adata_ovary_combined = sc.read_h5ad("./../../pyobjs_github/adata_ovary_combined_processed_deconvoluted_newscaling.h5ad")
# Set the log1p base entry explicitly on the reloaded object, before rank_genes_groups is run on it.
adata_ovary_combined.uns['log1p'] = {'base' : None}
# In the cells shown, this file is written before "Coarse celltype" is created,
# so the column the next cell groups by may be missing after the reload.
# Re-derive it from the dominant level-0 cell type if so.
if "Coarse celltype" not in adata_ovary_combined.obs:
    adata_ovary_combined.obs["Coarse celltype"] = pd.Categorical(
        [x.split("Level0_")[1] for x in adata_ovary_combined.obs["Level0_max_pred_celltype"]],
        categories=['Endothelium', 'Epithelium', 'Granulosa', 'Mesenchyme', 'Immune', 'Oocyte'],
    )
adata_ovary_combined

In [None]:
# Marker genes per coarse cell type, stored under "rank_gene_groups_coarse_celltype";
# pts=True is requested in addition to the default outputs.
sc.tl.rank_genes_groups(adata_ovary_combined, groupby="Coarse celltype", key_added="rank_gene_groups_coarse_celltype", pts=True)

In [None]:
sc.settings.set_figure_params(
    dpi=80,
    fontsize=16,
    figsize=(8, 5),
    frameon=True,
    facecolor='white',
)
# Top 10 genes per coarse cell type, coloured by log fold change on a symmetric -3..3 scale.
sc.pl.rank_genes_groups_dotplot(
    adata_ovary_combined,
    key="rank_gene_groups_coarse_celltype",
    n_genes=10,
    values_to_plot='logfoldchanges',
    min_logfoldchange=1.0,
    vmin=-3,
    vmax=3.0,
    cmap='bwr',
)

In [None]:
# Table of the top 30 ranked genes per coarse cell type, with three columns per
# group: gene name, log fold change and adjusted p-value.
# The results are read from the key written by the rank_genes_groups call above
# ("rank_gene_groups_coarse_celltype"); the original looked up
# 'rank_gene_groups_Level0_max_pred_celltype', which no visible cell creates.
result = adata_ovary_combined.uns['rank_gene_groups_coarse_celltype']
groups = result['names'].dtype.names
dge_results = pd.DataFrame(
    {group + '_' + key: result[key][group]
    for group in groups for key in ['names', "logfoldchanges", "pvals_adj"]}).head(30)
dge_results