In [None]:
import numpy as np
import pandas as pd
import scanpy as sc

In [None]:
import matplotlib.pyplot as plt

In [None]:
import matplotlib as mpl

In [None]:
import seaborn as sns

In [None]:
# Scanpy session setup: debug-level logging (verbosity 4), print the version
# header, and configure figures for 300 dpi PDF output on a white background.
# vector_friendly=True is scanpy's option for keeping scatter-heavy vector
# files manageable (see scanpy's set_figure_params documentation).
sc.settings.verbosity = 4
sc.logging.print_header()
sc.settings.set_figure_params(
    dpi=300,
    facecolor='white',
    format='pdf',
    vector_friendly=True,
)

In [None]:
# Figure identifier: used for the output directory name and inside every saved file name.
figure = "Figure_5"

In [None]:
# Send every scanpy `save=` output to a per-figure directory next to the notebook.
sc.settings.figdir = f'./{figure}'

In [None]:
# Light grey -> medium blue gradient; used for the UMAP feature plots and dot plots.
umap_cmap = sns.blend_palette(
    ['lightgrey', 'xkcd:medium blue'],
    as_cmap=True,
)

In [None]:
# Four-stop gradient for PAGA node colouring: pink -> light grey -> blue.
# The blue stop is listed twice, so the top part of the range stays fully blue.
paga_cmap = sns.blend_palette(
    [
        'xkcd:darkish pink',
        '#E1E1E1',
        'xkcd:medium blue',
        'xkcd:medium blue',
    ],
    as_cmap=True,
)

In [None]:
# Show the PAGA colormap as the cell output for a visual check.
paga_cmap

In [None]:
# Purple -> green -> yellow gradient.
# NOTE(review): not referenced by any other cell visible in this notebook.
deg_cmap = sns.blend_palette(
    ['purple', 'green', 'yellow'],
    as_cmap=True,
)

In [None]:
# Show the colormap as the cell output for a visual check.
deg_cmap

In [None]:
# Grey -> red gradient. The first two stops are two spellings of the same
# light grey, so the lower half of the range stays flat before ramping to red.
score_cmap = sns.blend_palette(
    ['lightgrey', 'lightgray', 'red'],
    as_cmap=True,
)

In [None]:
# Show the colormap as the cell output for a visual check.
score_cmap

In [None]:
# Blank node labels passed to sc.pl.paga(labels=...) so that no text is drawn
# on the PAGA nodes.
# NOTE(review): 49 is presumably the number of nodes/clusters in adata_paga —
# confirm it still matches if the clustering changes.
empty_labels = [''] * 49

# Input file

In [None]:
# Full atlas object; used below for UMAPs, dot plots and cluster statistics.
adata = sc.read_h5ad('../../pristina_atlas_coloured_subcl.h5ad')

In [None]:
# Display the AnnData summary as the cell output.
adata

In [None]:
# Name of the adata.obs column holding the cluster labels used throughout this notebook.
clusteringlayer = 'leiden_1.5'

In [None]:
# Object passed to sc.pl.paga below; its uns['paga'] entry is read for the colorbar.
adata_paga = sc.read_h5ad('../../pristina_atlas_cut_PAGA.h5ad')

In [None]:
# Sub-clustered object plotted in Panel E/F via its 'leiden_piwi_pos_0.4' column.
adata_subset = sc.read_h5ad('../../pristina_piwi_subcl.h5ad')

In [None]:
# Display the AnnData summary of the subset as the cell output.
adata_subset

In [None]:
# Display the per-cell annotation table of the subset.
adata_subset.obs

In [None]:
# Marker transcripts, in the order they are plotted in Panel A/B
# (short names as used in those cells).
piwi_pos_genes = [
    'PrileiEVm003567t1',  # piwi1
    'PrileiEVm016887t1',  # nanos
    'PrileiEVm016982t1',  # pcna
    'PrileiEVm003521t1',  # mcm2
    'PrileiEVm022498t1',  # h2a
    'PrileiEVm023936t1',  # h2b
]

In [None]:
# Cluster annotation sheet, indexed by its 'Cluster' column; the cells below
# read its 'Names (leiden_1.5)' and 'Colours' columns.
df = pd.read_excel('../../Names, Broad Names, Color Palette, Order, Pristina.xlsx', index_col = 'Cluster')

In [None]:
# Remove the rows whose cluster name is the literal 'unnanotated'.
# NOTE(review): the spelling must match the spreadsheet exactly, otherwise
# nothing is dropped — confirm against the sheet.
is_unannotated = df['Names (leiden_1.5)'] == 'unnanotated'
dff = df.drop(df[is_unannotated].index)

In [None]:
# Integer copy of the cluster index, so clusters can be sorted numerically
# rather than as strings.
dff = dff.assign(order_cells=dff.index.astype(int))

In [None]:
# Store the annotated cluster colours, sorted by numeric cluster id, as the
# colour list for 'leiden_1.5' on the PAGA object.
# NOTE(review): assumes numeric cluster order matches the category order of
# 'leiden_1.5' in adata_paga — confirm.
colours_by_cluster = dff.sort_values(by='order_cells')['Colours']
adata_paga.uns['leiden_1.5_colors'] = np.array(colours_by_cluster)

# Panel A and B

In [None]:
def plot_marker_umap_and_paga(name, transcript):
    """Plot one marker transcript on the atlas UMAP and on the PAGA graph.

    Both plots are handed to scanpy with a ``save=`` name built from the
    figure id, the transcript id and the short gene name, so they are written
    into ``sc.settings.figdir``.

    Parameters
    ----------
    name : str
        Short gene name, used only in the output file names.
    transcript : str
        Transcript id passed to scanpy as the ``color`` key.
    """
    file_stem = figure + '_' + transcript + '_' + name

    sc.pl.umap(
        adata,
        color=transcript,
        color_map=umap_cmap,
        frameon=False,
        size=5,
        save=file_stem + '_feature.pdf',
    )

    sc.pl.paga(
        adata_paga,
        labels=empty_labels,  # blank strings: draw the nodes without text
        color=transcript,
        cmap=paga_cmap,
        threshold=0.25,
        solid_edges='connectivities_tree',
        root=1,
        layout='rt',
        max_edge_width=1,
        edge_width_scale=0.5,
        fontsize=3,
        frameon=False,
        save=file_stem + '_paga.pdf',
    )


# (short name, transcript id) for every Panel A/B marker, in plotting order.
panel_ab_markers = [
    ('piwi1', 'PrileiEVm003567t1'),
    ('nanos', 'PrileiEVm016887t1'),
    ('pcna', 'PrileiEVm016982t1'),
    ('mcm2', 'PrileiEVm003521t1'),
    ('h2a', 'PrileiEVm022498t1'),
    ('h2b', 'PrileiEVm023936t1'),
]

for name, transcript in panel_ab_markers:
    plot_marker_umap_and_paga(name, transcript)

In [None]:
# Number of cells assigned to clusters '1', '2' and '8' combined.
cluster_sizes = adata.obs[clusteringlayer].value_counts()
cluster_sizes.loc[['1', '2', '8']].sum()

In [None]:
# Total number of cells (length of the cluster-label column).
adata.obs[clusteringlayer].shape[0]

In [None]:
# Percentage of all cells that fall into clusters '1', '2' and '8'.
n_cells_in_clusters = adata.obs[clusteringlayer].value_counts().loc[['1', '2', '8']].sum()
n_cells_all = adata.obs[clusteringlayer].shape[0]
n_cells_in_clusters / n_cells_all * 100

In [None]:
# List the experiment labels present in the atlas.
adata.obs['Experiment'].cat.categories

In [None]:
# Per experiment: percentage of that experiment's cells in clusters '1', '2' and '8'.
perc_exp = {}
for experiment in adata.obs['Experiment'].cat.categories:
    obs_of_experiment = adata.obs[adata.obs['Experiment'] == experiment]
    clusters_of_experiment = obs_of_experiment[clusteringlayer]
    n_in_clusters = clusters_of_experiment.value_counts().loc[['1', '2', '8']].sum()
    perc_exp[experiment] = n_in_clusters / clusters_of_experiment.shape[0] * 100
    

In [None]:
# Show the per-experiment percentages.
perc_exp

In [None]:
# Tab-separated count table; the unnamed first column becomes the index.
# The cells below look rows up by transcript id and columns up by cluster label.
counts = pd.read_csv('../../figures/pristina_atlas_counts/Pristina_atlas_counts.tsv',  sep="\t", index_col = 'Unnamed: 0' )

In [None]:
# Display the count table.
counts

In [None]:
# Percentage of the counts of one transcript ('pcna' in the Panel A/B cells)
# that come from clusters '1', '2' and '8'.
counts_in_clusters = counts.loc['PrileiEVm016982t1', ['1', '2', '8']].sum()
counts_all_clusters = counts.loc['PrileiEVm016982t1'].sum()
counts_in_clusters / counts_all_clusters * 100

In [None]:
# For the last four marker transcripts: percentage of each transcript's
# counts that come from clusters '1', '2' and '8'.
exp_di = {
    gene: counts.loc[gene, ['1', '2', '8']].sum() / counts.loc[gene].sum() * 100
    for gene in piwi_pos_genes[2:6]
}

In [None]:
# Show the per-transcript percentages.
exp_di

# Panel C

In [None]:
# List the keys stored in adata.uns.
list(adata.uns)

In [None]:
# Row order for the dot plots: broad cell types by decreasing cell count,
# with the entry at position 10 moved to the end.
# Both steps are kept in ONE cell on purpose: when the pop/append lived in its
# own cell, re-running that cell rotated the list again and silently changed
# the order.
# NOTE(review): index 10 depends on the count ranking — confirm which category
# it is meant to target.
order_li = adata.obs['broad_names'].value_counts().index.to_list()
order_li.append(order_li.pop(10))

In [None]:
# One dot plot per consecutive pair of marker transcripts, grouped by broad
# cell type and drawn in the order defined by order_li.
for gene_pair in (piwi_pos_genes[0:2], piwi_pos_genes[2:4], piwi_pos_genes[4:6]):
    sc.pl.dotplot(
        adata,
        var_names=gene_pair,
        groupby='broad_names',
        cmap=umap_cmap,
        categories_order=order_li,
        swap_axes=True,
        # str() of the list (brackets and quotes included) is kept so that the
        # output file names stay exactly as before.
        save=figure + '_' + str(gene_pair) + '.pdf',
    )

# Panel D

In [None]:
# PAGA graph with nodes coloured by 'degree_solid'; saved as the potency panel.
paga_potency_kwargs = dict(
    color='degree_solid',
    cmap='viridis',
    threshold=0.25,
    solid_edges='connectivities_tree',
    root=1,
    layout='rt',
    node_size_scale=2,
    node_size_power=0.9,
    max_edge_width=3,
    fontsize=10,
    fontoutline=2,
    frameon=False,
    save=figure + '_' + 'potency.pdf',
)

with plt.rc_context({'figure.figsize': (10, 10)}):
    sc.pl.paga(adata_paga, **paga_potency_kwargs)

In [None]:
# Stand-alone vertical colorbar for the potency panel.
fig, ax = plt.subplots(figsize=(0.5, 10))
fig.subplots_adjust(bottom=0.5)

# Column sums of the PAGA connectivity matrix, computed once and reused for
# both colour limits.
# NOTE(review): the potency plot colours nodes by 'degree_solid' with
# solid_edges='connectivities_tree' and threshold=0.25, whereas these limits
# come from the full 'connectivities' matrix — confirm the two scales agree.
node_connectivity = adata_paga.uns['paga']['connectivities'].sum(axis=0)

cmap = mpl.cm.viridis
norm = mpl.colors.Normalize(vmin=node_connectivity.min(), vmax=node_connectivity.max())

fig.colorbar(
    mpl.cm.ScalarMappable(norm=norm, cmap=cmap),
    cax=ax,
    orientation='vertical',
    label='Potency Score',
)

# Write next to the scanpy figures. The directory is created here so this
# cell does not depend on an earlier scanpy `save=` call having made it.
colorbar_path = sc.settings.figdir / 'paga_colorbar.pdf'
colorbar_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(colorbar_path)

In [None]:
# First 10 rows of the 'names' table stored under the Wilcoxon
# rank_genes_groups key for the chosen clustering.
wilcox_key = 'rank_genes_groups_wilcox_' + clusteringlayer
markers_w = pd.DataFrame(adata.uns[wilcox_key]['names']).head(10)

In [None]:
# Display the marker table.
markers_w

In [None]:
# PAGA graph with the same layout settings as the potency plot, coloured by
# 'n_genes'. Displayed only; no file is written.
with plt.rc_context({'figure.figsize': (10, 10)}):
    sc.pl.paga(
        adata_paga,
        color='n_genes',
        cmap='viridis',
        threshold=0.25,
        solid_edges='connectivities_tree',
        root=1,
        layout='rt',
        node_size_scale=2,
        node_size_power=0.9,
        max_edge_width=3,
        fontsize=10,
        fontoutline=2,
        frameon=False,
    )

# Panel E

In [None]:
with plt.rc_context({'figure.figsize': (12, 12)}):
    sc.pl.umap(adata, color='subset clusters', legend_fontoutline = 3, legend_fontsize = 10,
        size = 40,
        frameon=False, add_outline = False, save = '_'+figure+'_subset.pdf')

In [None]:
# List the per-cell annotation columns available on the subset.
adata_subset.obs.columns

In [None]:
# UMAP of the subset object coloured by its own 'leiden_piwi_pos_0.4' clustering.
with plt.rc_context({'figure.figsize': (12, 12)}):
    sc.pl.umap(
        adata_subset,
        color='leiden_piwi_pos_0.4',
        legend_fontoutline=3,
        legend_fontsize=10,
        size=100,
        frameon=False,
        add_outline=False,
        save='_' + figure + '_subset_subset.pdf',
    )

# Panel F and supplemental

In [None]:
# One UMAP per subset cluster showing the 'score_scl_<cluster>' column, drawn
# with a white -> light grey -> cluster-colour gradient.
# Categories and colours are paired by position (colour k belongs to category
# k, scanpy's convention for '<key>_colors') instead of indexing the colour
# list with int(label), which is only right when labels are '0'..'n' in
# positional order. A local colormap name keeps the module-level `score_cmap`
# from being overwritten.
subset_cluster_labels = adata.obs['subset clusters'].cat.categories
subset_cluster_colors = adata.uns['subset clusters_colors']
for cluster, cluster_color in zip(subset_cluster_labels, subset_cluster_colors):
    cluster_cmap = sns.blend_palette(['white', 'lightgrey', cluster_color], as_cmap=True)
    sc.pl.umap(
        adata,
        color='score_scl_' + cluster,
        color_map=cluster_cmap,
        frameon=False,
        size=10,
        save='_' + figure + '_' + 'score_w_subset_cluster_' + cluster + '_.pdf',
    )
    
    

In [None]:
# One UMAP per subset cluster showing the 'score_scl_<cluster>_l' column,
# drawn with a white -> light grey -> cluster-colour gradient.
# Categories and colours are paired by position (colour k belongs to category
# k, scanpy's convention for '<key>_colors') instead of indexing the colour
# list with int(label), which is only right when labels are '0'..'n' in
# positional order. A local colormap name keeps the module-level `score_cmap`
# from being overwritten.
subset_cluster_labels = adata.obs['subset clusters'].cat.categories
subset_cluster_colors = adata.uns['subset clusters_colors']
for cluster, cluster_color in zip(subset_cluster_labels, subset_cluster_colors):
    cluster_cmap = sns.blend_palette(['white', 'lightgrey', cluster_color], as_cmap=True)
    sc.pl.umap(
        adata,
        color='score_scl_' + cluster + '_l',
        color_map=cluster_cmap,
        frameon=False,
        size=10,
        save='_' + figure + '_' + 'score_l_subset_cluster_' + cluster + '_.pdf',
    )

In [None]:
# One atlas UMAP per subset cluster, highlighting only that cluster via `groups`.
subset_cluster_labels = adata.obs['subset clusters'].cat.categories
for cluster in subset_cluster_labels:
    with plt.rc_context({'figure.figsize': (12, 12)}):
        sc.pl.umap(
            adata,
            color='subset clusters',
            legend_fontoutline=3,
            legend_fontsize=50,
            size=40,
            groups=cluster,
            na_in_legend=False,
            frameon=False,
            add_outline=False,
            save='_' + figure + '_' + 'whole_subset_cluster_' + cluster + '_.pdf',
        )

In [None]:
# One UMAP of the subset object per cluster, highlighting only that cluster
# of 'leiden_piwi_pos_0.4' via `groups`.
# NOTE(review): the labels iterated here come from adata's 'subset clusters',
# while the plot uses adata_subset's 'leiden_piwi_pos_0.4' — confirm both
# columns share the same categories.
subset_cluster_labels = adata.obs['subset clusters'].cat.categories
for cluster in subset_cluster_labels:
    with plt.rc_context({'figure.figsize': (12, 12)}):
        sc.pl.umap(
            adata_subset,
            color='leiden_piwi_pos_0.4',
            legend_fontoutline=3,
            legend_fontsize=50,
            size=80,
            groups=cluster,
            na_in_legend=False,
            frameon=False,
            add_outline=False,
            save='_' + figure + '_' + 'piwipos_subset_cluster_' + cluster + '_.pdf',
        )

In [None]:
# Show the colour list stored for the 'subset clusters' annotation.
adata.uns['subset clusters_colors']