In [None]:
import numpy as np
import pandas as pd
import scanpy as sc

In [None]:
import matplotlib.pyplot as plt

In [None]:
import seaborn as sns

In [None]:
# Log at verbosity level 4 and print the scanpy header for the record.
sc.settings.verbosity = 4
sc.logging.print_header()

# Figure defaults applied to every scanpy plot produced below.
sc.settings.set_figure_params(
    dpi=300,
    facecolor='white',
    format='svg',
    vector_friendly=True,
)

In [None]:
# Sequential colormap built from 'xkcd:medium blue'; passed as `cmap` to the marker-gene dot plot below.
umap_cmap = sns.light_palette('xkcd:medium blue', as_cmap = True)

In [None]:
umap_cmap

In [None]:
# whitesmoke -> lightgray -> cyan -> blue blend; used to colour the module-score UMAPs.
module_cmap = sns.blend_palette(['whitesmoke', 'lightgray', 'cyan','blue'], as_cmap = True)

In [None]:
module_cmap

In [None]:
# whitesmoke -> lightgray -> pink -> red blend; used to colour the functional-category score UMAPs.
f_cat_cmap = sns.blend_palette(['whitesmoke', 'lightgray', 'pink', 'red'], as_cmap = True)

In [None]:
f_cat_cmap

# Declaring the input and output files

In [None]:
name_of_analysis = 'pristina_atlas'

In [None]:
sc.settings.figdir = './figures/'+name_of_analysis+'_broad_groups_colors_gene_names_scores'

In [None]:
results_file = './'+name_of_analysis+'_coloured.h5ad'

In [None]:
results_file2 = './pristina_atlas_cut_PAGA.h5ad'

In [None]:
adata = sc.read_h5ad('./'+name_of_analysis+'.h5ad')

In [None]:
adata_paga = sc.read_h5ad('./pristina_atlas_cut_PAGA.h5ad')

In [None]:
# Inspect the loaded atlas.
adata

In [None]:
# Inspect the per-gene table before the annotation columns are added.
adata.var

In [None]:
# Name of the adata.obs column holding the cluster labels used for plotting and name transfer.
clusteringlayer = 'leiden_1.5'

# Transferring the gene annotation

In [None]:
annot = pd.read_csv('annot.tsv', sep='\t', index_col = "query")

In [None]:
annot

In [None]:
# Copy 'Description' and 'Preferred_name' from `annot` onto adata.var for every
# gene present in the annotation table. This replaces a per-gene Python loop
# with scalar .loc writes by one masked assignment per column.
#
# Only genes found in `annot` are written, so genes without annotation keep
# whatever they had (NaN for freshly created columns). If `annot` lists a gene
# more than once, the first row wins; the loop version received a Series in
# that case instead of a scalar.
annot_unique = annot[~annot.index.duplicated(keep='first')]
in_annot = adata.var.index.isin(annot_unique.index)
if in_annot.any():
    annotated_genes = adata.var.index[in_annot]
    for column in ('Description', 'Preferred_name'):
        # .to_numpy() makes the assignment positional: the values are already
        # ordered like the masked rows of adata.var.
        adata.var.loc[in_annot, column] = (
            annot_unique[column].reindex(annotated_genes).to_numpy()
        )
    

In [None]:
adata.var

# Transferring the broad clusters and colours

In [None]:
# Per-cluster metadata spreadsheet (names, broad types, colours), indexed by its 'Cluster' column.
df = pd.read_excel('Names, Broad Names, Color Palette, Order, Pristina.xlsx', index_col = 'Cluster')

In [None]:
# Cast the cluster ids to strings: later cells look rows up by the values of
# adata.obs[clusteringlayer] (presumably string labels — TODO confirm).
df.index = df.index.astype('string')

In [None]:
# Integer copy of the cluster id, used below to sort the colours numerically by cluster.
df['order_cells'] = df.index.astype('int')

In [None]:
# Position of each cluster in the spreadsheet's row order. Derived from the
# table length instead of a hard-coded 60, so the cell keeps working when the
# number of clusters in the spreadsheet changes.
df['order_sorted'] = list(range(len(df)))

In [None]:
df

In [None]:
# Cluster colours ordered by numeric cluster id.
palette = df.sort_values(by = 'order_cells')['Colours'].to_list()

In [None]:
# Stored under '<obs column>_colors' (presumably the key scanpy reads when
# colouring by 'leiden_1.5' — confirm against the scanpy plotting docs).
adata.uns['leiden_1.5_colors'] = np.array(palette)

In [None]:
adata.uns['leiden_1.5_colors']

In [None]:
# Cluster colours in the spreadsheet's own row order.
sorted_palette = df['Colours'].to_list()

In [None]:
sorted_palette

In [None]:
adata.uns['leiden_1.5_colors_sorted'] = np.array(sorted_palette)

In [None]:
adata.uns['leiden_1.5_colors_sorted']

In [None]:
# Cluster ids (as strings) in the spreadsheet's row order, matching `sorted_palette` entry by entry.
sorted_clusters = df.index.to_list()

In [None]:
adata.uns['leiden_1.5_sorted'] = np.array(sorted_clusters)

In [None]:
# List the keys now present in adata.uns.
list(adata.uns)

In [None]:
# UMAP coloured by cluster with the palette stored above; saved with the '_colored.svg' suffix.
with plt.rc_context({'figure.figsize': (12, 12)}):
    sc.pl.umap(
        adata,
        color=clusteringlayer,
        title=f'Clustering layer {clusteringlayer}',
        legend_loc='on data',
        legend_fontoutline=3,
        size=10,
        frameon=False,
        add_outline=False,
        save='_colored.svg',
    )

In [None]:
adata.obs[clusteringlayer]

In [None]:
for i in adata.obs.index:
    adata.obs.loc[i, 'leiden_1.5_names'] = df['Names (leiden_1.5)'][adata.obs.loc[i, clusteringlayer]]
    adata.obs.loc[i, 'broad_names'] = df['Broad Type'][adata.obs.loc[i, clusteringlayer]]

In [None]:
adata.obs

In [None]:
df['Colour group'].dropna()

In [None]:
with plt.rc_context({'figure.figsize': (12, 12)}):
    sc.pl.umap(adata, color='broad_names', legend_loc= 'on data', legend_fontoutline = 3,
        title= 'Broad Types', size = 10,
        frameon=False, add_outline = False)

In [None]:
adata.obs['broad_names'].cat.categories

In [None]:
adata.uns['broad_names_colors'] = np.array(df[['Broad Type', 'Colour group']].dropna().sort_values(by = 'Broad Type')['Colour group'])

In [None]:
adata.uns['broad_names_colors']

In [None]:
# Broad types drawn with the group colours stored in adata.uns['broad_names_colors'].
# NOTE(review): the save suffix has no leading underscore, unlike '_colored.svg'
# used for the cluster plot — confirm the resulting file name is intended.
with plt.rc_context({'figure.figsize': (12, 12)}):
    sc.pl.umap(
        adata,
        color='broad_names',
        title='Broad Types',
        palette=list(adata.uns['broad_names_colors']),
        legend_loc='on data',
        legend_fontoutline=3,
        size=10,
        frameon=False,
        add_outline=False,
        save='broad_colored.svg',
    )

# Transferring colors to adata PAGA object

In [None]:
dff = df.drop(df[df['Names (leiden_1.5)'] == 'unnanotated'].index)

In [None]:
dff

In [None]:
adata_paga.uns['leiden_1.5_colors'] = np.array(dff.sort_values(by = 'order_cells')['Colours'])

In [None]:
# Options for the PAGA graph drawing, collected in one place.
# Two options were commented out in this call and remain disabled:
# dashed_edges='connectivities' and min_edge_width=3.
paga_plot_options = dict(
    threshold=0.25,
    solid_edges='connectivities_tree',
    root=1,
    layout='rt',
    node_size_scale=1,
    node_size_power=1,
    max_edge_width=3,
    edge_width_scale=0.5,
    fontsize=15,
    fontoutline=2,
    frameon=False,
)

with plt.rc_context({'figure.figsize': (10, 10)}):
    sc.pl.paga(adata_paga, **paga_plot_options)

# Markers of Broad Types

In [None]:
sc.tl.rank_genes_groups(adata, 'broad_names', method='wilcoxon', key_added = 'rank_genes_groups_wilcox_broad_names')
sc.pl.rank_genes_groups(adata, key='rank_genes_groups_wilcox_broad_names', n_genes = 10, sharey = False)

In [None]:
sc.tl.rank_genes_groups(adata, 'broad_names', method='logreg', key_added = 'rank_genes_groups_logreg_broad_names')
sc.pl.rank_genes_groups(adata, key='rank_genes_groups_logreg_broad_names', n_genes = 10, sharey = False)

In [None]:
sc.pl.rank_genes_groups_dotplot(adata, n_genes=3, key = 'rank_genes_groups_wilcox_broad_names', cmap = umap_cmap,
                               values_to_plot = 'scores', save = '_wilcox_broad_names')

# Module scores

In [None]:
# Reference UMAP of the clusters (larger points, outlined) to read the score maps against.
with plt.rc_context({'figure.figsize': (12, 12)}):
    sc.pl.umap(
        adata,
        color=clusteringlayer,
        title=f'Clustering layer {clusteringlayer}',
        legend_loc='on data',
        legend_fontoutline=5,
        size=30,
        frameon=False,
        add_outline=True,
    )

In [None]:
modules = pd.read_csv('20221114_plei_id_module_newname.tsv', sep='\t')

In [None]:
modules

In [None]:
modules['newname'] = modules['newname'].astype('category')

In [None]:
modules

In [None]:
len(modules['newname'].cat.categories)

In [None]:
modules

In [None]:
# Score every module on the atlas and show the score on the UMAP.
for module_name in modules['newname'].cat.categories:
    print('________________________________________')
    print('module ' + module_name)

    # Gene ids belonging to this module.
    module_genes = modules.loc[modules['newname'] == module_name, 'id'].tolist()
    score_key = 'score_module_' + module_name

    # The control set is as large as the module itself.
    sc.tl.score_genes(
        adata,
        module_genes,
        ctrl_size=len(module_genes),
        gene_pool=None,
        n_bins=25,
        score_name=score_key,
    )
    sc.pl.umap(adata, color=score_key, color_map=module_cmap, size=5)

# COG functional category scores

In [None]:
cogs = pd.read_csv('20221114_plei_id_cog_newname.tsv', sep='\t')

In [None]:
cogs

In [None]:
cogs['functional_category'] = cogs['functional_category'].astype('category')

In [None]:
cogs

In [None]:
# Score every functional category on the atlas and save one UMAP per category.
for category in cogs['functional_category'].cat.categories:
    print('________________________________________')
    print('functional_category_' + category)

    # Gene ids assigned to this functional category.
    category_genes = cogs.loc[cogs['functional_category'] == category, 'id'].tolist()
    score_key = 'score FC ' + category

    # The control set is as large as the category's gene list.
    sc.tl.score_genes(
        adata,
        category_genes,
        ctrl_size=len(category_genes),
        gene_pool=None,
        n_bins=25,
        score_name=score_key,
    )
    sc.pl.umap(
        adata,
        color=score_key,
        color_map=f_cat_cmap,
        size=3,
        save='_score_functional_category_' + category + '.pdf',
    )

# Save the data

In [None]:
# Write the annotated atlas to the path held in `results_file`.
adata.write(results_file)

In [None]:
# NOTE(review): `results_file2` is the same path `adata_paga` was loaded from,
# so this overwrites the input PAGA file in place.
adata_paga.write(results_file2)