In [None]:
import numpy as np
import pandas as pd
import scanpy as sc

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Never truncate long cell contents (e.g. hit descriptions) when pandas renders a table.
pd.options.display.max_colwidth = None

In [None]:
import seaborn as sns

In [None]:
# Verbosity 4 is scanpy's debug-level logging; the header records the package versions in use.
sc.settings.verbosity = 4
sc.logging.print_header()
# Figure defaults: 300 dpi, white background, saved as PDF; vector_friendly is enabled
# (scanpy's option for rasterising scatter points in vector output).
sc.settings.set_figure_params(dpi=300, facecolor='white', format = 'pdf', vector_friendly = True)

In [None]:
# Figure identifier: names the output directory and is embedded in saved file names.
figure = "Figure_3"

In [None]:
# scanpy writes every figure saved via `save=` into this per-figure directory.
sc.settings.figdir = f'./{figure}'

In [None]:
# Continuous colour map running from light grey to medium blue, used for the feature UMAPs.
umap_cmap = sns.blend_palette(['lightgrey', 'xkcd:medium blue'], as_cmap = True)

# Functions for querying DIAMOND BLAST hits

In [None]:
def check_for_species(df, species):
    """Return the text of the top-ranked hit that mentions ``species``.

    Parameters
    ----------
    df : pandas.DataFrame
        Hits table addressed by integer column labels. Column 4 holds the
        text that is searched and returned; column 3 is the value the
        matching rows are sorted by (ascending).
        NOTE(review): presumably column 3 is the e-value and column 4 the
        hit title of the DIAMOND output - confirm against the input file.
    species : str
        Pattern looked for in column 4. It is handed to
        ``Series.str.contains`` and is therefore treated as a regular
        expression.

    Returns
    -------
    str or None
        Column-4 text of the matching row with the smallest column-3 value,
        or ``None`` when no row mentions ``species``.
    """
    # Build the mask once. na=False makes rows with a missing description count
    # as "no match" instead of leaving NaN in the mask, which boolean indexing
    # would reject with a ValueError.
    mentions_species = df[4].str.contains(species, na=False)
    if not mentions_species.any():
        return None
    return df[mentions_species].sort_values(3).iloc[0, 4]

In [None]:
def contains_words(hit_string, li):
    """Report whether any word of ``li`` occurs in ``hit_string``, ignoring case.

    Parameters
    ----------
    hit_string : str
        Text to search in.
    li : iterable of str
        Words to look for (substring match).

    Returns
    -------
    bool
        ``True`` if at least one word is found, ``False`` otherwise
        (including when ``li`` is empty).
    """
    # Lower-case both sides so the comparison is case-insensitive even when the
    # caller has not already lower-cased the text.
    haystack = hit_string.lower()
    return any(word.lower() in haystack for word in li)

In [None]:
def get_informative(df, li):
    """Pick the first hit whose description contains none of the words in ``li``.

    Rows are examined in the order they appear in ``df``; the description is
    read from the fifth column (position 4) and lower-cased before it is
    checked with ``contains_words``.

    Returns
    -------
    str
        * ``"not in Diamond"`` when ``df`` has no rows;
        * the description of the first row free of the listed words;
        * ``"no informative hits: "`` followed by the first row's description
          when every row contains at least one listed word.
    """
    n_hits = len(df.index)
    if n_hits == 0:
        return "not in Diamond"

    for row in range(n_hits):
        description = df.iloc[row, 4]
        if not contains_words(description.lower(), li):
            return description

    # Every hit was uninformative: fall back to the first one, flagged as such.
    return "no informative hits: " + df.iloc[0, 4]

In [None]:
def get_best_hit(transcript, species, li):
    """Return the most useful hit description for ``transcript``.

    The rows of the module-level ``diamond`` table whose column 0 equals
    ``transcript`` are selected. A hit mentioning ``species`` is preferred;
    if there is none, the result of ``get_informative`` for those rows is
    returned instead.

    Parameters
    ----------
    transcript : str
        Identifier compared against column 0 of ``diamond``.
    species : str
        Preferred species, forwarded to ``check_for_species``.
    li : iterable of str
        Words marking a description as uninformative, forwarded to
        ``get_informative``.

    Returns
    -------
    str
        A hit description or one of the fallback messages produced by
        ``get_informative``.
    """
    search = diamond[diamond[0] == transcript]
    # Evaluate the species lookup once and reuse the result instead of
    # repeating the filter-and-sort for the return value.
    species_hit = check_for_species(search, species)
    if species_hit is not None:
        return species_hit
    return get_informative(search, li)

In [None]:
# Words that mark a hit description as uninformative; get_informative skips hits
# containing any of them (the comparison is done on lower-cased text).
list_of_words = ['hypothetical', 'uncharacterized', 'unnamed', 'Dimorphilus']

In [None]:
# Species whose hits get_best_hit reports in preference to any other hit.
preferred_species = 'Platynereis'

In [None]:
# Tab-separated hits table read without a header row, so columns are labelled
# 0, 1, 2, ...; get_best_hit matches transcript ids against column 0.
diamond = pd.read_csv('../../diamond_pristina.tsv', sep='\t', header = None)

# eggNOG annotation

In [None]:
# Annotation table indexed by its "query" column, so a transcript id can be
# looked up directly with annot.loc[...].
annot = pd.read_csv('../../annot.tsv', sep='\t', index_col = "query")

# Input file

In [None]:
# Dataset loaded from the .h5ad file; every UMAP in this notebook is drawn from it.
adata = sc.read_h5ad('../../pristina_atlas_coloured_subcl.h5ad')

In [None]:
# Key passed as `color` to the cluster-highlighting UMAP.
# NOTE(review): presumably an adata.obs column holding Leiden clusters - confirm.
clusteringlayer = 'leiden_1.5'

In [None]:
# Transcripts shown as feature plots, in the order they are processed.
feature_transcripts = [
    'PrileiEVm000278t1',
    'PrileiEVm001525t1',
    'PrileiEVm000939t1',
    'PrileiEVm015446t1',
    'PrileiEVm005033t1',
    'PrileiEVm004079t1',
    'PrileiEVm014251t1',
    'PrileiEVm012391t1',
    'PrileiEVm010163t1',
    'PrileiEVm002621t1',
]


def show_transcript(transcript):
    """Plot one transcript on the UMAP and print what is known about it.

    Three things happen for ``transcript``:

    1. a feature UMAP is drawn from ``adata`` with ``umap_cmap`` and saved
       with the suffix ``_feature_<transcript>.pdf``;
    2. the result of ``get_best_hit`` is printed;
    3. the transcript's row of ``annot`` is printed, if it has one.

    Parameters
    ----------
    transcript : str
        Transcript identifier, used as the UMAP colour key and for both lookups.
    """
    sc.pl.umap(adata, color=transcript, color_map=umap_cmap, frameon=False,
               save='_feature_' + transcript + '.pdf')
    # All transcripts now share one output area, so label each report.
    print(transcript)
    print(get_best_hit(transcript, preferred_species, list_of_words))
    if transcript in annot.index:
        print(annot.loc[transcript])


# `tr` is kept as the loop variable so it still holds the last transcript afterwards.
for tr in feature_transcripts:
    show_transcript(tr)

# Box plot of nuclei area: vigilin+ cells vs. other cells

In [None]:
# Nuclei measurements; the following cells use its 'cell type' and 'nuclei area' columns.
df = pd.read_csv('nuclei-area-measurements.csv')

In [None]:
# Show the measurements table as loaded.
df

In [None]:
# Nuclei areas of the rows whose 'cell type' contains 'negative', renumbered
# from 0 and labelled 'other cells'.
rest = (
    df.loc[df['cell type'].str.contains('negative'), 'nuclei area']
    .reset_index(drop=True)
    .rename('other cells')
)

In [None]:
# Nuclei areas of the rows whose 'cell type' contains 'positive', renumbered
# from 0 and labelled 'vigilin+ cells'.
vig = (
    df.loc[df['cell type'].str.contains('positive'), 'nuclei area']
    .reset_index(drop=True)
    .rename('vigilin+ cells')
)

In [None]:
# Put the two groups side by side as columns, aligned on their 0-based index,
# which gives the wide layout plotted below (one column per group).
df2 = pd.concat([rest, vig], axis = 1)

In [None]:
# Show the two-column table that is plotted next.
df2

In [None]:
# Box plot of nuclei area per group with the individual measurements overlaid,
# saved into the figure directory.
with plt.rc_context({'figure.figsize': (3, 6)}):
    ax = sns.boxplot(data=df2, palette=['grey', '#ffff00'])
    sns.stripplot(data=df2, color='black', size=3, jitter=0.20, ax=ax)
    ax.set_ylabel('Nuclei Area (µm2)')
    ax.figure.savefig('./' + figure + '/barplot_' + figure + '_vigilinposcells.pdf',
                      bbox_inches='tight')

In [None]:
# Cluster labels passed as `groups` to the UMAP below; only these are coloured.
# NOTE(review): presumably the "annelid novel" clusters, going by the output file name - confirm.
a_n = ['35', '21', '22', '23', '10', '12', '17', '29', '4', '33', '34', '44', '37']

In [None]:
# UMAP colouring only the clusters listed in `a_n`; all other cells are drawn in
# light grey and kept out of the legend.
with plt.rc_context({'figure.figsize': (12, 12)}):
    sc.pl.umap(
        adata,
        color=clusteringlayer,
        groups=a_n,
        legend_loc=None,
        na_color='lightgrey',
        na_in_legend=False,
        size=10,
        frameon=False,
        add_outline=False,
        save=f'_{figure}_annelid_novel.pdf',
    )