In [11]:
import yaml
import pandas as pd
from bokeh.io import output_notebook
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource, Whisker
from bokeh.plotting import figure
from bokeh.transform import dodge
from bokeh.layouts import gridplot
from bokeh.models.widgets import Panel, Tabs

In [22]:
    def get_config(path="config.yaml"):
        """Load the notebook configuration from a YAML file.

        Keyword arguments
        path -- Location of the YAML file. Defaults to "config.yaml", resolved
                against the current working directory.

        Returns the object produced by yaml.safe_load for the file contents
        (a dict when the document is a mapping).
        """
        # YAML documents are Unicode text; read them as UTF-8 explicitly instead
        # of relying on the platform's locale-dependent default encoding.
        with open(path, "r", encoding="utf-8") as con:
            return yaml.safe_load(con)

    # Load the configuration once; create_blast_plots reads this module-level
    # `config` to find the taxonomic count file ("blast_taxa_file").
    config = get_config()

In [23]:
def blast_horizontal_barplot(subject, taxa_counts, top_n=11):
    """ Author: Andre de la Rambelje
    Creates a horizontal barplot of the specified subject on every taxonomic
    level, one tab per level, and shows the tabbed figure.

    Keyword arguments
    subject -- A string containing the subject ID (first level of the
               taxa_counts index).
    taxa_counts -- Pandas dataframe indexed by (subject, taxa) with the columns
                   "Scientific Name", "Control" and "Vegan", containing the
                   abundance of scientific names on each taxonomic level of
                   all subjects.
    top_n -- Number of leading rows plotted per taxonomic level. Defaults to
             11, the number of rows the original fixed slice selected.

    Returns the bokeh Tabs object that was shown. Taxonomic levels that have
    no rows for the subject are skipped instead of raising on an empty frame.
    """
    taxa = ["superkingdom", "kingdom", "phylum", "class", "order", "family", 'genus', "species"]
    diet_columns = ["Vegan", "Control"]
    panels = []
    # Select the specified subject; the remaining index level is "taxa".
    subject_counts = taxa_counts.loc[subject]
    for tax in taxa:
        # Select the rows of this taxonomic level.
        level_mask = subject_counts.index.get_level_values("taxa") == tax
        level_counts = subject_counts[level_mask].set_index(["Scientific Name"])
        # Keep the first top_n rows and reverse them, so the first row of the
        # input ends up as the last factor of the categorical y-range.
        level_counts = level_counts.head(top_n).iloc[::-1]
        if level_counts.empty:
            # Nothing to plot: max() of an empty array would raise ValueError.
            continue
        # Creating columndatasource; the "Scientific Name" index is carried
        # along as a column and provides the y-axis factors.
        source = ColumnDataSource(data=level_counts[diet_columns])
        scientific_names = source.data["Scientific Name"]
        # Leave some headroom to the right of the longest plotted bar.
        x_max = level_counts[diet_columns].values.max() + 5
        # Make figure
        p = figure(y_range=scientific_names, x_range=(0, x_max), plot_height=700,
                   title=f"Subject {subject} abundance {tax}",
                   x_axis_label=f"{tax} abundance in %")
        # Adding horizontal bars; the two diets are dodged to opposite sides
        # of each category so they sit next to each other.
        # NOTE(review): hbar thickness is set by `height`; `width` does not
        # appear among HBar's documented properties -- confirm the installed
        # Bokeh version accepts it.
        p.hbar(right='Control', y=dodge('Scientific Name', 0.11, range=p.y_range),
               height=0.2, color="#abdfff", source=source, width=0.3,
               line_color="#75cbff", legend_label="Control Diet")

        p.hbar(right='Vegan', y=dodge('Scientific Name', -0.11, range=p.y_range),
               height=0.2, color="#ceffc4", source=source, width=0.3,
               line_color="#8dff75", legend_label="Vegan Diet")
        # Adding legend location
        p.legend.location = "bottom_right"
        panels.append(Panel(child=p, title=tax))
    tabs = Tabs(tabs=panels)
    #Writing output to html
    #output_file(f"taxa_subject_{subject}.html")
    show(tabs)
    return tabs

In [26]:
def create_blast_plots(subjects=None):
    """Creates taxonomic barplots for each subject.

    Reads the tab-separated taxonomic count file named by
    config["blast_taxa_file"], indexes it by (subject, taxa), multiplies the
    "Control" and "Vegan" columns by 100 to express them as percentages, and
    calls blast_horizontal_barplot once per subject.

    Keyword arguments
    subjects -- Iterable of subject IDs to plot. Defaults to
                ["A", "B", "C", "D", "E"].
    """
    if subjects is None:
        subjects = ["A", "B", "C", "D", "E"]
    diet_columns = ["Control", "Vegan"]
    #Reading in taxonomic count file
    taxa_counts = pd.read_csv(config["blast_taxa_file"], sep="\t").set_index(["subject", "taxa"])
    # Printed before the conversion below, so the values shown are the raw ones.
    print(taxa_counts)
    #Convert to percentage
    taxa_counts[diet_columns] = taxa_counts[diet_columns] * 100
    for subject in subjects:
        blast_horizontal_barplot(subject, taxa_counts)
# Build and show the tabbed taxonomic bar plots for every subject.
create_blast_plots()

                                   Scientific Name   Control     Vegan
subject taxa                                                          
A       superkingdom                      Bacteria  0.847801  0.765565
        superkingdom                  Unclassified  0.110494  0.190981
        superkingdom                     Eukaryota  0.026664  0.027573
        superkingdom                       Viruses  0.014547  0.015550
        superkingdom                       Archaea  0.000495  0.000330
...                                            ...       ...       ...
E       species           Bifidobacterium animalis  0.000017  0.000190
        species             Brachyspira pilosicoli  0.000017  0.000190
        species             Parabacteroides merdae  0.000017  0.000095
        species               Olsenella sp. LZLJ-2  0.000017  0.000095
        species       Paraclostridium bifermentans  0.000017  0.000095

[3563 rows x 3 columns]
