In [41]:
import yaml
import pandas as pd
from bokeh.io import output_notebook
from bokeh.core.properties import value
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure
from bokeh.transform import dodge
import math
from bokeh.layouts import gridplot
from bokeh.models.widgets import Panel, Tabs
from functools import reduce
output_notebook()

In [54]:
def get_config():
    with open("config.yaml", 'r') as con:
        config = yaml.safe_load(con)
    return config

config = get_config()
taxa_counts = pd.read_csv(config['blast_taxa_file'], sep="\t").set_index(["subject", "taxa"])
taxa_counts[["Control","Vegan"]] = taxa_counts[["Control","Vegan"]] * 100

In [3]:
def blast_horizontal_barplot(subject,taxa_counts):    
    taxa = ["superkingdom", "kingdom", "phylum", "class", "order","family",'genus',"species"]
    tabs = []
    taxa_counts = taxa_counts.loc[subject]
    for tax in taxa:
        mold = taxa_counts.index.get_level_values("taxa")==tax
        subject_taxa = taxa_counts[mold].set_index(["Scientific Name"])
        subject_taxa = subject_taxa.iloc[10::-1]
        source = ColumnDataSource(data=subject_taxa[["Vegan","Control"]])
        scientific_names = source.data["Scientific Name"]
        p = figure(y_range=scientific_names,x_range=(0,subject_taxa.values.max()+5) ,plot_height=700, title=f"Subject {subject} abundance {tax}", 
                   x_axis_label=f"{tax} abundance in %")
        p.hbar(right='Control', y=dodge('Scientific Name',0.11,
            range=p.y_range), height=0.2,color="#abdfff" ,source=source, width=0.3,line_color="#75cbff", legend_label="Control Diet")

        p.hbar(right='Vegan', y=dodge('Scientific Name',-0.11, 
            range=p.y_range), height=0.2,color="#ceffc4" ,source=source, width=0.3,line_color="#8dff75", legend_label="Vegan Diet")

        p.legend.location = "bottom_right"
        tab = (Panel(child=p, title=tax))
        tabs.append(tab)
    tabs=Tabs(tabs=tabs)
    output_file("taxa_subject_a.html")
    show(tabs)
    return tabs

In [76]:
def pooled_vbarchart(taxa_counts):
    taxa = ["superkingdom", "kingdom", "phylum", "class", "order","family",'genus',"species"]
    subjects = ["A", "B", "C", "D", "E"]
    
    #display(taxa_counts)
    dfs = []
    df = pd.DataFrame([])
    for subject in subjects:
        print(subject)
        taxa_profile_subject = taxa_counts.loc[subject].reset_index()
        display(taxa_profile_subject)
        dfs.append(taxa_profile_subject)
    dfs = [df.set_index('Scientific Name') for df in dfs]
    display(dfs[0].join(dfs[1:]))
    #df = reduce(lambda x, y: pd.merge(x, y,on = 'Scientific Name'), dfs)
    
    display(df)

In [77]:
pooled_vbarchart(taxa_counts)

A


Unnamed: 0,taxa,Scientific Name,Control,Vegan
0,superkingdom,Bacteria,84.780069,76.556509
1,superkingdom,Unclassified,11.049427,19.098128
2,superkingdom,Eukaryota,2.666378,2.757294
3,superkingdom,Viruses,1.454669,1.555034
4,superkingdom,Archaea,0.049458,0.033036
...,...,...,...,...
1568,species,Methylococcus sp. IM1,0.000618,0.001180
1569,species,Herbaspirillum frisingense,0.000618,0.002360
1570,species,Ralstonia pickettii,0.000618,0.002360
1571,species,Streptomyces fodineus,0.000618,0.001180


B


Unnamed: 0,taxa,Scientific Name,Control,Vegan
0,superkingdom,Bacteria,83.058939,77.853107
1,superkingdom,Unclassified,11.076161,14.896422
2,superkingdom,Eukaryota,4.311114,6.139360
3,superkingdom,Viruses,1.552670,1.111111
4,kingdom,Unclassified,95.691117,93.860640
...,...,...,...,...
430,species,Porphyromonas asaccharolytica,0.001115,0.018832
431,species,Caloramator sp. E03,0.001115,0.018832
432,species,Flavobacteriaceae bacterium 10Alg115,0.001115,0.018832
433,species,Fusobacterium varium,0.001115,0.018832


C


Unnamed: 0,taxa,Scientific Name,Control,Vegan
0,superkingdom,Bacteria,80.599665,86.347955
1,superkingdom,Unclassified,14.192721,7.097092
2,superkingdom,Eukaryota,4.201598,5.618531
3,superkingdom,Viruses,0.986291,0.936422
4,kingdom,Unclassified,95.788539,94.430754
...,...,...,...,...
296,species,Dialister hominis,0.009863,0.049285
297,species,Burkholderiales bacterium YL45,0.009863,0.049285
298,species,Enterocloster clostridioformis,0.009863,0.098571
299,species,Lachnospiraceae bacterium Choco86,0.009863,0.049285


D


Unnamed: 0,taxa,Scientific Name,Control,Vegan
0,superkingdom,Bacteria,94.713792,93.313736
1,superkingdom,Unclassified,3.794720,4.789583
2,superkingdom,Viruses,0.899334,0.787857
3,superkingdom,Eukaryota,0.588453,1.108122
4,superkingdom,Archaea,0.003701,0.000702
...,...,...,...,...
685,species,Gordonibacter pamelaeae,0.001234,0.001754
686,species,Streptococcus sanguinis,0.001234,0.000351
687,species,Olsenella sp. GAM18,0.001234,0.000702
688,species,Geobacter sulfurreducens,0.001234,0.001754


E


Unnamed: 0,taxa,Scientific Name,Control,Vegan
0,superkingdom,Bacteria,86.244618,86.528792
1,superkingdom,Unclassified,10.658177,9.866237
2,superkingdom,Eukaryota,2.024904,2.305284
3,superkingdom,Viruses,1.063989,1.290200
4,superkingdom,Archaea,0.008312,0.009487
...,...,...,...,...
559,species,Bifidobacterium animalis,0.001662,0.018974
560,species,Brachyspira pilosicoli,0.001662,0.018974
561,species,Parabacteroides merdae,0.001662,0.009487
562,species,Olsenella sp. LZLJ-2,0.001662,0.009487


Unnamed: 0_level_0,taxa_x,Control_x,Vegan_x,taxa_y,Control_y,Vegan_y,taxa_x,Control_x,Vegan_x,taxa_y,Control_y,Vegan_y,taxa,Control,Vegan
Scientific Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Acetobacteraceae,family,0.004946,0.008259,family,0.002231,0.018832,,,,family,0.002467,0.001754,,,
Acetobacterium,genus,0.003709,0.001180,,,,,,,genus,0.001234,0.000351,,,
Acetobacterium sp. KB-1,species,0.002473,0.001180,,,,,,,,,,,,
Acholeplasma,genus,0.001236,0.003540,,,,,,,,,,genus,0.001662,0.018974
Acholeplasmataceae,family,0.001236,0.003540,,,,,,,,,,family,0.001662,0.018974
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
uncultured murine large bowel bacterium BAC 31B,species,0.001236,0.001180,,,,,,,species,0.001234,0.000702,,,
uncultured organism,species,3.016290,2.194509,species,1.247044,2.128060,species,2.485452,2.267127,species,2.140390,3.078116,species,4.194444,3.671378
uncultured prokaryote,species,0.096442,0.080229,species,0.029001,0.112994,species,0.098629,0.147856,species,0.027140,0.029115,species,0.224435,0.170762
uncultured rumen bacterium,species,0.001855,0.004719,,,,,,,,,,,,


In [None]:
pooled