In [1]:
from functools import reduce
import numpy as np
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
import panel as pn
import bokeh.models as bm
from bokeh.models.widgets import DataTable, TableColumn
from bokeh.models import HoverTool
from bokeh.io import curdoc
from bokeh.palettes import Colorblind
import itertools

path = "/students/2024-2025/master/chili/output/kraken2/merged"
dfs = []
rank_list = ["super_kingdom", "kingdom", "phylum", "class", "order", "family", "genus", "species"]

for barcode in range(1,21):
    df = pd.read_csv(f"{path}/barcode{str(barcode).zfill(2)}.tsv", sep="\t", index_col=False,  usecols=[1, 2])
 
    df = df.rename(columns={"fragments": f"fragments{barcode}"})

    eukaryote_filter = df["tax"].str.contains("d__Eukaryota")
    df = df[~eukaryote_filter]

    ratio_dict = {"d__Archaea" : max(df[df["tax"] == "d__Archaea"][f"fragments{barcode}"]),
                  "d__Bacteria" : max(df[df["tax"] == "d__Bacteria"][f"fragments{barcode}"]),
                  "d__Viruses" : max(df[df["tax"] == "d__Viruses"][f"fragments{barcode}"])}
    
    #df[f"ratio{barcode}"] = (round(df[f"fragments{barcode}"] / ratio_dict[f"{df['tax'].split('|')[0]}"], 10))
    # print(df["fragments1"])
    df[f"ratio{barcode}"] = df.apply(lambda row: (round(row[f"fragments{barcode}"] / ratio_dict[f"{row['tax'].split('|')[0]}"], 10)), axis=1)
    dfs.append(df)  

merged_df = reduce(lambda df1,df2: pd.merge(df1,df2,on="tax", how="outer"), dfs)
merged_df = merged_df.sort_values("tax")
merged_df = merged_df.fillna(0)
merged_df[rank_list] = merged_df['tax'].str.split('|', expand=True)
merged_df = merged_df.set_index("tax")
merged_df.to_csv(f"{path}/merged-all.tsv", sep="\t", index=False)



In [2]:
# p1 = [1,5,9,13,17]
# p2 = [2,6,10,14,18]
# p3 = [3,7,11,15,19]
# p4 = [4,8,12,16,20]

ratio_cols = [[] for _ in range(0,4)]

for i in range(1,21):
    rem = i % 4
    if rem == 0:
        ratio_cols[3].append(f"ratio{i}")
    if rem == 1:
        ratio_cols[0].append(f"ratio{i}")
    if rem == 2:
        ratio_cols[1].append(f"ratio{i}")
    if rem == 3:
        ratio_cols[2].append(f"ratio{i}")

def get_delta(df, cols):
    for part, part_cols in enumerate(cols):
        delta_cols = []
        for i in range(0, len(part_cols)-1):
            delta_col = f"delta_{part_cols[i]}_{part_cols[i+1]}"
            df[delta_col] = abs(df[part_cols[i]] - df[part_cols[i+1]])
            delta_cols.append(delta_col)
        df[f"max_delta{part}"] = df[delta_cols].max(axis=1)

get_delta(merged_df, ratio_cols)
    

In [3]:
# merged_df[["delta_ratio3_ratio7",	"delta_ratio7_ratio11", "ratio3", "ratio7", "ratio11"]].to_csv("test_ratios.csv", sep="\t", index=False)
merged_df["max_delta_all"] = merged_df[["max_delta0", "max_delta1", "max_delta2", "max_delta3"]].max(axis=1)

In [4]:
merged_df["max_delta_all"].sort_values(ascending=False)


tax
d__Viruses|k__Heunggongvirae|p__Uroviricota|c__Caudoviricetes|o__Crassvirales                                                                                                      0.974596
d__Viruses|k__Heunggongvirae|p__Uroviricota|c__Caudoviricetes|o__Crassvirales|f__Intestiviridae                                                                                    0.960870
d__Viruses|k__Heunggongvirae|p__Uroviricota|c__Caudoviricetes|o__Crassvirales|f__Intestiviridae|g__Carjivirus                                                                      0.960870
d__Viruses|k__Heunggongvirae|p__Uroviricota|c__Caudoviricetes|o__Crassvirales|f__Intestiviridae|g__Carjivirus|s__Carjivirus communis                                               0.784783
d__Viruses|k__Heunggongvirae|p__Uroviricota|c__Caudoviricetes|o__Crassvirales|f__Suoliviridae|g__Burzaovirus                                                                       0.782609
                                                        

In [5]:
# # Family or genus
# superkingdom, kingdom, phylum, class, order, family, genus, species
merged_df.index = pd.MultiIndex.from_tuples(merged_df.index.str.split("|").tolist())
merged_df.index = merged_df.index.set_names(rank_list)
merged_df.to_csv(f"test.tsv", sep="\t")


In [None]:
pn.extension()

def update_table(event):
    filtered_df = merged_df
    for key in selectors.keys():
        selection_val =  str(selectors.get(key).value)
        
        # Changing options based on prior selections
        # options=filtered_df[key].unique().tolist())
        # selectors.get(key).param.watch(update_table, 'value')

        if selection_val != "nan" and selection_val != "None" and selection_val != None:
            filtered_df = merged_df[merged_df[f"{key}"] == selection_val]
        else:
            selectors.get(f"{key}").options = filtered_df[key].unique().tolist()

    flat_ratios = rank_list + ["max_delta_all"] + list(itertools.chain.from_iterable(ratio_cols))
    filtered_df = filtered_df[flat_ratios]
    source.data = bm.ColumnDataSource.from_df(filtered_df)
    data_table.columns = [TableColumn(field=col, title=col) for col in filtered_df]

def update_plot(attr, old, new):
    plot.renderers = []
    if not new:
        return
    
    selected_index = new[0]
    selected_row = source.to_df().iloc[selected_index]
    
    for idx, cols in enumerate(ratio_cols):
        plot.line(
            [1, 2, 3, 4, 5], 
            selected_row[cols], 
            line_width=2, 
            legend_label=str(idx), 
            color=Colorblind.get(5)[idx]
        )

selectors = {}
for rank_i, rank in enumerate(rank_list):
    selectors[f"{rank}"] = pn.widgets.Select(
        name=f"{rank}", 
        options=merged_df.index.get_level_values(rank_i).unique().tolist(),
        width=200)
    selectors.get(rank).param.watch(update_table, 'value')

source = bm.ColumnDataSource(pd.DataFrame(columns=merged_df.index.names))
plot = figure(title="Line Chart", x_axis_label="Sample", y_axis_label="Micro-organism ratio", width=800, height=400)
plot.add_tools(HoverTool(tooltips=[("Sample", "$x"), ("Ratio", "$y")]))

source.selected.on_change("indices", update_plot)

columns = [TableColumn(field=col, title=col) for col in merged_df.index.names]
data_table = DataTable(source=source, columns=columns, width=1880)

selectors_row = pn.Row(*selectors.values())
layout = pn.Column(selectors_row, data_table, plot)

served = pn.serve(layout)



   pip install jupyter_bokeh

or:
    conda install jupyter_bokeh

and try again.
  pn.extension()


Launching server at http://localhost:34837




In [7]:
merged_df[merged_df["super_kingdom"] == "d__Bacteria"]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,fragments1,ratio1,fragments2,ratio2,fragments3,ratio3,fragments4,ratio4,fragments5,ratio5,...,delta_ratio7_ratio11,delta_ratio11_ratio15,delta_ratio15_ratio19,max_delta2,delta_ratio4_ratio8,delta_ratio8_ratio12,delta_ratio12_ratio16,delta_ratio16_ratio20,max_delta3,max_delta_all
super_kingdom,kingdom,phylum,class,order,family,genus,species,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
d__Bacteria,,,,,,,,34387.0,1.000000,94585.0,1.0,95051.0,1.000000,164167.0,1.000000,72055.0,1.000000,...,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
d__Bacteria,c__Candidatus Babeliae,,,,,,,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,...,0.000000,0.000000,0.0,0.000000,0.000000,0.000019,0.000016,0.000004,0.000019,0.000019
d__Bacteria,c__Candidatus Babeliae,o__Candidatus Babeliales,,,,,,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,...,0.000000,0.000000,0.0,0.000000,0.000000,0.000019,0.000016,0.000004,0.000019,0.000019
d__Bacteria,c__Candidatus Babeliae,o__Candidatus Babeliales,f__Candidatus Chromulinivoraceae,,,,,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,...,0.000000,0.000000,0.0,0.000000,0.000000,0.000019,0.000016,0.000004,0.000019,0.000019
d__Bacteria,c__Candidatus Babeliae,o__Candidatus Babeliales,f__Candidatus Chromulinivoraceae,g__Candidatus Chromulinivorax,,,,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,...,0.000000,0.000000,0.0,0.000000,0.000000,0.000019,0.000016,0.000004,0.000019,0.000019
d__Bacteria,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
d__Bacteria,p__Vulcanimicrobiota,c__Vulcanimicrobiia,o__Vulcanimicrobiales,,,,,1.0,0.000029,0.0,0.0,1.0,0.000011,0.0,0.000000,1.0,0.000014,...,0.000065,0.000065,0.0,0.000065,0.000015,0.000015,0.000000,0.000000,0.000015,0.000065
d__Bacteria,p__Vulcanimicrobiota,c__Vulcanimicrobiia,o__Vulcanimicrobiales,f__Vulcanimicrobiaceae,,,,1.0,0.000029,0.0,0.0,1.0,0.000011,0.0,0.000000,1.0,0.000014,...,0.000065,0.000065,0.0,0.000065,0.000015,0.000015,0.000000,0.000000,0.000015,0.000065
d__Bacteria,p__Vulcanimicrobiota,c__Vulcanimicrobiia,o__Vulcanimicrobiales,f__Vulcanimicrobiaceae,g__Vulcanimicrobium,,,1.0,0.000029,0.0,0.0,1.0,0.000011,0.0,0.000000,1.0,0.000014,...,0.000065,0.000065,0.0,0.000065,0.000015,0.000015,0.000000,0.000000,0.000015,0.000065
d__Bacteria,p__Vulcanimicrobiota,c__Vulcanimicrobiia,o__Vulcanimicrobiales,f__Vulcanimicrobiaceae,g__Vulcanimicrobium,s__Vulcanimicrobium alpinum,,1.0,0.000029,0.0,0.0,1.0,0.000011,0.0,0.000000,1.0,0.000014,...,0.000065,0.000065,0.0,0.000065,0.000015,0.000015,0.000000,0.000000,0.000015,0.000065
