In [646]:
import pandas as pd
import numpy as np

In [653]:
## Input tables
# Number of samples; passed to the relative-abundance and filtering helpers.
number_samples = 6

# Bracken table: tab-separated, with the first 8 lines of the file skipped.
bracken_file = pd.read_csv(
    '/Volumes/PiconCossio/biocomp_tools/MAUS/utils/braken_output_G1_G2.txt',
    sep='\t',
    skiprows=8,
)

## Alpha diversity
# Tab-separated and read without a header row, so columns get integer labels.
alpha_file = pd.read_csv(
    '/Volumes/PiconCossio/biocomp_tools/MAUS/utils/alpha_diversity.txt',
    sep='\t',
    header=None,
)
alpha_file.head()

Unnamed: 0,0,1
0,shannon,5.0
1,simpson,3.5
2,shannon,3.0
3,simpson,2.5


In [648]:
## compute relative abundance (RelAbun)
def compute_relative_abun(df, number_samples=6):
    """Add a relative-abundance (percent) column for every sample.

    For each sample index ``i`` in ``1..number_samples`` the column
    ``G{i}_1_lvl`` is divided by its own column total and multiplied by 100;
    the result is stored in ``G{i}_1_RelAbun``. If that column already exists
    it is overwritten in place (its position among the columns is kept),
    otherwise it is appended on the right.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing one ``G{i}_1_lvl`` column per sample.
    number_samples : int, default 6
        Number of samples; columns ``G1_1_lvl`` .. ``G{number_samples}_1_lvl``
        are processed.

    Returns
    -------
    pandas.DataFrame
        A new frame with the ``G{i}_1_RelAbun`` columns added. The input
        frame is left untouched.

    Raises
    ------
    KeyError
        If an expected ``G{i}_1_lvl`` column is missing.

    Notes
    -----
    A column whose total is 0 yields NaN/inf values (division by zero).
    """
    # Work on a copy: the caller's frame is not mutated (so re-running a cell
    # is idempotent) and passing a filtered slice of another frame does not
    # trigger SettingWithCopyWarning.
    result = df.copy()
    for sample in range(1, number_samples + 1):
        counts = result[f"G{sample}_1_lvl"]
        result[f"G{sample}_1_RelAbun"] = counts / counts.sum() * 100
    return result

In [649]:
# Abundance threshold handed to the conditional abundance filter.
min_abundance = 1
# Add per-sample relative-abundance (%) columns to the Bracken table.
bracken_file = compute_relative_abun(bracken_file, number_samples=number_samples)
def filter_by_abundance_conditional(df, number_samples, min_abundance, min_samples):
    """Keep rows reaching ``min_abundance`` in at least ``min_samples`` samples.

    Intended to mimic qiime2's conditional taxa filter by abundance: a row
    (taxon) is kept when at least ``min_samples`` of its sample values are
    ``>= min_abundance``.

    The sample values are taken *positionally*: they are the last
    ``number_samples`` columns of ``df``, so the abundance columns must be
    the right-most columns of the frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose last ``number_samples`` columns hold the abundances.
    number_samples : int
        How many trailing columns to treat as samples; must be >= 1.
    min_abundance : number
        Inclusive abundance threshold.
    min_samples : int
        Minimum number of samples that must reach the threshold.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the retained rows (original index preserved).

    Raises
    ------
    ValueError
        If ``number_samples`` is smaller than 1.
    """
    # A slice of the form [-0:] would select *every* column (including
    # non-numeric ones) rather than none, so reject that case explicitly.
    if number_samples < 1:
        raise ValueError("number_samples must be a positive integer")

    sample_values = df.iloc[:, -number_samples:]
    samples_passing = (sample_values >= min_abundance).sum(axis=1)
    # .copy() so callers can add or overwrite columns on the result without
    # SettingWithCopyWarning.
    return df[samples_passing >= min_samples].copy()

# Keep only taxa that reach min_abundance in at least two samples ...
bracken_file = filter_by_abundance_conditional(
    bracken_file, number_samples, min_abundance, min_samples=2
)
# ... then recompute the relative abundances from the rows that remain.
bracken_file = compute_relative_abun(bracken_file, number_samples=number_samples)

In [650]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px


In [651]:
# Display the whole alpha-diversity table read from alpha_diversity.txt.
alpha_file

Unnamed: 0,0,1
0,shannon,5.0
1,simpson,3.5
2,shannon,3.0
3,simpson,2.5


In [652]:
## Figure data: the "name" column plus every column to its right.
# .copy() detaches the slice from bracken_file, so stripping the names below
# does not raise SettingWithCopyWarning and never touches bracken_file.
bracken_file_relative_abun = bracken_file.loc[:, "name":].copy()

# Two stacked panels sharing the x axis: row 1 = diversity, row 2 = composition.
fig = make_subplots(rows=2, cols=1, shared_xaxes=True)

# Every column after "name" becomes one x-axis category.
sample_names = bracken_file_relative_abun.iloc[:, 1:].columns
bracken_file_relative_abun["name"] = bracken_file_relative_abun["name"].str.strip()

# Number of bars (one trace per row of the table)
num_bars = len(bracken_file_relative_abun["name"])

# One colour per row, sampled evenly along the 'spectral' colorscale
colors = px.colors.sample_colorscale('spectral', np.linspace(0, 1, num_bars))

## Composition bars
for i in range(num_bars):
    fig.add_bar(
        name=bracken_file_relative_abun.iloc[i, 0],  # already stripped above
        x=sample_names,
        y=bracken_file_relative_abun.iloc[i, 1:],
        row=2, col=1,
        marker=dict(color=colors[i]),
    )

## Diversity bars
# NOTE(review): the bar heights below are constant placeholders (2 and 5);
# alpha_file is not read anywhere in this cell -- wire in the real values.
width = 0.4
offset = 0.2
# y must have one value per x category, so size it from sample_names rather
# than a hard-coded sample count.
num_categories = len(sample_names)
fig.add_bar(
    x=sample_names, y=[2] * num_categories, showlegend=False,
    row=1, col=1, width=width, offset=offset,
    text="shannon", textangle=360, textposition='outside', hoverinfo="none",
)
fig.add_bar(
    x=sample_names, y=[5] * num_categories, showlegend=False,
    row=1, col=1, width=width,
    text="simpson", textangle=360, textposition='outside', hoverinfo="none",
)

# barmode is a layout-wide setting, i.e. it applies to both panels.
fig.update_layout(
    barmode='stack',
    autosize=True,
    width=1200,
    height=800,
    title_text="Relative abundance V3-V4 Kraken2-Bracken",
)

## Composition bars
fig.update_yaxes(title_text="Relative frequency (%)", row=2, col=1)
fig.update_xaxes(title_text="Samples", row=2, col=1)

## Diversity bars
# NOTE(review): `base` is documented as the position where the bar base is
# drawn; the value 'group' is kept from the original -- presumably it is meant
# to keep the row-1 bars out of the stacked layout; confirm the intended effect.
fig.update_traces(base='group', row=1, col=1, overwrite=True)
fig.update_yaxes(title_text="Value", row=1, col=1)

