In [1]:
import pandas as pd
import numpy as np

In [2]:
## Load the combined Bracken report
# Number of samples; reused below when building and filtering the
# per-sample relative-abundance columns.
number_samples = 6
# Tab-separated table; the first 8 lines of the file are skipped before parsing.
bracken_file = pd.read_csv(
    '/Volumes/PiconCossio/biocomp_tools/MAUS/utils/all_bracken_report_V3-V4.txt',
    sep='\t',
    skiprows=8,
)

In [3]:
import os

# Accumulates one entry per alpha-diversity report found in the working directory.
alpha_diversity_raw = {"sample": [], "shannon": [], "simpson_reciprocal": []}

# os.listdir() returns names in arbitrary order; sorting makes the row order of
# `alpha_data` reproducible from one run (or machine) to the next.
for file_name in sorted(os.listdir()):
    # Only entries whose name starts with "G" are treated as sample reports.
    if not file_name.startswith("G"):
        continue

    # The context manager closes the file even if parsing below raises.
    with open(file_name) as report:
        report_lines = report.readlines()

    ## select lines 0 and 3 which are Shannon and Simpson's Reciprocal Index
    # The value is the 3rd space-separated token on line 0 and the 4th on line 3.
    shannon_token = report_lines[0].split(" ")[2].strip()
    simpson_token = report_lines[3].split(" ")[3].strip()

    # The sample label is the first four characters of the file name.
    alpha_diversity_raw["sample"].append(file_name[:4])
    alpha_diversity_raw["shannon"].append(round(float(shannon_token), 2))
    alpha_diversity_raw["simpson_reciprocal"].append(round(float(simpson_token), 2))

# One row per report, columns: sample, shannon, simpson_reciprocal.
alpha_data = pd.DataFrame(alpha_diversity_raw)

In [4]:
## compute relative abundance (RelAbun)
def compute_relative_abun (df, number_samples=6):
    """Return a copy of ``df`` with per-sample relative-abundance columns.

    For every ``i`` in ``1..number_samples`` the column ``G{i}_1_lvl`` is
    divided by its own column total and multiplied by 100; the result is
    stored in ``G{i}_1_RelAbun`` (created, or overwritten if it already
    exists).

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding one ``G{i}_1_lvl`` column per sample.
    number_samples : int, default 6
        Number of samples to process.

    Returns
    -------
    pandas.DataFrame
        A new frame; the frame passed in is left untouched.

    Raises
    ------
    KeyError
        If an expected ``G{i}_1_lvl`` column is missing.
    """
    # Work on a copy so the caller's frame is not modified and so that calling
    # this on a filtered subset does not trigger SettingWithCopyWarning.
    df = df.copy()
    for i in range(1, number_samples + 1):
        counts = df[f"G{i}_1_lvl"]
        df[f"G{i}_1_RelAbun"] = counts / counts.sum() * 100
    return df

In [5]:
# First pass: add the G{i}_1_RelAbun columns computed over every row of the table.
bracken_file = compute_relative_abun(bracken_file, number_samples)
# Threshold passed as `min_abundance` to filter_by_abundance_conditional.
min_abundance = 1
def filter_by_abundance_conditional (df, number_samples, min_abundance, min_samples):
    """Keep rows that reach ``min_abundance`` in at least ``min_samples`` samples.

    This is intended to mimic the taxa filter by abundance conditionally of
    qiime2. The abundance values are taken positionally: they are the LAST
    ``number_samples`` columns of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose last ``number_samples`` columns hold the abundances.
    number_samples : int
        How many trailing columns to test.
    min_abundance : number
        A column counts for a row when its value is ``>= min_abundance``.
    min_samples : int
        Minimum number of counting columns required to keep the row.

    Returns
    -------
    pandas.DataFrame
        The surviving rows (original index labels kept) as an independent
        copy, so the result can be modified without SettingWithCopyWarning.
    """
    abundance_cols = df.iloc[:, -number_samples:]
    # Per row: in how many of the trailing columns the threshold is reached.
    samples_passing = (abundance_cols >= min_abundance).sum(axis=1)
    return df[samples_passing >= min_samples].copy()

# Drop the rows that do not reach `min_abundance` in at least two samples ...
bracken_file = filter_by_abundance_conditional(
    bracken_file, number_samples, min_abundance, min_samples=2
)
# ... then recompute the G{i}_1_RelAbun columns over the rows that remain.
bracken_file = compute_relative_abun(bracken_file, number_samples)

In [6]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

In [7]:
# Display the parsed Shannon values (one per report).
alpha_data.loc[:, "shannon"]

0    4.58
1    3.97
2    3.86
3    3.76
4    4.15
5    3.86
Name: shannon, dtype: float64

In [8]:
# Take the "name" column and everything to its right as an independent copy:
# the clean-up below would otherwise assign into a slice of `bracken_file`
# and raise pandas' SettingWithCopyWarning.
bracken_file_relative_abun = bracken_file.loc[:, "name":].copy()
# Strip surrounding whitespace from the names; they become the legend entries.
bracken_file_relative_abun["name"] = bracken_file_relative_abun["name"].str.strip()

# Two stacked panels sharing the x axis: row 1 = diversity, row 2 = composition.
fig = make_subplots(rows=2, cols=1, shared_xaxes=True)

# Every column after "name" becomes one x position in the composition panel.
sample_names = bracken_file_relative_abun.columns[1:]

# Number of bars (one trace per row of the table)
num_bars = len(bracken_file_relative_abun)

# Generate colors using a predefined colorscale (Viridis, Cividis, etc.)
colors = px.colors.sample_colorscale('spectral', np.linspace(0, 1, num_bars))

## Composition bars
# One bar trace per row; with barmode='stack' (set below) the traces pile up
# on top of each other at every x position.
for i in range(num_bars):
    fig.add_bar(
        name=bracken_file_relative_abun.iloc[i, 0],
        x=sample_names,
        y=bracken_file_relative_abun.iloc[i, 1:],
        row=2, col=1,
        marker=dict(color=colors[i]),
    )

## Diversity bars
# NOTE(review): no `x` is passed, so these bars are placed by row position in
# `alpha_data`, not by sample name -- confirm the row order matches the
# sample columns of the composition panel.
width = 0.4
offset = 0.2
fig.add_bar(
    y=alpha_data["shannon"].to_list(), showlegend=False,
    row=1, col=1, width=width, offset=offset,
    text="shannon", textangle=360, textposition='outside',
)
fig.add_bar(
    y=alpha_data["simpson_reciprocal"].to_list(), showlegend=False,
    row=1, col=1, width=width,
    text="simpson", textangle=360, textposition='outside',
)

fig.update_layout(
    barmode='stack',
    autosize=True,
    width=1200,
    height=800,
    title_text="Relative abundance V3-V4 Kraken2-Bracken",
)

## Composition bars
fig.update_yaxes(title_text="Relative frequency (%)", row=2, col=1)
fig.update_xaxes(title_text="Samples", row=2, col=1)

## Diversity bars
# NOTE(review): this sets the `base` attribute of the row-1 traces to the
# string 'group'; 'group' is normally a `barmode` value -- confirm intent.
fig.update_traces(dict(base='group'), row=1, col=1, overwrite=True)
# Kept as the last expression of the cell: the call returns the figure, which
# is what makes the notebook render it.
fig.update_yaxes(dict(title_text="Value", tickmode="linear", tick0=0, dtick=5), row=1, col=1, overwrite=True)

