In [2]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import os
from tqdm import tqdm

In [6]:
# Label inserted into the title of every generated figure.
organism = "Ophaga"

# Folder that is scanned for CANOPUS summaries; the HTML reports are written
# to a 'results' subfolder inside it.
repository_path = "/mnt/c/Users/quirosgu/Desktop/summaries"

In [4]:
# Name of the CANOPUS summary table expected for each sample.
_CANOPUS_SUMMARY_FILENAME = 'canopus_formula_summary_adducts.tsv'

# Hierarchy levels (outermost first) used as the `path` of both figures.
_NPC_PATH_COLUMNS = ['NPC#pathway', 'NPC#superclass', 'NPC#class']

# Fixed colour per pathway, shared by the treemap and the sunburst.
_NPC_PATHWAY_COLORS = {
    'Terpenoids': '#44AA99',
    'Alkaloids': '#88CCEE',
    'Amino acids and Peptides': '#DDCC77',
    'Polyketides': '#CC6677',
    'Shikimates and Phenylpropanoids': '#AA4499',
    'Fatty acids': '#882255',
    'Carbohydrates': '#F4A261',
}


def _locate_canopus_summary(path, entry):
    """Return the CANOPUS summary path belonging to `entry`, or None if there is none."""
    entry_path = os.path.join(path, entry)
    if os.path.isdir(entry_path):
        # Regular layout: one sub-folder per sample, each holding its own summary.
        candidate = os.path.join(entry_path, _CANOPUS_SUMMARY_FILENAME)
        return candidate if os.path.isfile(candidate) else None
    if entry == _CANOPUS_SUMMARY_FILENAME:
        # Flat layout: the summary sits directly inside the repository folder.
        return entry_path
    return None


def _load_canopus_annotations(canopus_path, min_class_confidence):
    """Read one CANOPUS summary and keep only the confidently classified rows."""
    raw = pd.read_csv(canopus_path, sep='\t')

    # Discard rows whose class probability is *below* the cut-off. Rows with a
    # missing probability compare False and are therefore kept; their empty
    # class labels become the string 'None' below.
    confident = raw[~(raw['NPC#class Probability'] < min_class_confidence)]

    # The row ID is the integer after the last underscore of the 'id' column.
    row_ids = confident['id'].str.split('_').str[-1].astype(int)

    annotations = confident.assign(**{'row ID': row_ids})
    annotations = annotations[['row ID'] + _NPC_PATH_COLUMNS]
    # Missing labels are turned into a literal string before plotting.
    return annotations.replace({np.nan: 'None'})


def _build_figures(annotations, organism):
    """Build the (treemap, sunburst) pair for one annotation table."""
    title = (" (" + str(organism) + ") "
             + "- metabolite annotation overview (size proportional to number of annotations)")

    figures = []
    for plot in (px.treemap, px.sunburst):
        fig = plot(annotations, path=list(_NPC_PATH_COLUMNS),
                   color='NPC#pathway',
                   color_discrete_map=dict(_NPC_PATHWAY_COLORS))
        fig.update_layout(margin=dict(t=50, l=25, r=25, b=25), title_text=title)
        fig.update_annotations(font_size=14, font_family="Arial")
        figures.append(fig)
    return tuple(figures)


def sunburst_count_plotter(repository_path, organism, min_class_confidence=0.8):
    """Write a treemap and a sunburst HTML report for every CANOPUS summary found.

    Each entry of `repository_path` is inspected. A sub-folder is processed when
    it contains 'canopus_formula_summary_adducts.tsv'; a summary file placed
    directly in `repository_path` is processed as well. Every other entry is
    skipped. Two files are written per processed entry into
    '<repository_path>/results/': '<entry>_treemap_pos.html' and
    '<entry>_sunburst_pos.html'.

    Parameters
    ----------
    repository_path : str
        Folder containing the sample sub-folders (or the summary file itself).
    organism : str
        Label inserted into the figure titles.
    min_class_confidence : float, optional
        Minimum 'NPC#class Probability' a row needs in order to be plotted.
        Defaults to 0.8.

    Returns
    -------
    None
    """
    path = os.path.normpath(repository_path)
    pathout = os.path.join(path, 'results/')

    # Sorted so that the processing order does not depend on the file system.
    for entry in tqdm(sorted(os.listdir(path))):
        canopus_path = _locate_canopus_summary(path, entry)
        if canopus_path is None:
            continue

        annotations = _load_canopus_annotations(canopus_path, min_class_confidence)
        treemap_fig, sunburst_fig = _build_figures(annotations, organism)

        # Created lazily so that no empty results folder appears when nothing is found.
        os.makedirs(pathout, exist_ok=True)
        treemap_fig.write_html(os.path.join(pathout, entry + '_treemap_pos.html'))
        sunburst_fig.write_html(os.path.join(pathout, entry + '_sunburst_pos.html'))

In [8]:
# Generate the treemap and sunburst HTML reports for the configured folder.
sunburst_count_plotter(
    repository_path=repository_path,
    organism=organism,
)

100%|██████████| 1/1 [00:00<00:00,  1.12it/s]
