# Data Selection and Transformation

In [1]:
# Core data processing libraries
import pandas as pd
import numpy as np

# Visualization libraries
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.gridspec as gridspec
import seaborn as sns

# Statistical analysis
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Utility imports
import csv
import json
import re
import io
import warnings
from functools import partial
from itertools import combinations

# Jupyter-specific imports
import traitlets
from traitlets import HasTraits, Instance, observe
from IPython.display import display, HTML, clear_output
import ipywidgets as widgets
from ipywidgets import (
    interact, interactive, fixed, interact_manual,
    GridspecLayout, VBox, HBox, Layout, Output
)
from ipydatagrid import DataGrid

# Custom module imports
from my_functions_datatransformation import (
    process_protein_combinations,
    setup_data_loading_ui,
    display_widgets,
    setup_widgets,
    initialize_settings,
    check_and_add_protein,
    process_pd_results
)
from my_functions_heatmapviz import (
    initialize_settings,
    proceed_with_label_specific_options,
    update_plot,
    update_filenames
)

# Initialize settings
import _settings as settings
# NOTE: `initialize_settings` is imported twice above (first from
# my_functions_datatransformation, then from my_functions_heatmapviz). The later
# import rebinds the name, so this call runs the my_functions_heatmapviz version.
settings_dict = initialize_settings()
# Publish every key of settings_dict as a notebook-level global variable.
globals().update(settings_dict)

# Global variables from settings
# These explicit assignments run after the globals().update() call above, so they
# take precedence over any same-named keys that settings_dict provided.
spec_translate_list = settings.SPEC_TRANSLATE_LIST
valid_discrete_cmaps = settings.valid_discrete_cmaps
valid_gradient_cmaps = settings.valid_gradient_cmaps
default_hm_color = settings.default_hm_color
default_lp_color = settings.default_lp_color
default_avglp_color = settings.default_avglp_color
hm_selected_color = settings.hm_selected_color
cmap = settings.cmap
#lp_selected_color = settings.lp_selected_color
#avglp_selected_color = settings.avglp_selected_color
#avg_cmap = settings.avg_cmap
legend_title = settings.legend_title
# Placeholders: group_processor is assigned in a later cell; combiner is presumably
# assigned later as well (not visible here) -- confirm.
group_processor = combiner = None
# Path prefix without extension; presumably the export target for heatmap data -- confirm against the export code.
base_filename = 'heatmap_data_files/exported_heatmap_data'

## Imports Proteome Discoverer Data, FASTA Files and MBPDB Bioactive Peptide Matches

In [2]:
class DataTransformation(HasTraits):
    pd_results = Instance(pd.DataFrame, allow_none=True)
    mbpdb_results = Instance(pd.DataFrame, allow_none=True)
    
    def __init__(self):
        """Start with empty data holders and no widgets.

        The widget attributes stay ``None`` until ``setup_data_loading_ui``
        creates them.
        """
        super().__init__()

        # Data holders: empty frames plus an empty accession -> protein lookup.
        self.pd_results = pd.DataFrame()
        self.pd_results_cleaned = pd.DataFrame()
        self.mbpdb_results = pd.DataFrame()
        self.proteins_dic = dict()

        # Widget slots, filled in by setup_data_loading_ui().
        widget_slots = (
            'output_area',
            'mbpdb_uploader',
            'pd_uploader',
            'fasta_uploader',
            'reset_button',
        )
        for slot in widget_slots:
            setattr(self, slot, None)

    def setup_data_loading_ui(self):
        """Create the upload widgets, render them, and attach their callbacks."""

        def make_uploader(label, extensions, allow_many):
            # All three uploaders share the same width and description styling.
            return widgets.FileUpload(
                accept=extensions,
                multiple=allow_many,
                description=label,
                layout=widgets.Layout(width='300px'),
                style={'description_width': 'initial'},
            )

        tabular_types = '.csv,.txt,.tsv,.xlsx'
        self.mbpdb_uploader = make_uploader('Upload MBPDB File', tabular_types, False)
        self.pd_uploader = make_uploader('Upload Peptidomic File', tabular_types, False)
        self.fasta_uploader = make_uploader('Upload FASTA Files', '.fasta', True)

        # Button that clears all uploads, and the area that receives status messages.
        self.reset_button = widgets.Button(description='Reset', button_style='warning')
        self.output_area = widgets.Output()

        # Render the widgets under their section headings.
        display(HTML("<h3><u>Upload Data Files:</u></h3>"))
        display(self.mbpdb_uploader, self.pd_uploader)
        display(HTML("<h3><u>Upload Protein FASTA Files:</u></h3>"))
        display(self.fasta_uploader)
        display(self.reset_button, self.output_area)

        # Each uploader notifies its handler whenever its `value` trait changes.
        wiring = (
            (self.pd_uploader, self._on_pd_upload_change),
            (self.mbpdb_uploader, self._on_mbpdb_upload_change),
            (self.fasta_uploader, self._on_fasta_upload_change),
        )
        for uploader, handler in wiring:
            uploader.observe(handler, names='value')
        self.reset_button.on_click(self._reset_ui)

    def _reset_ui(self, b):
        """Clear every upload widget and all data derived from the uploads.

        Args:
            b: The clicked button (unused; required by the ``on_click`` callback
                signature).
        """
        # NOTE(review): `_counter` looks like the upload counter of older ipywidgets
        # releases; on newer releases these assignments may be no-ops -- confirm.
        self.mbpdb_uploader._counter = 0
        self.pd_uploader._counter = 0
        self.fasta_uploader._counter = 0
        self.mbpdb_uploader.value = ()
        self.pd_uploader.value = ()
        self.fasta_uploader.value = ()
        self.pd_results = None
        self.mbpdb_results = None
        # Also drop the cleaned copy so later cells cannot keep working on data
        # from an upload that was just cleared. An empty frame matches __init__.
        self.pd_results_cleaned = pd.DataFrame()
        self.proteins_dic = {}
        with self.output_area:
            self.output_area.clear_output()
            display(HTML('<b style="color:blue;">All uploads cleared.</b>'))

    def _on_pd_upload_change(self, change):
        """Load a newly uploaded peptidomic file and report its dimensions."""
        is_value_change = change['type'] == 'change' and change['name'] == 'value'
        if not is_value_change:
            return

        with self.output_area:
            self.output_area.clear_output()
            uploads = change['new']
            if not uploads:
                return

            # Only the first uploaded file is used.
            self.pd_results, pd_status = self._load_data(
                uploads[0],
                required_columns=['Positions in Proteins'],
                file_type='Peptidomic',
            )
            if pd_status != 'yes' or self.pd_results is None:
                return

            n_rows, n_cols = self.pd_results.shape
            display(HTML(f'<b style="color:green;">Peptidomic data imported with {n_rows} rows and {n_cols} columns.</b>'))

    def _on_mbpdb_upload_change(self, change):
        """Load a newly uploaded MBPDB results file and report its dimensions."""
        is_value_change = change['type'] == 'change' and change['name'] == 'value'
        if not is_value_change:
            return

        with self.output_area:
            self.output_area.clear_output()
            uploads = change['new']
            if not uploads:
                return

            # Only the first uploaded file is used.
            self.mbpdb_results, mbpdb_status = self._load_data(
                uploads[0],
                required_columns=['Search peptide', 'Protein ID', 'Peptide'],
                file_type='MBPDB',
            )
            if mbpdb_status != 'yes' or self.mbpdb_results is None:
                return

            n_rows, n_cols = self.mbpdb_results.shape
            display(HTML(f'<b style="color:green;">MBPDB file imported with {n_rows} rows and {n_cols} columns</b>'))

    def _on_fasta_upload_change(self, change):
        """Parse each uploaded FASTA file and merge its proteins into ``proteins_dic``."""
        if not (change['type'] == 'change' and change['name'] == 'value'):
            return

        with self.output_area:
            self.output_area.clear_output()
            uploads = change['new']
            if not uploads:
                return

            for upload in uploads:
                # One failing file must not stop the remaining files from loading.
                try:
                    fasta_name = getattr(upload, 'name', None)
                    if not (fasta_name and fasta_name.endswith('.fasta')):
                        display(HTML('<b style="color:red;">Invalid file format. Please upload FASTA files only.</b>'))
                        continue
                    parsed = self._parse_uploaded_fasta(upload)
                    self.proteins_dic.update(parsed)
                    display(HTML(f'<b style="color:green;">Successfully imported FASTA file: {fasta_name} ({len(parsed)} proteins)</b>'))
                except Exception as e:
                    display(HTML(f'<b style="color:red;">Error processing FASTA file: {str(e)}</b>'))

    def _load_data(self, file_obj, required_columns, file_type):
        """Read an uploaded table into a DataFrame and check its columns.

        Args:
            file_obj: Upload record exposing ``name`` (file name) and ``content``
                (the raw bytes).
            required_columns: Column labels that must be present once header
                whitespace has been stripped.
            file_type: Label used as the prefix of displayed error messages.

        Returns:
            ``(df, 'yes')`` on success, ``(None, 'no')`` otherwise. Failures are
            displayed to the user instead of being raised.
        """
        try:
            filename = file_obj.name
            extension = filename.split('.')[-1].lower()
            file_stream = io.BytesIO(file_obj.content)

            if extension == 'csv':
                df = pd.read_csv(file_stream)
            elif extension in ('txt', 'tsv'):
                df = pd.read_csv(file_stream, delimiter='\t')
            elif extension == 'xlsx':
                df = pd.read_excel(file_stream)
            else:
                raise ValueError("Unsupported file format.")

            # Strip padding from textual headers only. Non-string labels (for example
            # numeric headers in a spreadsheet) are kept as they are instead of
            # making the `.str` accessor fail.
            df.columns = [col.strip() if isinstance(col, str) else col for col in df.columns]

            # Report missing columns in the order the caller listed them.
            missing = [col for col in required_columns if col not in df.columns]
            if missing:
                display(HTML(f'<b style="color:red;">{file_type} File Error: Missing required columns: {", ".join(missing)}</b>'))
                return None, 'no'

            return df, 'yes'
        except Exception as e:
            # UI boundary: show the problem to the user rather than raising
            # out of the widget callback.
            display(HTML(f'<b style="color:red;">{file_type} File Error: {str(e)}</b>'))
            return None, 'no'

    def _parse_uploaded_fasta(self, file_data):
        """Parse an uploaded FASTA file into an accession -> protein record dict.

        Headers are expected in the pipe-delimited form ``>db|ACCESSION|NAME OS=...``.
        The second field becomes the key, and the text of the third field before
        `` OS=`` becomes the name. Records whose header does not have at least
        three pipe-separated fields are skipped.

        Args:
            file_data: Upload record whose ``content`` holds the file bytes.

        Returns:
            dict mapping accession to ``{"name", "sequence", "species"}``.
        """
        fasta_dict = {}
        # 'utf-8-sig' reads plain UTF-8 unchanged but drops a leading BOM, which
        # would otherwise hide the '>' of the first header.
        fasta_text = bytes(file_data.content).decode('utf-8-sig')

        protein_id = ""
        protein_name = ""
        species = ""
        sequence_parts = []

        for line in fasta_text.split('\n'):
            line = line.strip()
            if not line.startswith('>'):
                sequence_parts.append(line)
                continue

            # A new header closes the record collected so far.
            if protein_id:
                fasta_dict[protein_id] = {
                    "name": protein_name,
                    "sequence": "".join(sequence_parts),
                    "species": species
                }
            sequence_parts = []

            header_parts = line[1:].split('|')
            if len(header_parts) > 2:
                protein_id = header_parts[1]
                protein_name = re.split(r' OS=', header_parts[2])[0]
                species = self._find_species(line)
            else:
                # Unrecognised header: forget the previous accession so the residues
                # that follow cannot overwrite the previous protein's sequence.
                protein_id = protein_name = species = ""

        # Store the final record, which has no following header to close it.
        if protein_id:
            fasta_dict[protein_id] = {
                "name": protein_name,
                "sequence": "".join(sequence_parts),
                "species": species
            }

        return fasta_dict

    def _find_species(self, header):
        """Return the species label whose search terms occur in ``header``.

        Each entry of ``spec_translate_list`` is read as ``[label, term, term, ...]``.
        Matching is case-insensitive, the first matching entry wins, and
        ``"unknown"`` is returned when nothing matches.
        """
        lowered = header.lower()
        return next(
            (
                group[0]
                for group in spec_translate_list
                if any(term.lower() in lowered for term in group[1:])
            ),
            "unknown",
        )
    
    def process_protein_combinations(self):
        """Show the UI for resolving peptides that map to several proteins.

        One decision box is created for every distinct 'Master Protein Accessions'
        value containing ';'. The Submit button applies the decisions through
        ``on_submit`` to a working copy of ``pd_results``.

        Returns:
            The working copy, which is also stored as ``pd_results_cleaned`` and is
            edited in place when Submit is clicked. ``None`` when no peptidomic
            data is loaded.
        """
        # pd_results is None after a reset or a failed upload, and empty before any upload.
        if self.pd_results is None or self.pd_results.empty:
            return None

        df = self.pd_results.copy()
        accessions = self.pd_results['Master Protein Accessions']

        # Two-column layout: inputs on the left, submit feedback on the right.
        grid = widgets.GridspecLayout(1, 2, width='1000px', grid_gap='5px')
        self.protein_output_area = widgets.Output()

        # na=False counts rows without an accession as "not multi-protein" instead
        # of producing a NaN mask that cannot be used for filtering.
        num_multiple_entries = int(accessions.str.contains(';', na=False).sum())

        unique_proteins = accessions.dropna().unique()
        self.multi_protein_combinations = [up for up in unique_proteins if ';' in up]

        # Instructions for user actions
        html_content = """
        <h3>Options</h3>
        For each protein combination with multiple entries, you have two options:<br>
        1. <b>'new'</b> - Create a new row for each protein listed in the 'Master Protein Accessions' column and their corresponding 'Positions in Proteins'.<br>
        2. <b>Enter a Protein ID</b> - Replace the current protein combination with a custom Protein ID of your choice, updating 'Positions in Proteins' accordingly.
        """

        panels = [
            widgets.HTML("<h3>Peptides Mapped to Multiple Proteins</h3>"),
            widgets.HTML("Peptides that have been identified and <b>mapped to multiple proteins</b> and the '<b>Master Protein Accessions</b>' and '<b>Positions in Proteins</b>' columns have multiple entries for a single peptide require special attention."),
            widgets.HTML(f"In your dataset, you have <b>{num_multiple_entries}</b> peptides mapped to multiple Master Protein Accessions."),
            widgets.HTML(html_content),
        ]

        self.user_decisions = {}
        self.decision_inputs = []

        # One decision box per combination, kept in the same order as
        # multi_protein_combinations so on_submit can zip the two lists.
        for combo in self.multi_protein_combinations:
            named_combo = self.fetch_protein_names(combo)
            occurrences = int(accessions.str.contains(combo, regex=False, na=False).sum())

            decision_box = widgets.Text(
                placeholder="Enter 'new', or a custom Protein ID",
                description='Decision:',
                layout=widgets.Layout(width='300px')
            )
            self.decision_inputs.append(decision_box)
            panels.append(widgets.VBox([
                widgets.HTML(f"<b>{occurrences}</b> occurrences of<br><b>{named_combo}</b>."),
                decision_box,
            ]))

        submit_button = widgets.Button(description="Submit", button_style='success')
        reset_button = widgets.Button(description="Reset Selection", button_style='warning')
        panels.append(widgets.HBox([submit_button, reset_button]))

        input_area = widgets.VBox(panels, layout=widgets.Layout(width='100%'))
        grid[0, 0] = input_area
        grid[0, 1] = self.protein_output_area

        # Register button callbacks; Submit edits `df` in place.
        reset_button.on_click(self.on_reset_button_clicked)
        submit_button.on_click(lambda b: self.on_submit(b, df))
        self.pd_results_cleaned = df
        display(grid)
        return df
        
    def on_submit(self, button, df):
        """Apply the entered decisions to ``df`` in place and report the outcome.

        Rows are matched by exact equality between their 'Master Protein
        Accessions' value and a multi-protein combination. Per combination:

        * ``new`` (any case): the row keeps the first accession/position pair and
          one extra row is appended for every further pair.
        * any other non-empty text: the text (upper-cased) replaces the accession,
          and each position entry is rewritten as ``"<ID> [range]"``.
        * blank: the rows are left unchanged.

        Args:
            button: The clicked button (unused).
            df: DataFrame to edit; it is modified in place.

        Returns:
            The same ``df`` object.
        """
        with self.protein_output_area:
            self.protein_output_area.clear_output()

            # Decisions are upper-cased, so 'new' is case-insensitive and custom
            # IDs are stored in upper case.
            for combo, decision_input in zip(self.multi_protein_combinations, self.decision_inputs):
                self.user_decisions[combo] = decision_input.value.strip().upper()

            for index, row in df.iterrows():
                proteins_row = row['Master Protein Accessions']
                decision = self.user_decisions.get(proteins_row)
                # No entry (row is not a listed combination) or a blank entry:
                # leave the row alone instead of writing an empty accession.
                if not decision:
                    continue

                positions_row = row['Positions in Proteins']

                if decision == 'NEW':
                    # Pair every accession with the first unused position entry
                    # that mentions it.
                    remaining = positions_row.split('; ')
                    accession_position_map = {}
                    for acc in proteins_row.split('; '):
                        for pos in remaining:
                            if acc in pos:
                                accession_position_map[acc] = pos
                                remaining.remove(pos)
                                break
                    acc_pos_pairs = list(accession_position_map.items())
                    if not acc_pos_pairs:
                        # Nothing could be paired up; keep the row as it is.
                        continue

                    # The current row takes the first pair.
                    df.at[index, 'Master Protein Accessions'] = acc_pos_pairs[0][0]
                    df.at[index, 'Positions in Proteins'] = acc_pos_pairs[0][1]

                    # Every further pair becomes a new row appended at the end.
                    for acc, pos in acc_pos_pairs[1:]:
                        new_row = row.copy()
                        new_row['Master Protein Accessions'] = acc
                        new_row['Positions in Proteins'] = pos
                        df.loc[len(df)] = new_row
                else:
                    new_accession = decision
                    new_positions = []
                    for pos in positions_row.split('; '):
                        # Keep only the bracketed residue range of each entry.
                        num_range = pos[pos.index('['):] if '[' in pos else ''
                        new_positions.append(f"{new_accession} {num_range}")
                    df.at[index, 'Master Protein Accessions'] = new_accession
                    df.at[index, 'Positions in Proteins'] = '; '.join(new_positions)

            # Report what happened for each combination.
            for combo, decision in self.user_decisions.items():
                if not decision:
                    display(HTML(f'<b>{combo}</b> <b style="color:orange;">was left unchanged because no decision was entered.</b>'))
                    continue
                display(HTML(f'<b>{combo}</b> <b style="color:green;">has been successfully processed.</b>'))
                if decision == 'NEW':
                    display(HTML('&nbsp;&nbsp;&nbsp;&nbsp;Shared occurrences of the peptide have been separated, with each now assigned a unique protein ID in a new row.'))
                else:
                    display(HTML(f'&nbsp;&nbsp;&nbsp;&nbsp;The occurrences of the peptide with the shared combined protein ID "{combo}" have been replaced with "{decision}".'))
        return df
    
    def on_reset_button_clicked(self, b):
        """Show how to redo the multi-protein selection; no data is changed."""
        notice = HTML('<span style="color:red;">To reset "Mapped to Multiple Proteins" selection after hitting the submit button, <b>rerun the cell</b> and make the correct selections. This button <b>only</b> displays instructions</span>')
        area = self.protein_output_area
        with area:
            area.clear_output()
            display(notice)
        
    def fetch_protein_names(self, accession_str):
        """Render a '; '-separated accession string as HTML, one accession per line.

        Accessions found in ``proteins_dic`` are followed by their species and
        name in blue; unknown accessions are shown as they are.
        """

        def annotate(accession):
            if accession not in self.proteins_dic:
                return accession
            entry = self.proteins_dic[accession]
            return f"{accession}<span style='color:blue'> ({entry['species']} - {entry['name']})</span>"

        return '<br>'.join(map(annotate, accession_str.split('; ')))

    def handle_protein_combinations(self):
        """Ask whether peptides mapped to several proteins should be processed.

        Choosing 'Yes' opens the resolution UI through
        ``process_protein_combinations`` and stores its working copy as
        ``pd_results_cleaned``. Choosing 'No' stores an unchanged copy of
        ``pd_results``. When no peptidomic data is loaded, a message is shown and
        ``pd_results_cleaned`` is left as it was.
        """
        display(HTML("<h3>Multiple Protein Mappings</h3>"))

        choice = widgets.RadioButtons(
            options=[('Yes', True), ('No', False)],
            description='Process peptides mapped to multiple proteins?',
            style={'description_width': 'initial'},
            value=None  # This makes it start unchecked
        )
        output = widgets.Output()

        def process_choice(_):
            with output:
                clear_output()
                # pd_results is None after a reset or a failed upload.
                if self.pd_results is None or self.pd_results.empty:
                    display(HTML("<b style='color:red;'>No peptidomic data loaded. Please upload a peptidomic file first.</b>"))
                    return
                if choice.value:
                    # Use this instance rather than a notebook-level variable name.
                    self.pd_results_cleaned = self.process_protein_combinations()
                    display(HTML("<b style='color:green;'>Processed peptides mapped to multiple proteins.</b>"))
                else:
                    self.pd_results_cleaned = self.pd_results.copy()
                    display(HTML("<b>Using original protein mappings.</b>"))

        choice.observe(process_choice, 'value')
        display(choice)
        display(output)

    # Then to use it, we can create an observe function:
    @staticmethod
    def observe_data_changes(change):
        """Forward freshly loaded data to the notebook-level ``combiner``.

        Declared as a static method because it takes only the change object. This
        lets it be called on the class or on an instance, and lets it be handed
        to ``observe`` as a callback.

        Args:
            change: Change notification; it is acted on only if it has a ``new``
                attribute.
        """
        # `combiner` starts out as None at notebook level; skip until it exists.
        if hasattr(change, 'new') and combiner is not None:
            combiner.update_data(data_transformer.pd_results, data_transformer.mbpdb_results)
        

In [3]:
# Cell 1: Create the instance and setup UI
# The upload callbacks registered by setup_data_loading_ui() store the loaded
# data on `data_transformer`, which later cells read.
data_transformer = DataTransformation()
data_transformer.setup_data_loading_ui()

FileUpload(value=(), accept='.csv,.txt,.tsv,.xlsx', description='Upload MBPDB File', layout=Layout(width='300p…

FileUpload(value=(), accept='.csv,.txt,.tsv,.xlsx', description='Upload Peptidomic File', layout=Layout(width=…

FileUpload(value=(), accept='.fasta', description='Upload FASTA Files', layout=Layout(width='300px'), multiple…



Output()

## Handles Peptides Matched to Multiple Proteins

In [4]:
# Then call the handle_protein_combinations method
# It shows a Yes/No prompt; the selected option fills data_transformer.pd_results_cleaned.
data_transformer.handle_protein_combinations()

RadioButtons(description='Process peptides mapped to multiple proteins?', options=(('Yes', True), ('No', False…

Output()

In [5]:
# Disabled cell: the DataGrid preview of pd_results below is wrapped in a string
# literal, so none of it runs. The trailing ';' presumably keeps the notebook from
# echoing the string as the cell's output.
"""
from ipydatagrid import DataGrid

if data_transformer.pd_results is not None:
    grid = DataGrid(
        data_transformer.pd_results,
        selection_mode='cell',
        grid_style={'gridStroke': '#ddd'},
        base_row_size=25,
        base_column_size=100,
        auto_fit_columns=True,
        layout={'height': '300px', 'width': 'auto'}
    )
    display(grid)
else:
    print("No peptidomic data loaded yet")
""";

## Group Data by Categorical Variables

In [19]:
class GroupProcessing:
    def __init__(self):
        self.group_data = {}
        self.group_number = 1
        self.filtered_columns = []
        self.group_uploader = widgets.FileUpload(
        accept='.json',
        multiple=False,
        description='Upload Groups File',
        layout=widgets.Layout(width='300px'),
        style={'description_width': 'initial'}
        )
        self.group_uploader.observe(self._on_group_upload_change, names='value')
        
        # Initialize output areas
        self.output = widgets.Output()
        self.gd_output_area = widgets.Output()
        
        # Initialize widgets for group selection
        self.column_dropdown = widgets.SelectMultiple(
            description='Absorbance',
            style={'description_width': 'initial'},
            disabled=False,
            layout=widgets.Layout(width='90%', height='300px')
        )
        
        self.grouping_variable_text = widgets.Text(
            description='Group Name',
            layout=widgets.Layout(width='90%'),
            style={'description_width': 'initial'}
        )
        
        # Initialize buttons
        self.search_button = widgets.Button(
            description='Search',
            button_style='info',
            layout=widgets.Layout(margin='10px 10px 0 0')
        )
        
        self.add_group_button = widgets.Button(
            description='Add Group',
            button_style='success',
            layout=widgets.Layout(margin='10px 10px 0 0')
        )
        
        self.reset_file_button = widgets.Button(
            description='Reset Selection',
            button_style='warning',
            layout=widgets.Layout(margin='10px 10px 0 75px')
        )
        
        # Set up button callbacks
        self.search_button.on_click(self._search_columns)
        self.add_group_button.on_click(self._add_group)
        self.reset_file_button.on_click(self._reset_selection)
                
        def setup_data(self, datat):
            """Initialize data and filters for the analysis"""
            # Define columns to exclude
            columns_to_exclude = ['Marked as', 'Number of Missed Cleavages', 'Number of PSMs',
                                'Checked', 'Confidence', 'Annotated Sequence', 'Unnamed: 3', 
                                'Modifications', '# Protein Groups', '# Proteins', '# PSMs', 
                                'Master Protein Accessions', 'Positions in Proteins', 
                                'Modifications in Proteins', '# Missed Cleavages', 
                                'Theo. MH+ [Da]', 'Quan Info', 
                                'Confidence (by Search Engine): Sequest HT',
                                'q-Value (by Search Engine): Sequest HT', 
                                'PEP (by Search Engine): Sequest HT',
                                'SVM Score (by Search Engine): Sequest HT', 
                                'XCorr (by Search Engine): Sequest HT',
                                'PEP', 'q-Value', 'Top Apex RT [min]', 'Top Apex RT in min']
                                
            exclude_substrings = ['Abundances by Bio Rep', 'Count', 'Origin']
            
            # Filter columns
            self.filtered_columns = [
                col for col in data_transformer.pd_results_cleaned.columns 
                if col not in columns_to_exclude and 
                not any(substring in col for substring in exclude_substrings)
            ]
            
            # Update dropdown options
            self.column_dropdown.options = self.filtered_columns
            self._reset_inputs()
        
        setup_data(self, data_transformer.pd_results_cleaned)

    def display_group_selector(self):
        """Render the heading, the JSON group-file uploader and its message area."""
        heading = widgets.HTML("<h3><u>Upload Existing Group Dictionary:</u></h3>")
        display(heading)
        display(self.group_uploader, self.gd_output_area)
        

    def display_widgets(self):
        """Render the group builder: inputs on the left, results on the right."""
        # One row, two columns.
        grid = widgets.GridspecLayout(1, 2, width='1000px', grid_gap='5px')

        # Both panes have the same size and scroll vertically.
        pane_size = dict(width='95%', height='600px', overflow_y='auto')

        left_pane = [
            widgets.HTML("<h3><u>Select New Grouping of Data:</u></h3>"),
            widgets.HTML('Now select the <b>absorbance columns</b> and assign the name of the <b>grouping variable</b>:'),
            self.column_dropdown,
            self.grouping_variable_text,
            widgets.HBox([self.search_button, self.add_group_button]),
            widgets.HBox([self.reset_file_button]),
        ]
        input_container = widgets.VBox(left_pane, layout=widgets.Layout(**pane_size))

        right_pane = [
            widgets.HTML("<h3><u>Group Selection Results:</u></h3>"),
            self.output,
        ]
        output_container = widgets.VBox(
            right_pane,
            layout=widgets.Layout(padding='10px', **pane_size),
        )

        grid[0, 0] = input_container
        grid[0, 1] = output_container

        display(grid)
    def _on_gd_submit(self, b, dropdown):
        """Replace ``group_data`` with the groups stored in a JSON file.

        Args:
            b: The clicked button (unused).
            dropdown: Widget whose ``value`` is the path of the JSON file, or the
                placeholder text when nothing has been chosen.
        """
        selected_file = dropdown.value
        with self.gd_output_area:
            clear_output()

            if selected_file == 'Select an existing grouping dictionary file':
                print("Please select a valid file.")
                return

            try:
                # Read as UTF-8 explicitly so that group names with non-ASCII
                # characters do not depend on the platform's default encoding.
                with open(selected_file, 'r', encoding='utf-8') as file:
                    data = json.load(file)
                self.group_data = {}

                # List every loaded group in the results pane.
                with self.output:
                    clear_output()
                    for group_number, group_info in data.items():
                        group_name = group_info.get('grouping_variable')
                        selected_columns = group_info.get('abundance_columns')

                        self.group_data[group_number] = {
                            'grouping_variable': group_name,
                            'abundance_columns': selected_columns
                        }

                        display(widgets.HTML(
                            f"<b>Group {group_number}</b> created with <b>{len(selected_columns)} columns assigned</b>."
                        ))
                        display(widgets.HTML(f"<b>Grouping Variable:</b> {group_name}"))
                        display(widgets.HTML(f"<b>Selected Columns:</b> {', '.join(selected_columns)}"))
                        display(widgets.HTML("<hr style='border: 1px solid black;'>"))

                display(widgets.HTML(f'<b style="color:green;">Successfully uploaded: {selected_file}</b>'))

            except Exception as e:
                # UI boundary: report the failure instead of raising out of the callback.
                display(widgets.HTML(f"<b style='color:red;'>An error occurred while processing the file: {str(e)}</b>"))
    
    def _search_columns(self, b):
        """Search for columns based on group name"""
        group_name = self.grouping_variable_text.value
        if group_name:
            matching_columns = [col for col in self.filtered_columns if group_name in col]
            self.column_dropdown.value = matching_columns
        else:
            with self.output:
                clear_output()
                display(widgets.HTML('<b style="color:red;">Please enter a group name to search.</b>'))
    
    def _add_group(self, b):
        """Store the current selection as a new group and list it in the results pane.

        The new group is keyed by the next free number (as a string): one more than
        the largest numeric key already in ``group_data``, or "1" when there is
        none. Keys that are not numeric, which can come from an uploaded JSON
        file, are ignored when numbering.

        Args:
            b: The clicked button (unused).
        """
        group_name = self.grouping_variable_text.value
        selected_columns = list(self.column_dropdown.value)

        if not (group_name and selected_columns):
            with self.output:
                display(widgets.HTML('<b style="color:red;">Please enter a group name and select at least one column.</b>'))
            return

        # Collect the numeric keys; an uploaded file may contain keys that are not
        # numbers, which must not break numbering.
        existing_numbers = []
        for key in self.group_data:
            try:
                existing_numbers.append(int(key))
            except (TypeError, ValueError):
                continue
        self.group_number = str(max(existing_numbers, default=0) + 1)

        # Add new group data to the dictionary
        self.group_data[self.group_number] = {
            'grouping_variable': group_name,
            'abundance_columns': selected_columns
        }

        # Display output
        with self.output:
            display(widgets.HTML(f"<b>Group {self.group_number}</b> created with <b>{len(selected_columns)} columns assigned</b>."))
            display(widgets.HTML(f"<b>Grouping Variable:</b> {group_name}"))
            display(widgets.HTML(f"<b>Selected Columns:</b> {', '.join(selected_columns)}"))
            display(widgets.HTML("<hr style='border: 1px solid black;'>"))

        self._reset_inputs()
        
    def _reset_selection(self, b):
        """Forget all groups, clear both output areas and empty the input fields."""
        self.group_data = {}
        self.group_number = 1
        for area in (self.gd_output_area, self.output):
            with area:
                clear_output()
        self._reset_inputs()
    
    def _reset_inputs(self):
        """Reset input fields"""
        self.grouping_variable_text.value = ''
        self.column_dropdown.value = ()

    def _on_group_upload_change(self, change):
        """Merge the groups of an uploaded JSON file into ``group_data`` and list them."""
        if change['type'] != 'change' or change['name'] != 'value':
            return

        with self.gd_output_area:
            uploads = change['new']
            if not uploads:
                return

            uploaded = uploads[0]
            try:
                parsed = json.loads(bytes(uploaded.content).decode('utf-8'))

                # Existing groups are kept; uploaded keys are added or overwritten.
                with self.output:
                    for group_number, group_info in parsed.items():
                        group_name = group_info.get('grouping_variable')
                        selected_columns = group_info.get('abundance_columns')

                        self.group_data[group_number] = dict(
                            grouping_variable=group_name,
                            abundance_columns=selected_columns,
                        )

                        display(widgets.HTML(
                            f"<b>Group {group_number}</b> created with <b>{len(selected_columns)} columns assigned</b>."
                        ))
                        display(widgets.HTML(f"<b>Grouping Variable:</b> {group_name}"))
                        display(widgets.HTML(f"<b>Selected Columns:</b> {', '.join(selected_columns)}"))
                        display(widgets.HTML("<hr style='border: 1px solid black;'>"))

                display(widgets.HTML(f'<b style="color:green;">Successfully uploaded: {uploaded.name}</b>'))

            except Exception as e:
                display(widgets.HTML(f"<b style='color:red;'>An error occurred while processing the file: {str(e)}</b>"))


In [21]:
# Build the grouping UI. GroupProcessing() reads data_transformer.pd_results_cleaned
# while it is being constructed, so run this cell after the data has been loaded.
group_processor = GroupProcessing()
group_processor.display_group_selector()
#group_processor.setup_data(data_transformer.pd_results_cleaned)
group_processor.display_widgets()

HTML(value='<h3><u>Upload Existing Group Dictionary:</u></h3>')

FileUpload(value=(), accept='.json', description='Upload Groups File', layout=Layout(width='300px'))

Output()

GridspecLayout(children=(VBox(children=(HTML(value='<h3><u>Select New Grouping of Data:</u></h3>'), HTML(value…

In [18]:
# Show the `pd_results_cleaned` table held by data_transformer as the cell output.
data_transformer.pd_results_cleaned

Unnamed: 0,Checked,Confidence,Annotated Sequence,Modifications,Marked as,Number of Protein Groups,Number of Proteins,Number of PSMs,Master Protein Accessions,Positions in Proteins,...,Average_Abundance_A_Feed,Average_Abundance_A_Gastric,Average_Abundance_A_Intestinal,Average_Abundance_A1_Feed,Average_Abundance_A1_Gastric,Average_Abundance_A1_Intestinal,Average_Abundance_Bovine_Feed,Average_Abundance_Bovine_Gastric,Average_Abundance_Bovine_Intestinal,Average_Abundance_Bovine
0,False,High,[N].LHLPLPLLQ.[PS],,Bovine;Human,2,2,9,P05814; P02666,P05814 [139-147]; P02666 [148-156],...,15054.835449,119576.8,,21058.240885,168952.1,,18656.878711,144264.5,,87170.1
1,False,High,[LV].ENLHLPLPLLQ.[PS],,Bovine;Human,2,2,5,P05814; P02666,P05814 [137-147]; P02666 [146-156],...,,,,,,,,,,
2,False,High,[N].LHLPLPLL.[Q],,Bovine;Human,2,2,23,P05814; P02666,P05814 [139-146]; P02666 [148-155],...,,1256696.0,,,2139823.0,243338.40625,,1698259.0,243338.40625,825306.8
3,False,High,[Y].WLAHKAL.[C],,Bovine;Human,2,3,13,G9G9X6; P00709,G9G9X6 [123-129]; P00711 [123-129]; P00709 [12...,...,,1861642.0,947583.1875,,5004868.0,,,3433255.0,947583.1875,3078159.0
4,False,High,[LV].ENLHLPLPL.[L],,Bovine;Human,2,2,8,P05814; P02666,P05814 [137-145]; P02666 [146-154],...,,,880971.65625,,,424475.90625,,,652723.78125,652723.8


## Combine and Average Data

In [8]:
class CombineAverageDataframes:
    """Merge the peptidomic (PD) results with MBPDB matches and add group averages.

    The instance keeps its own references to ``data_transformer.pd_results`` and
    ``data_transformer.mbpdb_results`` and registers an observer so those
    references follow later changes. ``process_data`` runs the whole pipeline;
    its result is stored and exposed through the ``merged_df`` property.
    """

    def __init__(self, data_transformer, group_processor):
        """Store the collaborators, snapshot their data and observe changes.

        Args:
            data_transformer: Object exposing ``pd_results``, ``mbpdb_results``
                and an ``observe(handler, names=[...])`` method.
            group_processor: Object holding the group definitions; only stored
                here.
        """
        self.data_transformer = data_transformer
        self.group_processor = group_processor
        self.pd_results = data_transformer.pd_results
        self.mbpdb_results = data_transformer.mbpdb_results
        # Work on a copy so the transformer's own DataFrame is never mutated.
        self.pd_results_cleaned = self.pd_results.copy() if self.pd_results is not None else None
        self._merged_df = None
        # Keep the local references in sync when the transformer's data changes.
        self.data_transformer.observe(self._handle_data_change, names=['pd_results', 'mbpdb_results'])

    def _handle_data_change(self, change):
        """Handle changes in the input data."""
        if change.name == 'pd_results':
            self.pd_results = change.new
        elif change.name == 'mbpdb_results':
            self.mbpdb_results = change.new

        self.pd_results_cleaned = self.pd_results.copy() if self.pd_results is not None else None

        # Drop whatever the previous run rendered.
        clear_output()

    @property
    def merged_df(self):
        """Property to access the merged DataFrame."""
        return self._merged_df

    def extract_bioactive_peptides(self):
        """Extract the bioactive peptide matches from the imported MBPDB search.

        Rows without a usable 'Protein ID' (NaN or the literal string 'None')
        are discarded. The remaining rows are grouped by 'Search peptide',
        keeping the first value of each descriptive column and joining the
        distinct 'Function' values with '; '.

        Returns:
            ``(cleaned, grouped)`` DataFrames, or ``(None, None)`` when no
            MBPDB results are available (``None`` or an empty DataFrame).
        """
        # No MBPDB upload is a supported situation, so treat None like empty.
        if self.mbpdb_results is None or self.mbpdb_results.empty:
            return None, None

        mbpdb_results_cleaned = self.mbpdb_results.dropna(subset=['Protein ID'])
        mbpdb_results_cleaned = mbpdb_results_cleaned[mbpdb_results_cleaned['Protein ID'] != 'None']

        # '% Alignment' is optional; when present it sits after the description
        # so the grouped column order stays the same as before.
        first_value_columns = ['Peptide', 'Protein ID', 'Protein description']
        if '% Alignment' in mbpdb_results_cleaned.columns:
            first_value_columns.append('% Alignment')
        first_value_columns += ['Species', 'Intervals']

        agg_dict = {column: 'first' for column in first_value_columns}
        agg_dict['Function'] = lambda x: list(x.dropna().unique())

        mbpdb_results_grouped = mbpdb_results_cleaned.groupby('Search peptide').agg(agg_dict).reset_index()

        # Flatten the per-peptide list of functions into one string.
        mbpdb_results_grouped['Function'] = mbpdb_results_grouped['Function'].apply(
            lambda x: '; '.join(x) if isinstance(x, list) else x
        )
        return mbpdb_results_cleaned, mbpdb_results_grouped

    def create_unique_id(self, row):
        """Return '<Sequence>_<Modifications>' for *row*, or just the sequence.

        The modifications are stripped of surrounding whitespace and a trailing
        underscore (left behind by blank modifications) is removed.
        """
        if pd.notna(row['Modifications']):
            unique_id = row['Sequence'] + "_" + row['Modifications'].strip()
        else:
            unique_id = row['Sequence']
        return unique_id.rstrip('_')

    def process_pd_results(self, mbpdb_results_grouped):
        """Process the PD results and merge them with the grouped MBPDB results.

        ``self.pd_results_cleaned`` is modified in place: the position and
        accession columns are reduced to their first ';'-separated entry, and
        'Sequence', 'unique ID', 'start' and 'stop' columns are added.

        Args:
            mbpdb_results_grouped: Grouped MBPDB DataFrame, or ``None``/empty
                when there is nothing to merge.

        Returns:
            The merged DataFrame (left join on 'unique ID' == 'Search peptide'),
            or the processed PD data with an empty 'Function' column.
        """
        pd_results_cleaned = self.pd_results_cleaned

        # Only the first listed position / accession is kept.
        pd_results_cleaned['Positions in Proteins'] = pd_results_cleaned['Positions in Proteins'].str.split(';', expand=False).str[0]
        pd_results_cleaned['Master Protein Accessions'] = pd_results_cleaned['Master Protein Accessions'].str.split(';', expand=False).str[0]

        # The bare sequence is the middle part of '[X].SEQUENCE.[Y]'.
        if 'Sequence' not in pd_results_cleaned.columns:
            pd_results_cleaned['Sequence'] = pd_results_cleaned['Annotated Sequence'].str.split('.', expand=False).str[1]

        pd_results_cleaned['unique ID'] = pd_results_cleaned.apply(self.create_unique_id, axis=1)

        # Pull start/stop out of '... [start-stop]'; Int64 keeps missing values.
        try:
            extracted = pd_results_cleaned['Positions in Proteins'].str.extract(r'\[(\d+)-(\d+)\]')
            pd_results_cleaned[['start', 'stop']] = extracted.astype(float).astype('Int64')
        except Exception as e:
            print(f"Error: {e}")

        # Move the location columns to the front. Only columns that actually
        # exist are listed, so a failed extraction above does not turn into a
        # KeyError here.
        leading_columns = [
            col for col in ('Master Protein Accessions', 'Positions in Proteins', 'start', 'stop')
            if col in pd_results_cleaned.columns
        ]
        columns_order = leading_columns + [
            col for col in pd_results_cleaned.columns if col not in leading_columns
        ]
        pd_results_cleaned = pd_results_cleaned[columns_order]

        if mbpdb_results_grouped is not None and not mbpdb_results_grouped.empty:
            merged_df = pd.merge(pd_results_cleaned, mbpdb_results_grouped,
                                 right_on='Search peptide', left_on='unique ID', how='left')
            display(HTML("<b style='color:green;'>The MBPDB was successfully merged with the peptidomic data matching the Search Peptide and Unique ID columns.</b>"))
        else:
            merged_df = pd_results_cleaned.copy()
            merged_df['Function'] = np.nan
            display(HTML("<b style='color:orange;'>No MBPDB was uploaded.</b>"))
            display(HTML("<b style='color:orange;'>The merged Dataframe contains only peptidomic data.</b>"))

        return merged_df

    def calculate_group_abundance_averages(self, df, group_data):
        """Add one 'Average_Abundance_<grouping variable>' column per group.

        Args:
            df: DataFrame containing every abundance column named in
                *group_data*. Those columns are coerced to numeric in place.
            group_data: Mapping of group number to a dict with the keys
                'grouping_variable' and 'abundance_columns'.

        Returns:
            *df* unchanged when every average column already exists, otherwise
            a new DataFrame with the missing average columns appended.
        """
        average_names = [
            f"Average_Abundance_{details['grouping_variable']}"
            for details in group_data.values()
        ]
        if all(name in df.columns for name in average_names):
            display(HTML('<b style="color:orange;">All average abundance columns already exist. Returning original DataFrame.</b>'))
            return df

        new_columns = {}
        for details in group_data.values():
            abundance_columns = details['abundance_columns']

            # Non-numeric entries become NaN and are ignored by the mean.
            for col in abundance_columns:
                df[col] = pd.to_numeric(df[col], errors='coerce')

            average_column_name = f"Average_Abundance_{details['grouping_variable']}"
            # Skip averages that are already present: appending them again
            # would leave the DataFrame with duplicate column names.
            if average_column_name in df.columns:
                continue
            new_columns[average_column_name] = df[abundance_columns].mean(axis=1, skipna=True)

        df = pd.concat([df, pd.DataFrame(new_columns)], axis=1)
        if not df.empty:
            display(HTML('<b style="color:green;">Group average abundance columns have been successfully added to the DataFrame.</b>'))
        return df

    def process_data(self, group_data):
        """Run the full pipeline and remember the result.

        Args:
            group_data: Group definitions passed on to
                ``calculate_group_abundance_averages``; when empty the
                averaging step is skipped.

        Returns:
            The final DataFrame, or ``None`` when there is no PD data or any
            step raised (the error is shown to the user instead).
        """
        if self.pd_results is None or self.pd_results.empty:
            display(HTML("<b style='color:red;'>No PD results data available for processing.</b>"))
            return None

        try:
            _, mbpdb_results_grouped = self.extract_bioactive_peptides()

            if self.pd_results_cleaned is None:
                self.pd_results_cleaned = self.pd_results.copy()

            merged_df_temp = self.process_pd_results(mbpdb_results_grouped)

            if group_data:
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", UserWarning)
                    final_df = self.calculate_group_abundance_averages(merged_df_temp, group_data)
            else:
                final_df = merged_df_temp
                display(HTML("<b style='color:orange;'>No group data provided. Skipping abundance calculations.</b>"))

            self._merged_df = final_df
            return final_df
        except Exception as e:
            # UI boundary: report the failure in the notebook instead of raising.
            display(HTML(f"<b style='color:red;'>Error processing data: {str(e)}</b>"))
            return None

    def display_interactive_results(self, df):
        """Render *df* in a read-only DataGrid, or print a notice when it is None."""
        if df is not None:
            grid = DataGrid(df, selection_mode='cell', editable=False)
            grid.auto_fit_columns = True
            grid.base_row_size = 25
            grid.base_column_size = 150
            grid.auto_fit_params = {'area': 'column', 'padding': 10}

            display(grid)
        else:
            print("No data to display")

    def update_data(self, pd_results, mbpdb_results):
        """Update the input data and refresh the display."""
        self.pd_results = pd_results
        self.mbpdb_results = mbpdb_results
        self.pd_results_cleaned = pd_results.copy() if pd_results is not None else None

        # Clear previous outputs before showing the controls again.
        clear_output()

        # confirm_button and output are notebook-level globals (created in the
        # cell that wires up the "Transform Data" button), not attributes.
        display(confirm_button)
        display(output)


In [9]:
# Button that starts the merge / averaging pipeline
confirm_button = widgets.Button(
    description='Transform Data',
    button_style='success',
    tooltip='Click to start data processing',
)

# Status messages and the result grid are rendered into this area
output = widgets.Output()
# Placeholder until the first successful run replaces it
merged_df = pd.DataFrame()


def on_button_clicked(b):
    """Rebuild the combiner, process the data and show the result grid.

    Rebinds the notebook-level ``combiner`` and ``merged_df``; everything
    printed or displayed goes into ``output``.
    """
    global combiner, merged_df
    with output:
        clear_output()
        # A fresh combiner registers its own observers on data_transformer
        combiner = CombineAverageDataframes(data_transformer, group_processor)
        merged_df = combiner.process_data(group_processor.group_data)

        if merged_df is None:
            print("Error: No data was processed")
            return

        print("\nData processing completed successfully!")
        print(f"Final results shape: {merged_df.shape}")
        # Show the merged table in an interactive grid
        grid = DataGrid(merged_df)
        grid.auto_fit_columns = True
        display(grid)


# Wire the callback, then show the button followed by its output area
confirm_button.on_click(on_button_clicked)
display(confirm_button)
display(output)

Button(button_style='success', description='Transform Data', style=ButtonStyle(), tooltip='Click to start data…

Output()

# Heatmap Visualization

In [10]:
class HeatmapDataHandler:
    def __init__(self):
        """Initialise the handler state and build the selection widgets.

        Reads the notebook-level globals ``base_filename``, ``group_processor``
        and ``legend_title``. The selection widgets themselves are created by
        ``create_widgets``.
        """
        self.base_filename = base_filename

        # Per-variable data, keyed by "<protein id>_<grouping variable>".
        # Starts empty; add_group fills it from extract_and_format_data.
        self.data_variables = {}
        self.protein_mapping = {
            key.split('_')[0]: value['protein_name']
            for key, value in self.data_variables.items()
        }
        self.available_proteins = {key.split('_')[0] for key in self.data_variables}

        # Group names offered in the "Groups" list come from the group definitions.
        if group_processor:
            self.available_grouping_vars = [
                group['grouping_variable'] for group in group_processor.group_data.values()
            ]
        else:
            self.available_grouping_vars = []
        self.selected_var_keys_list = []
        # Set by update_var_keys / add_group once a protein is chosen.
        self.selected_protein = None

        # Selection-dependent state, rebuilt on every add_group.
        self.filtered_data_variables = {}
        self.available_data_variables = {}
        self.label_widgets = {}
        self.order_widgets = {}
        self.default_label_values = {}
        # A list of labels, matching what add_group and reset_selection store.
        self.default_order_values = []

        self.var_selection_output = widgets.Output()
        self.label_order_output = widgets.Output()

        # Builds protein_dropdown, grouping_variable_text, var_key_dropdown,
        # the buttons and button_box, and hooks up their callbacks.
        self.create_widgets()

        # Plotting options and their defaults
        self.selected_peptides = []
        self.selected_functions = []
        self.legend_title = legend_title
        self.bio_or_pep = 'no'
        self.ms_average_choice = 'yes'
        self.plot_heatmap = 'yes'
        self.plot_zero = 'no'

        # Filled in by process_data_variables
        self.user_protein_id = ''
        self.protein_name_short = ''
    def create_filtered_data_variables(self):
        return {key: selector.data_variables[key] 
                for key in selector.selected_var_keys_list
                if key in selector.data_variables}
    
    
    """
    def load_complex_dict(self, base_path=None):
        if base_path is None:
            base_path = self.base_filename

        with open(os.path.join(base_path, 'metadata.json'), 'r') as f:
            metadata = json.load(f)

        result = {}
        for key, info in metadata.items():
            if info['type'] == 'nested':
                # Recursively load nested dictionaries
                result[key] = self.load_complex_dict(os.path.join(base_path, info['path']))
            elif info['type'] == 'dataframe':
                # Load DataFrame from CSV
                result[key] = pd.read_csv(os.path.join(base_path, info['filename']))
            elif info['type'] == 'direct':
                # Load direct value from metadata
                result[key] = info['value']

        return result

    # Function to extract and format data
    """
    def extract_and_format_data(self):
        """Run ``process_export`` and flatten its result into one dict per variable.

        The export result is stored in ``self.loaded_data``. It is walked as
        ``{protein_id: {grouping_var_name: group_info}}`` and every
        ``group_info`` becomes one entry keyed ``"<protein_id>_<grouping_var_name>"``.

        Returns:
            dict: var_key -> dict with the protein metadata, the two heatmap
            DataFrames, the label and the ``is_func_df_all_none`` flag.
        """
        self.loaded_data = process_export(data_transformer.proteins_dic, combiner.merged_df, group_processor.group_data, heatmap_directory, self.selected_protein)

        flattened = {}
        for protein_id, groups in self.loaded_data.items():
            for grouping_var_name, group_info in groups.items():
                function_df = group_info.get('func_heatmap_df')
                abundance_df = group_info.get('heatmap_df')

                # A missing function frame counts the same as one holding only nulls
                if function_df is None:
                    function_df_is_blank = True
                else:
                    function_df_is_blank = function_df.isnull().all().all()

                flattened[f"{protein_id}_{grouping_var_name}"] = {
                    'protein_id': protein_id,
                    'protein_sequence': group_info.get('protein_sequence'),
                    'protein_name': group_info.get('protein_name'),
                    'protein_species': group_info.get('protein_species'),
                    'heatmap_df': abundance_df,
                    'function_heatmap_df': function_df,
                    'label': grouping_var_name,
                    'is_func_df_all_none': function_df_is_blank
                }

        return flattened
    
                    
    def chunk_dataframe(self, df, chunk_size, exclude_columns=3):

        # Select all rows and all but the last 'exclude_columns' columns
        df_subset = df.iloc[:, :-exclude_columns] if exclude_columns else df

        # Calculate the number of rows needed to make the last chunk exactly 'chunk_size'
        total_rows = df_subset.shape[0]
        remainder = total_rows % chunk_size
        if remainder != 0:
            # Rows needed to complete the last chunk
            rows_to_add = chunk_size - remainder

            # Create a DataFrame with zero values for the missing rows
            additional_rows = pd.DataFrame(np.zeros((rows_to_add, df_subset.shape[1])), columns=df_subset.columns)

            # Append these rows to df_subset
            df_subset = pd.concat([df_subset, additional_rows], ignore_index=True)

        # Create chunks of the DataFrame
        max_index = df_subset.index.max() + 1
        return [df_subset.iloc[i:i + chunk_size] for i in range(0, max_index, chunk_size)]

    # Function to process data variables
    def process_data_variables(self):
        chunk_size = 78
        # Print loaded dataframes and their labels
        for var, info in self.filtered_data_variables.items():
            if 'function_heatmap_df' in info:
                if info['is_func_df_all_none']:
                    display(HTML(f"<b>{var} - Label: {info['label']}</b>: Only absorbance data loaded."))
                else:
                    display(HTML(f"<b>{var} - Label: {info['label']}</b>: Absorbance and function data loaded."))

        # Dynamically generate the list of variable names based on loaded data
        variables = list(self.filtered_data_variables.keys())
        protein_id_list = []
        protein_name_list = []
        for var in variables:
            if var in self.filtered_data_variables and 'heatmap_df' in self.filtered_data_variables[var]:
                df = self.filtered_data_variables[var]['heatmap_df']
                df_func = self.filtered_data_variables[var]['function_heatmap_df']

                try:
                    self.filtered_data_variables[var]['peptide_counts'] = df['count']
                    self.filtered_data_variables[var]['ms_data'] = df['average']

                    self.filtered_data_variables[var]['max_peptide_counts'] = self.filtered_data_variables[var][
                        'peptide_counts'].max()
                    self.filtered_data_variables[var]['min_peptide_counts'] = self.filtered_data_variables[var][
                        'peptide_counts'].min()
                    self.filtered_data_variables[var]['max_ms_data'] = self.filtered_data_variables[var][
                        'ms_data'].max()
                    self.filtered_data_variables[var]['min_ms_data'] = self.filtered_data_variables[var]['ms_data'][
                        self.filtered_data_variables[var]['ms_data'] > 0].min()

                    self.filtered_data_variables[var]['amino_acids_chunks'] = [
                        self.filtered_data_variables[var]['protein_sequence'][i:i + chunk_size]
                        for i in range(0, len(self.filtered_data_variables[var]['protein_sequence']), chunk_size)
                    ]

                    self.filtered_data_variables[var]['peptide_counts_chunks'] = [
                        self.filtered_data_variables[var]['peptide_counts'][i:i + chunk_size]
                        for i in range(0, len(self.filtered_data_variables[var]['peptide_counts']), chunk_size)
                    ]

                    self.filtered_data_variables[var]['ms_data_chunks'] = [
                        self.filtered_data_variables[var]['ms_data'][i:i + chunk_size]
                        for i in range(0, len(self.filtered_data_variables[var]['ms_data']), chunk_size)
                    ]
                    self.filtered_data_variables[var]['ms_data_list'] = list(
                        self.filtered_data_variables[var]['ms_data'])
                    self.filtered_data_variables[var]['AA_list'] = df['AA'].tolist()

                    columns_to_include = df.columns.difference(['AA', 'COUNT'])
                    df_filtered = df[columns_to_include]

                    self.filtered_data_variables[var]['bioactive_peptide_abs_df'] = df_filtered
                    self.filtered_data_variables[var]['bioactive_peptide_chunks'] = self.chunk_dataframe(df_filtered,
                                                                                                         chunk_size=chunk_size)
                    self.filtered_data_variables[var]['bioactive_function_chunks'] = self.chunk_dataframe(df_func,
                                                                                                          chunk_size=chunk_size)
                    self.filtered_data_variables[var]['bioactive_peptide_func_df'] = df_func
                    protein_id_list.append(self.filtered_data_variables[var]['protein_id'])
                    protein_name_list.append(self.filtered_data_variables[var]['protein_name'])

                    print(f"All data structures for {var} have been created successfully.")

                except Exception as e:
                    display(HTML(f'<span style="color:red;">Error processing data for {var}: {e}</span>'))
            else:
                display(HTML(f'<span style="color:red;">{var} DataFrame is not loaded or does not exist.</span>'))

        user_protein_id_set = list(set(protein_id_list))
        user_protein_name_set = list(set(protein_name_list))

        if len(user_protein_id_set) > 1 and len(user_protein_name_set) == 1:
            self.user_protein_id = '_'.join(user_protein_id_set)
            self.protein_name_short = user_protein_name_set[0]

        elif len(user_protein_id_set) > 1 and len(user_protein_name_set) > 1:
            self.user_protein_id = '_'.join(user_protein_id_set)
            self.protein_name_short = '_'.join(user_protein_name_set)

        elif len(user_protein_name_set) == 1:
            self.user_protein_id = user_protein_id_set[0]
            self.protein_name_short = user_protein_name_set[0]

        self.available_data_variables = self.filtered_data_variables.copy()
    # Function to create order input widgets
    def create_order_input_widgets(self):
        """Rebuild ``label_widgets`` and ``order_widgets`` from ``available_data_variables``.

        Each variable gets a Text widget pre-filled with its label and an
        IntText widget pre-filled with its position in the dict.
        """
        order_layout = widgets.Layout(width='90%')

        labels = self.label_widgets = {}
        orders = self.order_widgets = {}
        for position, (var, info) in enumerate(self.available_data_variables.items()):
            labels[var] = widgets.Text(
                value=info['label'],
                description='',
                layout=widgets.Layout(width='150px'),
            )
            orders[var] = widgets.IntText(
                value=position,
                description='',
                layout=order_layout,
            )

    # Function to create widgets
    def create_widgets(self):
        """Create the selection widgets and buttons and hook up their callbacks.

        The protein list is taken from the notebook-level ``combiner``: the
        'Master Protein Accessions' values of its merged DataFrame, most
        frequent first. When there is no combiner, or it has not produced a
        merged DataFrame yet, the dropdown is created without options.
        Display names are looked up in the notebook-level ``proteins_dic``.
        """
        merged = combiner.merged_df if combiner else None
        if merged is not None:
            # value_counts sorts by frequency, so the index is already ordered
            sorted_proteins = merged['Master Protein Accessions'].value_counts().index.tolist()
        else:
            sorted_proteins = []

        self.protein_dropdown = widgets.Dropdown(
            options=[(f"{protein} - {proteins_dic.get(protein, {'name': 'Unknown'})['name']}", protein) for protein in sorted_proteins],
            description='Protein ID:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='90%')
        )

        self.grouping_variable_text = widgets.Text(
            description='Search Term',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='90%')
        )

        self.var_key_dropdown = widgets.SelectMultiple(
            description='Groups',
            style={'description_width': 'initial'},
            disabled=False,
            layout=widgets.Layout(width='90%', height='100px')
        )

        # Selection buttons
        self.add_group_button = widgets.Button(
            description='Add Group',
            button_style='success',
            layout=widgets.Layout(width='150px', height='30px')
        )
        self.search_button = widgets.Button(
            description='Search',
            button_style='info',
            layout=widgets.Layout(width='150px', height='30px')
        )
        self.reset_button = widgets.Button(
            description='Reset Selection',
            button_style='warning',
            layout=widgets.Layout(width='150px', height='30px')
        )

        # Label / order buttons
        self.update_label_button = widgets.Button(
            description="Update Labels",
            button_style='success',
            layout=widgets.Layout(width='125px', height='30px')
        )
        self.update_order_button = widgets.Button(
            description="Update Order",
            button_style='success',
            layout=widgets.Layout(width='125px', height='30px')
        )
        self.reset_labelorder_button = widgets.Button(
            description="Reset to Default",
            button_style='warning',
            layout=widgets.Layout(width='125px', height='30px', margin='10px 10px 0 75px')
        )

        self.update_label_button.on_click(self.on_update_label_click)
        self.update_order_button.on_click(self.on_update_order_click)
        self.reset_labelorder_button.on_click(self.on_reset_click)

        self.label_order_button_box = widgets.HBox([self.update_label_button, self.update_order_button])

        self.var_selection_output = widgets.Output()

        # Selection callbacks
        self.protein_dropdown.observe(self.update_var_keys, names='value')
        self.search_button.on_click(self.search_var_keys)
        self.add_group_button.on_click(self.add_group)
        self.reset_button.on_click(self.reset_selection)

        self.button_box = widgets.HBox(
            [self.search_button, self.add_group_button, self.reset_button],
            layout=widgets.Layout(
                height='40px',
                width='90%',
                overflow='hidden',
                justify_content='space-between'
            )
        )
    # Function to update var_key_dropdown based on selected protein
    def update_var_keys(self, change):
        self.selected_protein = change['new']
        current_selection = set(self.var_key_dropdown.value)

        new_options = list(map(str, self.available_grouping_vars))

        self.var_key_dropdown.options = sorted(set(self.var_key_dropdown.options).union(new_options))
        self.var_key_dropdown.value = tuple(current_selection.intersection(self.var_key_dropdown.options))

    # Function to search and filter var_keys based on the grouping variable text input
    def search_var_keys(self, b):
        group_name = self.grouping_variable_text.value
        if group_name:
            matching_keys = [key for key in self.var_key_dropdown.options if group_name in key]
            self.var_key_dropdown.value = matching_keys
        else:
            with self.var_selection_output:
                self.var_selection_output.clear_output()
                display(HTML('<b style="color:red;">Please enter a group name to search.</b>'))

    # Function to add a group of selected var_keys to the list
    def add_group(self, b):
        """Add the selected groups of the selected protein and rebuild the plot inputs.

        Each selected group becomes the key ``"<protein>_<group>"`` in
        ``selected_var_keys_list`` (duplicates are ignored). A summary is shown
        in ``var_selection_output``; then the data is re-extracted, filtered to
        the selection, processed, and the label / order widgets are rebuilt and
        displayed. Without a protein and at least one group only an error
        message is shown. ``b`` is the clicked button (unused).
        """
        self.selected_protein = self.protein_dropdown.value
        selected_keys = list(self.var_key_dropdown.value)

        if not (selected_keys and self.selected_protein):
            with self.var_selection_output:
                self.var_selection_output.clear_output()
                display(HTML('<b style="color:red;">Please select a protein and at least one key.</b>'))
            return

        combined_keys = [f"{self.selected_protein}_{key}" for key in selected_keys]
        # dict.fromkeys drops duplicates but keeps first-seen order. The order
        # of this list decides the default order of the variables later on, so
        # it must not depend on set iteration order.
        self.selected_var_keys_list = list(dict.fromkeys(self.selected_var_keys_list + combined_keys))

        with self.var_selection_output:
            self.var_selection_output.clear_output()

            # Empty heading used as vertical spacing
            display(HTML("<h3> </h3>"))

            display(HTML(f"<b>{len(combined_keys)} variables added.</b>"))
            display(HTML(f"<b>Selected variables:</b> {', '.join(combined_keys)}"))
            display(HTML(f"<b>Total unique variables:</b> {len(self.selected_var_keys_list)}"))
            display(HTML(f"<b>All unique variables:</b> {', '.join(self.selected_var_keys_list)}"))

        self.grouping_variable_text.value = ''
        self.data_variables = self.extract_and_format_data()

        self.filtered_data_variables = self.create_filtered_data_variables()
        self.process_data_variables()

        # Rebuild the label / order editors and remember their defaults
        self.create_order_input_widgets()
        self.default_label_values = {key: self.label_widgets[key].value for key in self.label_widgets}
        self.default_order_values = [info['label'] for info in self.available_data_variables.values()]
        self.display_label_order_widgets()

    # Function to reset the selection
    def reset_selection(self, b):
        """Clear the current selection, cached data and label/order state.

        Args:
            b: Button instance passed by the click callback; not used.
        """
        # Emptied in place, so other references to this list are cleared too.
        self.selected_var_keys_list.clear()

        # Selection widgets.
        self.protein_dropdown.value = None
        self.var_key_dropdown.options = []
        self.grouping_variable_text.value = ''

        # Cached data (each attribute gets its own fresh dict).
        self.loaded_data, self.filtered_data_variables = {}, {}
        self.available_data_variables, self.available_grouping_vars = {}, {}

        # Label/order widgets and their remembered defaults.
        self.label_widgets, self.order_widgets = {}, {}
        self.default_label_values, self.default_order_values = {}, []

        with self.var_selection_output:
            self.var_selection_output.clear_output()
            display(HTML('<b style="color:green;">Selection has been reset.</b>'))

        # The label/order area is stale once the selection is gone.
        self.label_order_output.clear_output()

    # Function to display messages in the output widget
    def display_message(self, message, is_error=False):
        """Show ``message`` in ``self.message_output``, replacing any previous one.

        Args:
            message: Text to show (interpolated into bold HTML).
            is_error: When truthy the text is red, otherwise green.
        """
        color = 'red' if is_error else 'green'
        with self.message_output:
            self.message_output.clear_output()
            display(HTML(f"<b style='color:{color};'>{message}</b>"))

    # Function to update order based on new order input
    def update_order(self, order_labels):
        """Reorder ``self.available_data_variables`` to follow ``order_labels``.

        Args:
            order_labels: Sequence containing every current label exactly
                once, in the desired order.

        Raises:
            ValueError: If the number of labels differs from the number of
                available variables, if ``order_labels`` or the available
                labels contain duplicates, or if a label is unknown.
        """
        current = self.available_data_variables
        labels_list = [info['label'] for info in current.values()]

        if len(order_labels) != len(labels_list):
            raise ValueError("Number of labels provided does not match the number of items to reorder.")

        if len(order_labels) != len(set(order_labels)):
            raise ValueError("Duplicate labels found in order_labels. Please provide unique labels.")

        # Duplicate labels would make the label -> variable mapping ambiguous.
        known_labels = set(labels_list)
        if len(labels_list) != len(known_labels):
            raise ValueError("Duplicate labels found in available data variables. Cannot reorder unambiguously.")

        if not known_labels.issuperset(order_labels):
            raise ValueError("One or more provided labels are invalid.")

        # Dicts keep insertion order, so rebuilding in the requested label
        # order is what performs the reordering.
        entry_by_label = {info['label']: (var, info) for var, info in current.items()}
        self.available_data_variables = dict(entry_by_label[label] for label in order_labels)

    # Event handler for updating labels
    def on_update_label_click(self, b):
        """Apply the label widgets' values and report the outcome to the user.

        Any exception is caught and shown as a red message instead of being
        raised, because this runs as a button callback.

        Args:
            b: Button instance passed by the click callback; not used.
        """
        try:
            self.update_labels()
            self.display_message("Labels updated successfully.")
        except Exception as err:
            failure = f"Error updating labels: {err}"
            self.display_message(failure, is_error=True)

    # Event handler for updating order
    def on_update_order_click(self, b):
        """Parse the comma-separated order text and apply it, reporting the outcome.

        Args:
            b: Button instance passed by the click callback; not used.
        """
        # Comma-separated labels; surrounding whitespace is not significant.
        raw_labels = self.new_order_input.value.split(',')
        order_list = list(map(str.strip, raw_labels))
        try:
            self.update_order(order_list)
            self.display_message("Order updated successfully.")
        except Exception as err:
            failure = f"Error updating order: {err}"
            self.display_message(failure, is_error=True)

    # Event handler for resetting labels and order
    def on_reset_click(self, b):
        """Restore the default labels and order, reporting the outcome to the user.

        The label widgets and the order text box are set back to the values
        remembered in ``self.default_label_values`` and
        ``self.default_order_values``, and those defaults are then applied.

        ``update_labels`` and ``update_order`` are called directly rather than
        through the click handlers: those handlers catch their own exceptions,
        so a failure there would be overwritten by the success message below.

        Args:
            b: Button instance passed by the click callback; not used.
        """
        try:
            # Put every label widget back to its remembered default.
            for key, widget in self.label_widgets.items():
                widget.value = self.default_label_values[key]

            # Show the default order in the text box as well.
            self.new_order_input.value = ', '.join(self.default_order_values)

            # Apply the defaults; any error propagates to the handler below.
            self.update_labels()
            self.update_order(list(self.default_order_values))

            self.display_message("Labels and order reset to default.")
        except Exception as e:
            self.display_message(f"Error resetting labels and order: {e}", is_error=True)

    # Function to display label and order widgets
    
    def display_label_order_widgets(self):
        """Build the label-editing and re-ordering controls and return them.

        Creates a fresh ``self.message_output`` and ``self.new_order_input``,
        wires the three buttons to this object's click handlers, and returns
        the assembled container without displaying it.

        Returns:
            VBox holding the per-variable label rows, the order text area, the
            button row and the message output.
        """
        def make_fixed_button(description, button_style):
            """Create a 150px x 30px button; every call gets its own Layout."""
            return widgets.Button(
                description=description,
                button_style=button_style,
                layout=widgets.Layout(width='150px', height='30px', overflow='hidden')
            )

        # Output widget for success/error messages from the button handlers.
        self.message_output = widgets.Output()

        # One row per variable: a numbered description next to its label editor.
        label_rows = [widgets.HTML(value="<h3><u>Update Labels:</u></h3>")]
        for position, (var, info) in enumerate(self.available_data_variables.items(), start=1):
            caption = widgets.Label(
                value=f"{position})  {info['label']}  -  {info.get('protein_species', '')}  -  {info.get('protein_name', '')}",
                layout=widgets.Layout(width='90%', height='30px', overflow='hidden')
            )
            # Create a fallback Text only when no editor is registered for this
            # variable, instead of instantiating a throwaway widget per row.
            if var in self.label_widgets:
                editor = self.label_widgets[var]
            else:
                editor = widgets.Text()
            label_rows.append(HBox([caption, editor]))
        update_label_box = VBox(
            label_rows,
            layout=widgets.Layout(margin='0px', height='auto', width='90%', overflow='visible', padding='0px')
        )

        # Re-ordering section: instructions plus a text area pre-filled with
        # the current label order.
        label_above_input = widgets.HTML(
            value="<h3><u>Re-order Samples:</u></h3>Enter labels in desired order separated by commas (e.g., label_1, label_2, label_3)")
        current_labels = [info['label'] for info in self.available_data_variables.values()]
        self.new_order_input = widgets.Textarea(
            value=', '.join(current_labels),
            layout=widgets.Layout(width='90%', height='auto', overflow='hidden')
        )
        update_order_box = VBox(
            [label_above_input, self.new_order_input],
            layout=widgets.Layout(margin='0px', height='200px', overflow='visible', padding='0px')
        )

        # Action buttons, spread horizontally across the row.
        update_label_button = make_fixed_button("Update Labels", 'success')
        update_order_button = make_fixed_button("Update Order", 'success')
        reset_labelorder_button = make_fixed_button("Reset to Default", 'warning')
        label_order_button_box = widgets.HBox(
            [update_label_button, update_order_button, reset_labelorder_button],
            layout=widgets.Layout(
                width='90%',
                height='auto',
                overflow='visible',
                justify_content='space-between'
            )
        )

        update_label_button.on_click(self.on_update_label_click)
        update_order_button.on_click(self.on_update_order_click)
        reset_labelorder_button.on_click(self.on_reset_click)

        # Stack all sections vertically; the caller decides where to show them.
        vert_button_box = VBox(
            [
                update_label_box,
                update_order_box,
                label_order_button_box,
                self.message_output
            ],
            layout=widgets.Layout(
                margin='0px',
                width='90%',
                height='auto',
                flex_flow='column',
                align_items='stretch'
            )
        )
        return vert_button_box

            
    def update_labels(self):
        """Copy each label widget's current value into its variable's ``'label'`` entry.

        Raises:
            KeyError: If a variable has no entry in ``self.label_widgets``.
        """
        for key, info in self.available_data_variables.items():
            info['label'] = self.label_widgets[key].value

    # Function to display the initial selection widgets
            
    def display_widgets(self):
        """Assemble the selection UI and return its containers without displaying them.

        Returns:
            Tuple ``(input_widgets, output_widgets, vert_button_box)``: the
            selection controls, the selection output area, and the label/order
            controls built by ``display_label_order_widgets``.
        """
        # Selection controls, stacked top to bottom.
        input_children = [
            widgets.HTML("<h3><u>Select Protein and Grouping Variables:</u></h3>"),
            self.protein_dropdown,
            self.grouping_variable_text,
            self.var_key_dropdown,
            self.button_box,
            self.label_order_output,
        ]
        input_layout = widgets.Layout(
            height='auto',
            width='90%',
            margin='0px',
            padding='0px',
            overflow='hidden',
        )
        input_widgets = VBox(input_children, layout=input_layout)

        # Area where selection feedback is rendered.
        output_widgets = VBox(
            [self.var_selection_output],
            layout=widgets.Layout(margin='0px', padding='0px'),
        )

        return input_widgets, output_widgets, self.display_label_order_widgets()

In [11]:

class HeatmapPlotHandler:
    def __init__(self, selector):
        """Mirror the selector's state and build the plotting-option widgets.

        Every attribute of ``selector`` that is not callable, does not start
        with ``__`` and does not contain ``"button"`` in its name is copied
        onto this handler. The widget builders below read
        ``protein_name_short``, ``user_protein_id``, ``ms_average_choice``,
        ``bio_or_pep`` and ``legend_title`` from ``self`` before this method
        assigns them.
        NOTE(review): presumably those are supplied by the selector copy or
        defined on the class -- confirm.

        Args:
            selector: Object whose data attributes are copied onto this
                handler.
        """
        instance_variables = {
            attr: getattr(selector, attr)
            for attr in dir(selector)
            if not callable(getattr(selector, attr))  # Exclude methods
            and not attr.startswith("__")            # Exclude magic methods
            and "button" not in attr                 # Exclude attributes containing "button"
        }
        for key, value in instance_variables.items():
            setattr(self, key, value)

        # Fixed plot switches; plot_heatmap feeds the landscape filename suffix.
        self.plot_heatmap, self.plot_zero = 'yes', 'no'

        # Legend-title inputs (1-based) that are hidden for each
        # (ms_average_choice, bio_or_pep) combination. For these combinations
        # inputs 1, 3, 4 and 5 take their titles from self.legend_title. Any
        # other combination (e.g. ('no', 'no')) keeps all five inputs visible
        # and uses the module-level legend_title throughout.
        hidden_legend_inputs = {
            ('yes', '1'): (3,),
            ('yes', '2'): (4,),
            ('yes', 'no'): (3, 4),
            ('no', '1'): (3, 5),
            ('no', '2'): (4, 5),
        }

        def get_valid_gradient_colormaps():
            """Return the gradient colormap options from settings."""
            return settings.valid_gradient_cmaps

        def get_valid_discretecolormaps():
            """Return the discrete colormap options from settings."""
            return settings.valid_discrete_cmaps

        def display_plotting_options(self):
            """Create the averaging / specific-peptide selection widgets."""
            dropdown_layout = widgets.Layout(width='50%')
            self.plot_message = widgets.HTML("<h3><u>Ploting Options:</u></h3>")

            self.ms_average_choice_dropdown = widgets.Dropdown(
                options=['yes', 'no'],
                description='Plot Averaged Data:',
                disabled=False,
                style={'description_width': 'initial'},
                layout=dropdown_layout,
            )
            self.bio_or_pep_dropdown = widgets.Dropdown(
                options=[('None', 'no'), ('Peptide Intervals', '1'), ('Bioactive Functions', '2')],
                description='Plot Specific Peptides:',
                disabled=False,
                style={'description_width': 'initial'},
                layout=dropdown_layout,
            )
            self.specific_select_multiple = widgets.SelectMultiple(
                options=[],
                description='Specific Options:',
                disabled=False,
                layout=widgets.Layout(display='none')  # Start hidden
            )

            # Refresh the multi-select's options whenever the mode changes.
            self.bio_or_pep_dropdown.observe(
                lambda change: on_selection_change(self, change),
                names='value'
            )

        def create_plotting_widgets(self):
            """Build (or rebuild) the colour, label, toggle, filename and button widgets.

            Runs once during construction and again on every dropdown change,
            replacing the previously created widget objects on ``self``.
            """
            generate_filenames(self)

            # Layouts. The "invisible" layout is only used by hidden legend
            # inputs, whose display is switched to 'none' below.
            description_layout_invisible = widgets.Layout(width='90%', overflow='visible')
            description_layout = widgets.Layout(width='90%', overflow='visible')
            dropdown_layout = widgets.Layout(width='50%', overflow='visible')
            dropdown_layout_large = widgets.Layout(width='90%', overflow='visible')

            # Colour widgets
            self.hm_selected_color = widgets.Dropdown(
                options=get_valid_gradient_colormaps(),
                value=default_hm_color,
                description='Heatmap:',
                layout=dropdown_layout,
                style={'description_width': 'initial'}
            )
            self.lp_selected_color = widgets.Dropdown(
                options=get_valid_discretecolormaps(),
                value=default_lp_color,
                description='Line Plot:',
                layout=dropdown_layout,
                style={'description_width': 'initial'}
            )
            self.avglp_selected_color = widgets.Dropdown(
                options=valid_discrete_cmaps,
                value=default_avglp_color,
                description='Avg Line Plot:',
                layout=dropdown_layout,
                style={'description_width': 'initial'}
            )

            self.color_message = widgets.HTML("<h3><u>Color Options:</u></h3>")
            self.color_widget_box = widgets.VBox([
                self.color_message,
                self.hm_selected_color,
                self.lp_selected_color,
                self.avglp_selected_color
            ])

            # Figure label widgets
            self.xaxis_label_input = widgets.Text(
                value=f"{self.protein_name_short} Sequence",
                description='x-axis label:',
                layout=description_layout,
                style={'description_width': 'initial'}
            )
            self.yaxis_label_input = widgets.Text(
                value="Averaged Peptide Abundance",
                description='y-axis label:',
                layout=description_layout,
                style={'description_width': 'initial'}
            )
            self.yaxis_position = widgets.IntSlider(
                value=0,
                min=-10,
                max=10,
                step=1,
                layout=description_layout,
                description='y-axis title position:',
                style={'description_width': 'initial'}
            )

            def make_legend_input(title, hide):
                """Create one legend-title text box, hidden when ``hide`` is true."""
                field = widgets.Text(
                    value=title,
                    description=f'Legend title ({title}):',
                    layout=description_layout_invisible if hide else description_layout,
                    style={'description_width': 'initial'}
                )
                if hide:
                    field.layout.display = 'none'
                return field

            # Legend-title inputs: visibility and title source depend on the
            # current plotting mode (see hidden_legend_inputs above).
            hidden = hidden_legend_inputs.get((self.ms_average_choice, self.bio_or_pep))
            legend_inputs = {}
            for index in range(1, 6):
                # Input 2 always uses the module-level titles.
                if hidden is None or index == 2:
                    titles = legend_title
                else:
                    titles = self.legend_title
                hide = hidden is not None and index in hidden
                legend_inputs[index] = make_legend_input(titles[index - 1], hide)
            self.legend_title_input_1 = legend_inputs[1]
            self.legend_title_input_2 = legend_inputs[2]
            self.legend_title_input_3 = legend_inputs[3]
            self.legend_title_input_4 = legend_inputs[4]
            self.legend_title_input_5 = legend_inputs[5]

            # Plot orientation toggles
            self.plot_port = widgets.ToggleButton(
                value=True,
                description='Portrait Plot',
                disabled=False,
                button_style='',
                tooltip='Show updated plot',
                icon='check'
            )
            self.plot_land = widgets.ToggleButton(
                value=True,
                description='Landscape Plot',
                disabled=False,
                button_style='',
                tooltip='Show updated plot',
                icon='check'
            )

            self.create_plot_message = widgets.HTML("<h3><u>Create Plot Checkboxs:</u></h3>")
            self.plot_toggle_buttons = widgets.HBox([
                self.plot_port,
                self.plot_land
            ])
            self.plot_toggle_widget_box = widgets.VBox([
                self.create_plot_message,
                self.plot_toggle_buttons,
            ])

            self.figure_label_message = widgets.HTML("<h3><u>Figure Label Options:</u></h3>")
            self.figure_label_box = widgets.VBox([
                self.figure_label_message,
                self.xaxis_label_input,
                self.yaxis_label_input,
                self.yaxis_position,
                self.legend_title_input_1,
                self.legend_title_input_2,
                self.legend_title_input_3,
                self.legend_title_input_4,
                self.legend_title_input_5,
                self.plot_toggle_widget_box
            ], layout=widgets.Layout(
                width='100%',
                height='370px',
                margin='0px')
            )

            # Filename widgets, pre-filled by generate_filenames().
            self.filename_port_input = widgets.Text(
                value=self.display_filename_port,
                description='Filename (Portrait):',
                layout=dropdown_layout_large,
                style={'description_width': 'initial'}
            )
            self.filename_land_input = widgets.Text(
                value=self.display_filename_land,
                description='Filename (Landscape):',
                layout=dropdown_layout_large,
                style={'description_width': 'initial'}
            )
            self.filename_label_message = widgets.HTML("<h3><u>Save As Options</u></h3>")
            self.plot_filename_widget_box = widgets.VBox([
                self.filename_label_message,
                self.filename_port_input,
                self.filename_land_input
            ])

            # Update / save buttons
            self.update_button = widgets.Button(
                description='Show/Update Plot',
                button_style='success',
                tooltip='Click to update the plot',
                icon='refresh'
            )
            self.save_button = widgets.Button(
                description='Save Plot',
                button_style='info',
                tooltip='Click to save the plot',
                icon='save'
            )
            # Bind the click handlers here: this function runs again on every
            # dropdown change and replaces both buttons, so handlers attached
            # only once after the first build would be missing on the
            # replacement buttons.
            self.update_button.on_click(self.on_update_plot_clicked)
            self.save_button.on_click(self.on_save_plot_clicked)

            self.update_save_box = widgets.HBox([self.update_button, self.save_button])

        def on_dropdown_change(self, change):
            """Sync the plotting mode from the dropdowns and rebuild the widgets."""
            self.ms_average_choice = self.ms_average_choice_dropdown.value
            self.bio_or_pep = self.bio_or_pep_dropdown.value

            # A specific selection only applies when a peptide/function mode
            # is active.
            if self.bio_or_pep != 'no':
                self.selected_bio_or_pep = self.specific_select_multiple.value
            else:
                self.selected_bio_or_pep = []

            if self.selected_bio_or_pep:
                self.selected_peptides, self.selected_functions = proceed_with_label_specific_options(
                    self.selected_bio_or_pep, self.bio_or_pep
                )
            else:
                self.selected_peptides, self.selected_functions = [], []

            create_plotting_widgets(self)

        def extract_non_zero_non_nan_values(df):
            """Collect the distinct non-zero, non-NaN values of ``df``.

            String cells are split on ``'; '`` and each part is collected
            separately.
            """
            unique_functions = set()
            for value in df.stack().values:
                if value != 0 and not pd.isna(value):
                    if isinstance(value, str):
                        unique_functions.update(value.split('; '))
                    else:
                        unique_functions.add(value)
            return unique_functions

        def on_selection_change(self, change):
            """Refresh the multi-select's options to match the chosen mode."""
            if change['type'] != 'change' or change['name'] != 'value':
                return

            self.bio_or_pep = self.bio_or_pep_dropdown.value

            # Aggregate the distinct functions and peptide columns across all
            # available data variables.
            unique_functions = set()
            unique_peptides = set()
            for var in self.available_data_variables:
                df = self.available_data_variables[var]['bioactive_peptide_func_df']
                df.replace('0', 0, inplace=True)  # Standardize zero representations (mutates the stored frame)
                unique_functions.update(extract_non_zero_non_nan_values(df))

                abs_df = self.available_data_variables[var]['bioactive_peptide_abs_df']
                unique_peptides.update(col for col in abs_df.columns if col not in ['AA', 'count', 'average'])

            unique_peptides_list = sorted(unique_peptides)
            unique_functions_list = sorted(unique_functions)

            if self.bio_or_pep == '1':  # Peptide Intervals
                self.specific_select_multiple.options = [(peptide, peptide) for peptide in unique_peptides_list]
                self.specific_select_multiple.layout.display = 'block'
            elif self.bio_or_pep == '2':  # Bioactive Functions
                self.specific_select_multiple.options = [(function, function) for function in unique_functions_list]
                self.specific_select_multiple.layout.display = 'block'
            else:
                self.specific_select_multiple.options = [""]
                self.specific_select_multiple.layout.display = 'none'

        def generate_additional_vars_str(self):
            """Return the underscore-joined filename suffix for the current mode."""
            additional_vars = []

            if self.plot_heatmap == 'yes':
                additional_vars.append('heatmap')
            elif self.plot_heatmap == 'no':
                additional_vars.append('no-heatmap')

            if self.bio_or_pep == '1':
                additional_vars.append('intervals')
            elif self.bio_or_pep == '2':
                additional_vars.append('bioactive-functions')
            elif self.bio_or_pep == 'no':
                additional_vars.append('averages-only')

            return '_'.join(additional_vars)

        def generate_filenames(self):
            """Set the default axis labels and the portrait/landscape filenames."""
            additional_vars_str = generate_additional_vars_str(self)
            self.xaxis_label = f"\n{self.protein_name_short.replace('_', ' ')} Sequence"
            self.yaxis_label = 'Averaged Peptide Abundance'

            # Replace anything that is not a word character or '-' with '-'.
            self.protein_filename_short = re.sub(r'[^\w-]', '-', self.protein_name_short)
            self.display_filename_port = f'portrait_{self.user_protein_id}_{self.protein_filename_short}_average-only'
            self.display_filename_land = f'landscape_{self.user_protein_id}_{self.protein_filename_short}_{additional_vars_str}'

        # Build the plotting widgets first, then the option dropdowns.
        create_plotting_widgets(self)
        display_plotting_options(self)

        # Any change to the three option widgets re-syncs the mode and
        # rebuilds the plotting widgets.
        for control in (
            self.ms_average_choice_dropdown,
            self.bio_or_pep_dropdown,
            self.specific_select_multiple,
        ):
            control.observe(
                lambda change: on_dropdown_change(self, change), names='value'
            )

        # Output area the plot is rendered into.
        self.plot_output = widgets.Output(layout=widgets.Layout(
            width='100%',
            height='100%',
        ))
             
    #self.plot_output.capture(clear_output=True)
    def on_update_plot_clicked(self, b):
        """Redraw the plots inside ``self.plot_output`` without saving them.

        Registered as the click handler of ``self.update_button``. The output
        widget is cleared, ``update_plot`` is called with the current widget
        values and ``save_fig='no'``, and the result is stored in the
        module-level ``figures`` variable.

        Args:
            b: Argument supplied by the click callback; not used.

        Returns:
            The value returned by ``update_plot``.
        """
        global figures

        with self.plot_output:
            # Drop whatever was rendered by the previous click
            self.plot_output.clear_output()

            # Positional arguments forwarded to update_plot
            plot_args = (
                # Data and current selections
                self.available_data_variables,
                self.ms_average_choice,
                self.bio_or_pep,
                self.selected_peptides,
                self.selected_functions,
                # Colour widgets
                self.hm_selected_color.value,
                self.lp_selected_color.value,
                self.avglp_selected_color.value,
                # Axis label / position widgets
                self.xaxis_label_input.value,
                self.yaxis_label_input.value,
                self.yaxis_position.value,
                # Legend title inputs
                self.legend_title_input_1.value,
                self.legend_title_input_2.value,
                self.legend_title_input_3.value,
                self.legend_title_input_4.value,
                self.legend_title_input_5.value,
                # Landscape / portrait plot widgets
                self.plot_land.value,
                self.plot_port.value,
                # Filenames exactly as typed into the input widgets
                self.filename_port_input.value,
                self.filename_land_input.value,
            )
            figures = update_plot(*plot_args, save_fig='no')
            return figures
    
    #self.plot_output.capture(clear_output=True)
    def on_save_plot_clicked(self, b):
        """Redraw the plots and ask ``update_plot`` to save them.

        Registered as the click handler of ``self.save_button``. The output
        widget is cleared, ``self.filename_land`` / ``self.filename_port`` are
        refreshed from the filename input widgets via ``update_filenames``,
        ``update_plot`` is called with ``save_fig='yes'``, and a confirmation
        message is displayed.

        Args:
            b: Argument supplied by the click callback; not used.
        """
        with self.plot_output:
            # Drop whatever was rendered by the previous click
            self.plot_output.clear_output()

            # NOTE(review): the inputs are passed as (port, land) while the
            # result is unpacked as (land, port) - confirm this matches the
            # return order of update_filenames.
            resolved_names = update_filenames(
                self.filename_port_input.value,
                self.filename_land_input.value,
            )
            self.filename_land, self.filename_port = resolved_names

            # Positional arguments forwarded to update_plot
            plot_args = (
                # Data and current selections
                self.available_data_variables,
                self.ms_average_choice,
                self.bio_or_pep,
                self.selected_peptides,
                self.selected_functions,
                # Colour widgets
                self.hm_selected_color.value,
                self.lp_selected_color.value,
                self.avglp_selected_color.value,
                # Axis label / position widgets
                self.xaxis_label_input.value,
                self.yaxis_label_input.value,
                self.yaxis_position.value,
                # Legend title inputs
                self.legend_title_input_1.value,
                self.legend_title_input_2.value,
                self.legend_title_input_3.value,
                self.legend_title_input_4.value,
                self.legend_title_input_5.value,
                # Landscape / portrait plot widgets
                self.plot_land.value,
                self.plot_port.value,
                # Filenames as returned by update_filenames above
                self.filename_port,
                self.filename_land,
            )
            update_plot(*plot_args, save_fig='yes')

            # Notify the user that files have been saved
            saved_notice = HTML(
                'Files have been saved to the <b>heatmap_images</b> directory'
            )
            display(saved_notice)
    """            
    def get_layout(self):
        # Display the widgets as needed
        display(HTML(f"<h3><u>Select line plot options</u></h3>"))
        display(self.ms_average_choice_dropdown)
        display(self.bio_or_pep_dropdown)
        display(self.specific_select_multiple)
        display(self.color_widget_box)
        display(self.figure_label_box)
        display(self.plot_filename_widget_box)
        display(self.update_save_box)
        display(self.plot_output)
        return  # Suppress implicit None
    """;
    def get_layout(self):
        """Build and return the grid holding the plot-option widgets.

        Nothing is displayed here; the caller is expected to display the
        returned widget.

        Returns:
            A 1-row, 2-column ``GridspecLayout`` whose cell ``[0, 0]`` holds a
            ``VBox`` with the option widgets. The second column is left empty.
        """
        layout_grid = GridspecLayout(
            1, 2, width='1000px', height='auto', grid_gap='5px'
        )

        # All option widgets stacked vertically, top to bottom
        option_widgets = [
            self.plot_message,
            self.ms_average_choice_dropdown,
            self.bio_or_pep_dropdown,
            self.specific_select_multiple,
            self.color_widget_box,
            self.figure_label_box,
            self.plot_filename_widget_box,
            self.update_save_box,
        ]
        option_column = VBox(option_widgets)
        option_column.layout.width = '90%'

        layout_grid[0, 0] = option_column
        return layout_grid
    
        #return input_widgets,  self.figure_label_box, self.update_save_box

    def show_plots(self):
        """Display ``self.plot_output``; nothing is returned."""
        display(self.plot_output)
        return None

In [12]:

# Data-selection handler; generate_grid() lays out its widgets and its buttons
# are wired to show_plotting_options() further down in this cell
selector = HeatmapDataHandler()

# Output container that is cleared and refilled each time the UI is refreshed
dynamic_output = widgets.Output()

# Plot handler; stays None until generate_grid() creates a HeatmapPlotHandler
app = None

def generate_grid(selector):
    """Recreate the global plot handler and lay out the selector's widgets.

    Side effect: the module-level ``app`` is rebound to a new
    ``HeatmapPlotHandler(selector)`` on every call.

    Args:
        selector: Object passed to ``HeatmapPlotHandler``; its
            ``display_widgets()`` must return exactly three widgets.

    Returns:
        A 2x2 ``GridspecLayout`` (1000px wide) with the three widgets placed
        in cells ``[0, 0]``, ``[0, 1]`` and ``[1, 0]``.
    """
    global app

    # Reinitialize the app with updated data
    app = HeatmapPlotHandler(selector)

    input_panel, output_panel, button_panel = selector.display_widgets()
    panels = (input_panel, output_panel, button_panel)

    # Give every panel the same fixed height
    for panel in panels:
        panel.layout.height = '300px'

    grid = GridspecLayout(2, 2, width='1000px', grid_gap='5px')

    # Inputs top-left, outputs top-right, button box bottom-left
    cells = ((0, 0), (0, 1), (1, 0))
    for cell, panel in zip(cells, panels):
        grid[cell] = panel

    return grid

# Function to dynamically update and reinitialize the app
def show_plotting_options(change=None):
    """Rebuild and redisplay the whole UI inside ``dynamic_output``.

    Used as the click handler for the selector's buttons. The previous
    content of ``dynamic_output`` is cleared, then the selector grid, the
    plot-option layout and the plot output are displayed again.

    Args:
        change: Argument supplied by the click callback; not used.
    """
    with dynamic_output:
        # Clear the previous content
        clear_output(wait=True)

        # generate_grid() also rebinds the module-level ``app`` to a fresh
        # HeatmapPlotHandler, so ``app`` below is the new handler.
        display(generate_grid(selector))

        # get_layout() only builds and returns the grid, so it is built once
        # and displayed explicitly.
        display(app.get_layout())

        # show_plots() displays the plot output itself and returns None;
        # passing its result to display() would render a stray "None".
        app.show_plots()

# Rebuild the whole UI whenever one of the selector's buttons is clicked
selector.add_group_button.on_click(show_plotting_options)
selector.reset_button.on_click(show_plotting_options)
selector.update_label_button.on_click(show_plotting_options)
selector.update_order_button.on_click(show_plotting_options)
selector.reset_labelorder_button.on_click(show_plotting_options)

# Initial display
with dynamic_output:
    grid = generate_grid(selector)
    display(grid)
    display(selector.label_order_output)
    # generate_grid() rebinds ``app``; the check guards against it still
    # being the None placeholder.
    if app is not None:
        display(app.get_layout())
        # show_plots() displays the plot output itself and returns None;
        # wrapping the call in display() would render a stray "None".
        app.show_plots()


# Display the dynamic output container
display(dynamic_output)

Output()