In [1]:
import numpy as np
from datetime import datetime
import pandas as pd
import json, io, base64, re, os, requests, time, traceback
from IPython.display import display, HTML, clear_output
from collections import defaultdict

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from itertools import combinations
from scipy.stats import pearsonr, spearmanr

import ipywidgets as widgets
from ipywidgets import (
    interact, interactive, fixed, interact_manual,
    GridspecLayout, VBox, HBox, Layout, Output
)
from xml.etree import ElementTree
from utils.uniprot_client import UniProtClient
# Initialize settings
import _settings as settings

# Global variables from settings
# Module-level alias for the SPEC_TRANSLATE_LIST value defined in _settings.
# NOTE(review): not referenced in the visible part of this notebook — confirm
# it is still needed by later cells.
spec_translate_list = settings.SPEC_TRANSLATE_LIST
# Disabled alias for a plotly colour setting; left commented out.
#plotly_colors = settings.plotly_colors

In [2]:
def safe_concat(dfs, **kwargs):
    """Concatenate DataFrames on their shared columns, skipping unusable inputs.

    Entries of ``dfs`` that are not DataFrames, or that are empty, are ignored.
    Columns that are entirely NA are dropped from each remaining frame, and
    only the columns present in every remaining frame are kept. The kept
    columns follow the order of the first usable frame so the result has a
    reproducible layout (a plain ``set`` intersection would yield an arbitrary
    column order).

    Args:
        dfs: Collection of candidate DataFrames.
        **kwargs: Forwarded unchanged to ``pd.concat`` (e.g. ``ignore_index``).

    Returns:
        The concatenated DataFrame, or an empty DataFrame when there is
        nothing usable to concatenate.
    """
    if not dfs:
        return pd.DataFrame()

    # Keep only real, non-empty frames and strip their all-NA columns
    cleaned_dfs = [
        df.dropna(axis=1, how='all')
        for df in dfs
        if isinstance(df, pd.DataFrame) and not df.empty
    ]
    if not cleaned_dfs:
        return pd.DataFrame()

    # Shared columns, in first-frame order; dict.fromkeys removes repeated
    # labels while preserving that order
    first, others = cleaned_dfs[0], cleaned_dfs[1:]
    common_cols = [
        col for col in dict.fromkeys(first.columns)
        if all(col in other.columns for other in others)
    ]

    return pd.concat([df[common_cols] for df in cleaned_dfs], **kwargs)

# Use the existing create_help_icon function style
def create_help_icon(tooltip_text):
    """Create a help icon widget that shows ``tooltip_text`` on hover.

    The tooltip is placed in an HTML ``title`` attribute, so it is escaped
    first; otherwise a double quote, ``&`` or ``<`` in the text would break
    the markup. Pass plain text, not pre-escaped HTML.

    Args:
        tooltip_text: Plain-text tooltip content.

    Returns:
        A ``widgets.HTML`` wrapping a Font Awesome question-circle icon.
    """
    from html import escape  # local import keeps this cell self-contained

    # Static icon markup; no intermediate widget is needed just to hold a string
    icon_markup = '<i class="fa fa-question-circle" style="color: #007bff;"></i>'
    safe_tooltip = escape(str(tooltip_text), quote=True)
    return widgets.HTML(
        f'<div title="{safe_tooltip}" style="display: inline-block; margin-left: 2px;">{icon_markup}</div>'
    )

# General warning display function 
def display_warning(message):
    """Render ``message`` inside the notebook's standard yellow warning box.

    The message is inserted into the markup as-is, so it may itself contain
    HTML.
    """
    box_style = (
        "color: #856404; background-color: #fff3cd; border: 1px solid #ffeeba; "
        "border-radius: 4px; padding: 10px; margin: 10px 0;"
    )
    markup = (
        f"\n    <div style='{box_style}'>\n"
        f"        <strong>Warning:</strong> {message}\n"
        "    </div>\n"
        "    "
    )
    display(HTML(markup))

In [3]:
class DataTransformation:
    def __init__(self):
        self.merged_df = None
        self.protein_dict = {}
        self.group_data_dict = {}
        self.output_area = None
        self.merged_uploader = None
        self.uniprot_client = UniProtClient()  # Add this line
        
        # Create the checkbox with improved description
        self.plot_lock = widgets.Checkbox(value=True)
        self._initialize_instructions()
                        
    def _initialize_instructions(self):
        self.stepone_output_html_message = """
        <div style='padding: 10px; background-color: #f8f9fa; border-left: 5px solid #007bff; margin: 10px 0;'>
            <h3>Step 1: Upload Data</h3>
            <p>Please upload your data files to begin visualization:</p>
            <ul style='list-style-type: circle;'>
                <li>Upload a merged data file exported from the Data Transformation modeule</li>
                <li>File must contain Master Protein Accessions, unique ID, and Avg_* columns</li>
                <li>Protein information will be automatically retrieved from UniProt or FASTA files</li>
            </ul>
        </div>
        """
        self.stepone_status_output = widgets.Output(
            layout=widgets.Layout(
                max_width='1000px',
                width='100%'
            )
        )
        with self.stepone_status_output:
            display(HTML(self.stepone_output_html_message))

    def _on_file_change(self, change):
        """Handle file upload changes without recreating the UI"""
        # Check if we now have all required data
        self.has_merged = (hasattr(self, 'merged_df') and 
                    isinstance(self.merged_df, pd.DataFrame) and 
                    not self.merged_df.empty)
        
        self.has_groups = (hasattr(self, 'group_data_dict') and 
                    isinstance(self.group_data_dict, dict) and 
                    len(self.group_data_dict) > 0)
        
        self.has_proteins = (hasattr(self, 'protein_dict') and 
                    isinstance(self.protein_dict, dict) and 
                    len(self.protein_dict) > 0)
                
        # Check if we have all required data
        has_required_data = self.has_merged and self.has_groups and self.has_proteins
        
        # Prepare status info section with current data summary
        status_info = ""
        if self.has_merged and hasattr(self, 'merged_df'):
            df = self.merged_df
            status_info += f"<div>Data loaded: {df.shape[0]} rows, {df.shape[1]} columns</div>"
        
        if self.has_groups and hasattr(self, 'group_data_dict'):
            groups = len(self.group_data_dict)
            status_info += f"<div>Groups detected: {groups}</div>"
        
        if self.has_proteins and hasattr(self, 'protein_dict'):
            proteins = len(self.protein_dict)
            status_info += f"<div>Proteins loaded: {proteins}</div>"
        
        if has_required_data:
            # We have all necessary data, so populate the selectors and enable widgets
            self.stepone_output_html_message = f"""
            <div style='padding: 10px; background-color: #e8f5e9; border-left: 5px solid #4caf50; margin: 10px 0;'>
                <h3>Step 1: Upload Data</h3>
                <p>Data successfully loaded:</p>
                <ul style="list-style-type: none;">
                    <li>✅ <b>{self.filename}</b> successfully upload as a merged data file with {df.shape[0]} rows of data</li> 
                    <li>✅ {len(self.group_data_dict) if self.group_data_dict else 0} Valid sample groups detected</li> 
                    <li>✅ {len(self.protein_dict) if self.protein_dict else 0} unique proteins loaded from data file</li>
                    <li>✅ {len(df['function'].unique()) if 'function' in df.columns else 0} unique function combinations loaded from datafile</li>
                </ul>
                <p>You can now proceed to Step 2: variable selection.</p>
            </div>
            """
        else:
            # Still missing data, update the message
            missing_data = self._get_missing_data_message()
            background_color = "#fff3e0"  # Light orange for warning
            border_color = "#ff9800"      # Orange for warning
            
            self.stepone_output_html_message = f"""
            <div style='padding: 10px; background-color: {background_color}; border-left: 5px solid {border_color}; margin: 10px 0;'>
                <h3>Step 1: Upload Data</h3>
                <div style='background-color: #f5f5f5; padding: 8px; border-radius: 4px; margin-bottom: 10px;'>
                    <strong>Current Status:</strong> <span style='color: #ff9800;'>Waiting for required data</span>
                    {status_info}
                </div>
                <p>Please complete the data upload:</p>
                {missing_data}
            </div>
            """
        
        # Always update the display at the end of the method
        with self.stepone_status_output:
            clear_output(wait=True)
            display(HTML(self.stepone_output_html_message))

    def _get_missing_data_message(self):
        message = "<ul style='list-style-type: none;'>"
        if not self.has_merged:
            message += "<li>❌ Missing merged data file</li>"
        else:
            message += "<li>✅ Merged data file uploaded</li>"
            
        if not self.has_groups:
            message += "<li>❌ Missing valid abundance columns (Avg_*)</li>"
        else:
            message += "<li>✅ Valid abundance columns detected</li>"
        
        if not self.has_proteins:
            message += "<li>❌ Missing protein information</li>"
        else:
            message += "<li>✅ Protein information loaded</li>"
        
        message += "</ul>"
        return message
   
    def create_download_link(self, file_path, label):
        """Create a download link for a file."""
        if os.path.exists(file_path):
            # Read file content and encode it as base64
            with open(file_path, 'rb') as f:
                content = f.read()
            b64_content = base64.b64encode(content).decode('utf-8')

            # Generate the download link HTML
            return widgets.HTML(f"""
                <a download="{os.path.basename(file_path)}" 
                   href="data:application/octet-stream;base64,{b64_content}" 
                   style="color: #0366d6; text-decoration: none; margin-left: 20px; font-size: 14px;">
                    {label}
                </a>
            """)
        else:
            # Show an error message if the file does not exist
            return widgets.HTML(f"""
                <span style="color: red; margin-left: 20px; font-size: 14px;">
                    File "{file_path}" not found!
                </span>
            """)

    def setup_data_loading_ui(self):
        """Initialize and display the data loading UI."""
        # Create file upload widget
        self.merged_uploader = widgets.FileUpload(
            accept='.csv,.txt,.tsv,.xlsx',
            multiple=False,
            description='Upload Merged Data File',
            layout=widgets.Layout(width='300px'),
            style={'description_width': 'initial'}
        )

        self.output_area = widgets.Output()

        # Create upload box with example link
        merged_box = widgets.HBox([
            self.merged_uploader,
            self.create_download_link("example_merged_dataframe.csv", "Example")
        ], layout=widgets.Layout(align_items='center'))

        # Create container for status display
        self.status_area = widgets.Output(
            layout=widgets.Layout(
                width='650px',
                margin='0 0 0 20px',
            )
        )

        # Create left column with upload widgets
        upload_widgets = widgets.VBox([
            self.stepone_status_output,
            widgets.HTML("<u>Upload Data File:</u>"),
            merged_box,
            self.status_area,
        ], layout=widgets.Layout(
            width='700px',
            margin='0 20px 0 0',
            overflow='hidden'  # or 'auto' or 'scroll' depending on your needs
        ))
        
        display(upload_widgets)

        # Register observer
        self.merged_uploader.observe(self.on_merged_upload_change, names='value')
    
    def process_group_data_from_dataframe(self, df):
        """
        Process and extract group data from DataFrame columns that have grouping information.
        This function parses column names in the format "ColumnName 'Grouped: (Group1; Group2; ...)'"
        and organizes them into a dictionary structure.
        
        Returns:
        {
            "Group1": ["Column1", "Column2", ...],
            "Group2": ["Column3", "Column4", ...],
            ...
        }
        """
        try:
            group_data_dict = {}
            renamed_columns = {}
            
            # Find all columns that have the 'Grouped:' pattern
            grouped_columns = [col for col in df.columns if " 'Grouped:" in str(col)]
            
            if not grouped_columns:
                display(HTML('<b style="color:orange;">No valid group data could be extracted.</b>'))
                try:
                    # Extract group information from Avg_ columns
                    avg_columns = [col for col in df.columns if col.startswith('Avg_')]
                    if not avg_columns:
                        raise ValueError("No Avg_ columns found in the data file")

                    # Create group data dictionary from Avg_ columns
                    for col in avg_columns:
                        group_name = col.replace('Avg_', '')
                        # Find all abundance columns that correspond to this group
                        group_data_dict[group_name] = col

                    if not group_data_dict:
                        raise ValueError("Could not identify any groups from Avg_ columns")
                        

                    display(HTML(
                        f'<b style="color:green;">Group definition imported from Peptide Data File with {len(group_data_dict)} groups and no replicate data.</b><br>'
                    ))
                    display_warning("Some plot and data export features will be limited without replicate data. Limited features include: \n" +
                                    "• Error bars\n" +
                                    "• Replicate correlative analysis")
                    return group_data_dict, df
                    
                except Exception as e:
                    display(HTML(f'<b style="color:red;">Error loading group definition from Peptide Data File: {str(e)}</b>'))
                    return group_data_dict, df
            
            
            # Extract grouping information from each column
            for col in grouped_columns:
                # Extract the base column name (before the grouping info)
                base_col_name = col.split(" 'Grouped:")[0].strip()
                
                # Extract the groups from the format "(Group1; Group2; ...)"
                match = re.search(r"\((.*?)\)", col)
                if match:
                    groups_str = match.group(1)
                    groups = [g.strip() for g in groups_str.split(";")]
                    
                    # Add the column to each group it belongs to
                    for group in groups:
                        if group not in group_data_dict:
                            group_data_dict[group] = []
                        group_data_dict[group].append(base_col_name)
                    
                    # Create mapping for column renaming (strip the 'Grouped:' part)
                    renamed_columns[col] = base_col_name
            
            # Rename the columns in the DataFrame to remove the 'Grouped:' part
            df_renamed = df.rename(columns=renamed_columns)
            
            # Validate the structure
            if not group_data_dict:
                #display(HTML('<b style="color:orange;">No valid group data could be extracted.</b>'))
                return group_data_dict, df
            
            #display(HTML('<b style="color:green;">Group data successfully extracted and column names cleaned.</b>'))
            return group_data_dict, df_renamed
        
        except Exception as e:
            display(HTML(f"<b style='color:red;'>Error processing group data from DataFrame: {str(e)}</b>"))
            return group_data_dict, df

    def _validate_and_clean_data(self, df):
        """
        Validate and clean the uploaded data, preserving numeric data even if stored as strings.
        Returns tuple of (cleaned_df, warnings, errors)
        """
        warnings = []
        errors = []
        cleaned_df = df.copy()
    
        # Check required columns exist
        required_columns = [
            'Master Protein Accessions', 
            'unique ID'
        ]
        
        # Check that at least one Avg_ column exists
        avg_columns = [col for col in df.columns if col.startswith('Avg_')]
        if not avg_columns:
            errors.append("No columns starting with 'Avg_' found in the data")
            return None, warnings, errors
            
        # Function to function
        cleaned_df = cleaned_df.rename(columns={'Function': 'function'})
            
        # Add Avg_ columns to required columns
        required_columns.extend(avg_columns)
        
        missing = set(required_columns) - set(df.columns)
        if missing:
            errors.append(f"Missing required columns: {', '.join(missing)}")
            return None, warnings, errors
    
        # Separate numeric and non-numeric columns
        numeric_columns = avg_columns  # Avg_ columns should be numeric
        text_columns = ['Master Protein Accessions', 'unique ID']
    
        # Handle blank values differently for numeric vs text columns
        for column in required_columns:
            if column in numeric_columns:
                # For numeric columns, try to convert to numeric first
                try:
                    # Convert to numeric, coerce errors to NaN
                    cleaned_df[column] = pd.to_numeric(cleaned_df[column], errors='coerce')
                    blank_count = cleaned_df[column].isna().sum()
                    if blank_count > 0:
                        warnings.append(f"Found {blank_count} invalid/blank numeric values in {column} column")
                except Exception as e:
                    errors.append(f"Error converting {column} to numeric: {str(e)}")
                    return None, warnings, errors
            elif column in text_columns:
                # For text columns, check for truly empty values
                blank_mask = cleaned_df[column].isna() | (cleaned_df[column].astype(str).str.strip() == '')
                blank_count = blank_mask.sum()
                if blank_count > 0:
                    warnings.append(f"Dropping {blank_count} rows with blank values in {column} column")
                    cleaned_df = cleaned_df[~blank_mask]
    
        # Check for invalid characters in non-blank rows
        if len(cleaned_df) > 0:
            # Check Positions in Proteins
            invalid_pos = cleaned_df['Positions in Proteins'].apply(
                lambda x: ',' in str(x) or ':' in str(x)
            )
            if invalid_pos.any():
                errors.append(
                    "Found invalid characters (',' or ':') in Positions in Proteins column. "
                    "Please update the file and upload again."
                )
            
            # Check Master Protein Accessions
            invalid_acc = cleaned_df['Master Protein Accessions'].apply(
                lambda x: ',' in str(x) or ':' in str(x)
            )
            if invalid_acc.any():
                errors.append(
                    "Found invalid characters (',' or ':') in Master Protein Accessions column. "
                    "Please update the file and upload again."
                )
    
        return cleaned_df, warnings, errors

    def process_protein_info(self, df, fetch_sequence=False):
        """
        Process protein information from the dataframe and store in protein_dict.
        Asks user whether to fetch from UniProt or use accession IDs when protein info is missing.
        
        Args:
            df (DataFrame): DataFrame containing protein data
            fetch_sequence (bool): Whether to fetch protein sequences. Defaults to False.
        """
        # Initialize a cache for UniProt information to avoid redundant queries
        if not hasattr(self, 'uniprot_client'):
            self.uniprot_client = UniProtClient()
        if not hasattr(self, 'uniprot_cache'):
            self.uniprot_cache = getattr(self, 'uniprot_client').cache if hasattr(self, 'uniprot_client') else {}
        

        # Check if we need to fetch any data from UniProt
        has_protein_info = all(col in df.columns for col in ['protein_name', 'protein_species'])
        if has_protein_info:
            # Check if we have valid data for all entries
            all_data_present = (
                df['protein_name'].notna().all() and 
                df['protein_species'].notna().all() and
                (df['protein_name'] != '').all() and
                (df['protein_species'] != '').all()
            )
            if all_data_present:
                # If we have all data, just process it silently
                protein_info = df.groupby('Master Protein Accessions').agg({
                    'protein_name': 'first',
                    'protein_species': 'first'
                }).reset_index()
                
                for _, row in protein_info.iterrows():
                    protein_id = row['Master Protein Accessions']
                    self.protein_dict[protein_id] = {
                        "name": row['protein_name'],
                        "species": row['protein_species']
                    }
                self._protein_processing_complete = True

                return len(self.protein_dict)

        # Store the dataframe for later processing
        self._protein_df_to_process = df

        if not all_data_present:
            # Store sequence fetch preference
            self._fetch_sequence = fetch_sequence
            
            # Create a flag to track if processing is complete

            self._protein_processing_complete = False
            
            # If we need to fetch data, ask the user what they want to do
            # Display in the status area
            with self.status_area:
                self.status_area.clear_output()
                
                # Create buttons for user choice
                fetch_button = widgets.Button(
                    description='Query UniProt',
                    button_style='info',
                    tooltip='Fetch protein names from UniProt database (may take time)',
                    layout=widgets.Layout(width='250px')
                )
                
                use_accession_button = widgets.Button(
                    description='Use Protein IDs',
                    button_style='warning',
                    tooltip='Use protein accession IDs as names without querying UniProt',
                    layout=widgets.Layout(width='250px')
                )
                
                # Define button click handlers
                fetch_button.on_click(lambda b: self.process_proteins_with_choice(True))
                use_accession_button.on_click(lambda b: self.process_proteins_with_choice(False))
                
                display(HTML("""
                    <div style="padding: 15px; margin: 10px 0; border-left: 4px solid #17a2b8; background-color: #f8f9fa;">
                        <h4 style="margin-top: 0;">Protein Information Missing</h4>
                        <p>Some protein names or species information is missing in your data.</p>
                        <p>Would you like to:</p>
                            <ul>
                                    <li>Fetch protein names from UniProt database (may take time)</li>
                                    <li>Use protein accession IDs as names without querying UniProt</li>
                            </ul>
                    </div>
                """))
                display(widgets.HBox([fetch_button, use_accession_button]))
            # Mark processing as complete
            self._protein_processing_complete = True

        # Process _pending_merged_df if it exists rather than merged_df
        if hasattr(self, '_pending_merged_df') and self._pending_merged_df is not None:
            df_to_update = self._pending_merged_df
            
            # Add protein_name and protein_species columns if they don't exist
            if 'protein_name' not in df_to_update.columns:
                df_to_update['protein_name'] = ''
            if 'protein_species' not in df_to_update.columns:
                df_to_update['protein_species'] = ''
            
            # Update the columns with the fetched information
            for protein_id, info in self.protein_dict.items():
                mask = df_to_update['Master Protein Accessions'] == protein_id
                df_to_update.loc[mask, 'protein_name'] = info['name']
                df_to_update.loc[mask, 'protein_species'] = info['species']
            
            # Finalize the data import now that processing is complete
            self._finalize_data_import()
        # Return the current count, but processing will continue when a button is clicked
        return len(self.protein_dict)

    def process_proteins_with_choice(self, fetch_from_uniprot):
        """
        Process proteins based on user choice.
        This is called when the user clicks one of the choice buttons.
        """
        # Get the dataframe to process
        df = self._protein_df_to_process
        # Get sequence fetch preference (default to False if not set)
        fetch_sequence = getattr(self, '_fetch_sequence', False)
        
        # Clear the status area and show processing message
        with self.status_area:
            self.status_area.clear_output()
            if fetch_from_uniprot:
                display(HTML('<div style="color: #17a2b8; padding: 10px; margin: 10px 0;">Fetching protein information from UniProt...</div>'))
            else:
                display(HTML('<div style="color: #ffc107; padding: 10px; margin: 10px 0;">Using protein accession IDs as names...</div>'))
        
        # Process proteins based on user choice
        # Use the status area for progress display
        with self.status_area:
            # Initialize counters
            total_proteins = 0
            uniprot_found = 0
            uniprot_not_found = 0
            multiple_entries = 0
            cached_proteins = 0
            
            # Check if we need to fetch any data from UniProt
            has_protein_info = all(col in df.columns for col in ['protein_name', 'protein_species'])
            
            # Group by protein accession to get unique proteins
            protein_info = df.groupby('Master Protein Accessions').agg({
                'protein_name': 'first' if 'protein_name' in df.columns else lambda x: None,
                'protein_species': 'first' if 'protein_species' in df.columns else lambda x: None
            }).reset_index()

            progress_html = """
                <style>
                    .fetch-status { font-family: monospace; margin: 10px 0; padding: 10px; }
                    .fetch-progress { margin: 5px 0; padding: 5px; }
                    .success { color: #28a745; }
                    .warning { color: #ffc107; }
                    .error { color: #dc3545; }
                    .info { color: #17a2b8; }
                    .summary { margin-top: 10px; padding: 10px;}
                </style>
                <div class="fetch-status">
                    <div id="progress-updates"></div>
                </div>
            """
            # First, collect all proteins that need fetching
            proteins_to_fetch = []
            
            for _, row in protein_info.iterrows():
                total_proteins += 1
                protein_id = row['Master Protein Accessions']
                
                # Skip entries with multiple protein IDs
                if ';' in protein_id:
                    multiple_entries += 1
                    self.protein_dict[protein_id] = {
                        "name": protein_id,
                        "species": "Multiple"
                    }
                    continue
                
                # Use existing data if available and not empty
                if (has_protein_info and 
                    pd.notna(row['protein_name']) and 
                    pd.notna(row['protein_species']) and 
                    row['protein_name'] != '' and 
                    row['protein_species'] != ''):
                    self.protein_dict[protein_id] = {
                        "name": row['protein_name'],
                        "species": row['protein_species']
                    }
                    continue
                
                # Check if we already have this protein in cache
                if hasattr(self, 'uniprot_client') and protein_id in self.uniprot_client.cache:
                    cached_proteins += 1
                    name, species = self.uniprot_client.cache[protein_id]
                    self.protein_dict[protein_id] = {
                        "name": name if name else protein_id,
                        "species": species if species else "Unknown"
                    }
                    continue
                elif hasattr(self, 'uniprot_cache') and protein_id in self.uniprot_cache:
                    cached_proteins += 1
                    name, species = self.uniprot_cache[protein_id]
                    self.protein_dict[protein_id] = {
                        "name": name if name else protein_id,
                        "species": species if species else "Unknown"
                    }
                    continue
                
                # If we need to fetch and user chose to fetch from UniProt
                if fetch_from_uniprot:
                    proteins_to_fetch.append(protein_id)
                else:
                    # Use accession ID as name
                    self.protein_dict[protein_id] = {
                        "name": protein_id,
                        "species": "Unknown"
                    }
            
            # Process proteins in batches if fetching from UniProt
            if fetch_from_uniprot and proteins_to_fetch:
                # Update progress display
                display(HTML(progress_html + f"""
                    <div class="fetch-progress info">
                        Preparing to fetch {len(proteins_to_fetch)} proteins from UniProt in batches...
                    </div>
                """))
                
                # Process in batches of 50 (adjust as needed)
                batch_size = 50
                total_batches = (len(proteins_to_fetch) + batch_size - 1) // batch_size
                
                for batch_num in range(total_batches):
                    start_idx = batch_num * batch_size
                    end_idx = min((batch_num + 1) * batch_size, len(proteins_to_fetch))
                    current_batch = proteins_to_fetch[start_idx:end_idx]
                    
                    # Update progress
                    self.status_area.clear_output(wait=True)
                    display(HTML(progress_html + f"""
                        <div class="fetch-progress info">
                            Fetching batch {batch_num + 1}/{total_batches} ({len(current_batch)} proteins)...
                        </div>
                        <div class="summary">
                            <h4>Progress:</h4>
                            <ul>
                                <li>Total proteins: {total_proteins}</li>
                                <li>Proteins from cache: {cached_proteins}</li>
                                <li>UniProt matches found: {uniprot_found}</li>
                                <li>UniProt matches not found: {uniprot_not_found}</li>
                                <li>Multiple entry proteins: {multiple_entries}</li>
                                <li>Remaining to fetch: {len(proteins_to_fetch) - start_idx}</li>
                            </ul>
                        </div>
                    """))
                    
                    # Fetch the batch
                    batch_results = {}
                    for protein_id in current_batch:
                        try:
                            # Use the appropriate method based on whether we want to fetch sequence
                            if fetch_sequence:
                                name, species, sequence = self.uniprot_client.fetch_protein_info_with_sequence(protein_id)
                                if name or species:
                                    batch_results[protein_id] = (name, species, sequence)
                            else:
                                name, species = self.uniprot_client.fetch_protein_info(protein_id)
                                if name or species:
                                    batch_results[protein_id] = (name, species)
                        except Exception as e:
                            print(f"Error fetching {protein_id}: {str(e)}")
                    
                    # Process the results
                    for protein_id in current_batch:
                        if protein_id in batch_results:
                            if fetch_sequence:
                                name, species, sequence = batch_results[protein_id]
                            else:
                                name, species = batch_results[protein_id]
                                sequence = None
                                
                            # Use the protein_id as name if name is None or empty
                            if not name:
                                name = protein_id
                                
                            # Use "Unknown" for species if it's None or empty
                            if not species:
                                species = "Unknown"
                                
                            uniprot_found += 1
                            self.protein_dict[protein_id] = {
                                "name": name,
                                "species": species
                            }
                            
                            # Add sequence if we have it
                            if sequence:
                                self.protein_dict[protein_id]["sequence"] = sequence
                                
                            # Add to cache
                            if hasattr(self, 'uniprot_client'):
                                if fetch_sequence:
                                    self.uniprot_client.sequence_cache[protein_id] = (name, species, sequence)
                                self.uniprot_client.cache[protein_id] = (name, species)
                            else:
                                if not hasattr(self, 'uniprot_cache'):
                                    self.uniprot_cache = {}
                                self.uniprot_cache[protein_id] = (name, species)
                        else:
                            uniprot_not_found += 1
                            self.protein_dict[protein_id] = {
                                "name": protein_id,
                                "species": "Unknown"
                            }
                            # Cache the negative result too
                            if hasattr(self, 'uniprot_cache'):
                                self.uniprot_cache[protein_id] = (protein_id, "Unknown")
            
            # Show final summary
            self.status_area.clear_output(wait=True)
            display(HTML(f"""
                <div class="fetch-status">
                    <h4 style="color:green;"><b>Protein Processing Complete!</b></h4>
                    <div class="summary">
                        <h4>Final Summary:</h4>
                        <ul>
                            <li>Total proteins processed: {total_proteins}</li>
                            <li>Proteins from cache: {cached_proteins}</li>
                            <li>Multiple entry proteins: {multiple_entries}</li>
                            {"<li>UniProt matches found: " + str(uniprot_found) + "</li>" if fetch_from_uniprot else ""}
                            {"<li>UniProt matches not found: " + str(uniprot_not_found) + "</li>" if fetch_from_uniprot else ""}
                        </ul>
                    </div>
                </div>
            """))
        
        # Mark processing as complete
        self._protein_processing_complete = True
        
        # Process _pending_merged_df if it exists rather than merged_df
        if hasattr(self, '_pending_merged_df') and self._pending_merged_df is not None:
            df_to_update = self._pending_merged_df
            
            # Add protein_name and protein_species columns if they don't exist
            if 'protein_name' not in df_to_update.columns:
                df_to_update['protein_name'] = ''
            if 'protein_species' not in df_to_update.columns:
                df_to_update['protein_species'] = ''
            
            # Update the columns with the fetched information
            for protein_id, info in self.protein_dict.items():
                mask = df_to_update['Master Protein Accessions'] == protein_id
                df_to_update.loc[mask, 'protein_name'] = info['name']
                df_to_update.loc[mask, 'protein_species'] = info['species']
            
            # Finalize the data import now that processing is complete
            self._finalize_data_import()
        
        # Otherwise update the existing merged_df if it exists
        elif hasattr(self, 'merged_df') and self.merged_df is not None:
            # Add protein_name and protein_species columns if they don't exist
            if 'protein_name' not in self.merged_df.columns:
                self.merged_df['protein_name'] = ''
            if 'protein_species' not in self.merged_df.columns:
                self.merged_df['protein_species'] = ''
            
            # Update the columns with the fetched information
            for protein_id, info in self.protein_dict.items():
                mask = self.merged_df['Master Protein Accessions'] == protein_id
                self.merged_df.loc[mask, 'protein_name'] = info['name']
                self.merged_df.loc[mask, 'protein_species'] = info['species']
        
        # Check if we can enable the plot button
        self.check_and_update_plot_button_state()
        # Return the number of proteins processed
        return len(self.protein_dict)

    def _finalize_data_import(self):
        """Finalize data import after protein processing is complete"""
        if hasattr(self, '_pending_merged_df') and self._pending_merged_df is not None:
            # Set the merged_df property
            self.merged_df = self._pending_merged_df

            # Display success message in output area
            with self.status_area:
                display(HTML(
                    f'<b style="color:green;">Data imported successfully with '
                    f'{self.merged_df.shape[0]} rows and {self.merged_df.shape[1]} columns.</b>'
                ))
            
            # Clear the pending data
            self._pending_merged_df = None

    def on_merged_upload_change(self, change):
        """React to a new value on the merged-data upload widget.

        Only events whose ``type`` is ``'change'`` and whose ``name`` is
        ``'value'`` are handled. For those, the previously stored dataframe and
        group dictionary are reset, the status area is cleared, and the first
        uploaded file (if any) is passed to ``_load_data``. A successful load is
        stored as pending data, finalized immediately when protein processing
        has already completed, then fed to
        ``process_group_data_from_dataframe`` before the file-change hook and
        the plot-button check run.

        Args:
            change: Mapping with at least the keys ``'type'``, ``'name'`` and
                ``'new'`` (the uploaded file sequence).
        """
        if not (change['type'] == 'change' and change['name'] == 'value'):
            return

        # Reset previously loaded data before handling the new upload.
        self.merged_df = pd.DataFrame()
        self.group_data_dict = {}

        with self.status_area:
            self.status_area.clear_output()

            uploads = change['new']
            if not (uploads and len(uploads) > 0):
                return

            loaded_df, load_status = self._load_data(uploads[0])
            if not (load_status == 'yes' and loaded_df is not None):
                return

            # Keep the dataframe as pending until protein processing is done.
            self._pending_merged_df = loaded_df

            if getattr(self, '_protein_processing_complete', False):
                self._finalize_data_import()
            else:
                # Finalization is deferred; per the original note it is done
                # later by process_proteins_with_choice.
                display(HTML(
                    '<b style="color:orange;">Protein information processing in progress... '
                    'Please complete protein information selection.</b>'
                ))

            # Derive group data from the freshly loaded dataframe.
            self.group_data_dict, self.merged_df = self.process_group_data_from_dataframe(loaded_df)
            self._on_file_change(change)
            self.check_and_update_plot_button_state()
    
    def _load_data(self, file_data):
        """
        Load and validate merged data file
        Returns tuple of (dataframe, status)
        """
        try:
            content = bytes(file_data.content)
            filename = file_data.name
            extension = filename.split('.')[-1].lower()
            self.filename = filename
            file_stream = io.BytesIO(content)

            # Load data based on file extension
            try:
                if extension == 'csv':
                    df = pd.read_csv(file_stream)
                elif extension in ['txt', 'tsv']:
                    df = pd.read_csv(file_stream, delimiter='\t')
                elif extension == 'xlsx':
                    df = pd.read_excel(file_stream)
                else:
                    display(HTML(f'<b style="color:red;">Error: Unsupported file format</b>'))
                    return None, 'no'
            except Exception as e:
                display(HTML(f'<b style="color:red;">Error reading file: {str(e)}</b>'))
                return None, 'no'

            # Check for protein info columns and notify user
            missing_columns = []
            if 'protein_name' not in df.columns:
                missing_columns.append('protein_name')
                df['protein_name'] = ''
            if 'protein_species' not in df.columns:
                missing_columns.append('protein_species')
                df['protein_species'] = ''
                
            if missing_columns:
                notification = f"""
                <div style="padding: 10px; margin: 10px 0;">
                    <p style="color: #17a2b8; margin: 0;">
                        <b>Notice:</b> The following columns are missing from your data:
                        <ul style="color: #17a2b8; margin: 5px 0;">
                            {''.join(f'<li>{col}</li>' for col in missing_columns)}
                        </ul>
                        </p>
                        <p style="color: #17a2b8; margin: 0;">
                        UniProt will be searched to automatically fill in this information. <br>
                        Alternativly you can upload a standardized file from the data transomation module with the protein information. 
                    </p>
                </div>
                """
                display(HTML(notification))

            # Validate and clean data
            cleaned_df, warnings, errors = self._validate_and_clean_data(df)

            # Warnings about invalid/blank values are commented out
            # if warnings:
            #     warning_html = "<br>".join([
            #         f'<b style="color:orange;">Warning: {w}</b>'
            #         for w in warnings
            #     ])
            #     display(HTML(warning_html))

            # Display errors if any
            if errors:
                error_html = "<br>".join([
                    f'<b style="color:red;">Error: {e}</b>'
                    for e in errors
                ])
                display(HTML(error_html))
                return None, 'no'

            if cleaned_df is not None and len(cleaned_df) > 0:
                # Process protein information
                num_proteins = self.process_protein_info(cleaned_df)
                
                # Add information about remaining rows and processed proteins
                success_message = f"""
                <div style="padding: 10px; margin: 10px 0; border-left: 4px solid #28a745; background-color: #f8f9fa;">
                    <p style="color: #28a745; margin: 0;">
                        <b>Data Import Complete!</b><br>
                        • Data imported successfully with {cleaned_df.shape[0]} rows and {cleaned_df.shape[1]} columns.<br>
                        • Processed data contains {len(cleaned_df)} rows after removing blank values.<br>
                        • Successfully processed information for {num_proteins} unique proteins.
                    </p>
                </div>
                """
                #display(HTML(success_message))
                return cleaned_df, 'yes'
            else:
                display(HTML('<b style="color:red;">Error: No valid data rows remaining after cleaning</b>'))
                return None, 'no'

        except Exception as e:
            display(HTML(f'<b style="color:red;">Error processing file: {str(e)}</b>'))
            return None, 'no'

    def check_and_update_plot_button_state(self):
        """
        Unlock plotting once every prerequisite is present.

        ``self.plot_lock.value`` is set to ``False`` when all of these hold:
        1. ``merged_df`` exists and is not ``None``
        2. ``_protein_processing_complete`` exists and is truthy
        3. ``group_data_dict`` exists and is not ``None``

        When any condition fails the lock is left untouched.
        """
        merged_ready = getattr(self, 'merged_df', None) is not None
        proteins_ready = bool(getattr(self, '_protein_processing_complete', False))
        groups_ready = getattr(self, 'group_data_dict', None) is not None

        if not (merged_ready and proteins_ready and groups_ready):
            return

        self.plot_lock.value = False


In [4]:
class DataHandler:
    def __init__(self, data_transformer):
        self.data_transformer = data_transformer
        self.selected_proteins = []
        self.selected_functions = []
        # Add variable to store current figure
        self.current_fig = None
        self.merged_df = self.data_transformer.merged_df
        self.group_data_dict = self.data_transformer.group_data_dict
        self.protein_dict = self.data_transformer.protein_dict
        self.set_up_self_widget()   
        self._initialize_instructions() 
        
        self.data_transformer.merged_uploader.observe(self.update_group_options, names='value')

        # Register an explicit callback to populate proteins when merged data changes
        self.data_transformer.merged_uploader.observe(self.populate_protein_selector, names='value')

        # Observe for data loading to update functions and color generation
        self.data_transformer.merged_uploader.observe(self.on_data_loaded_func_and_color_gen, names='value')
               
        self.data_transformer.plot_lock.observe(self._on_plot_lock_change_handler, names='value')
        # In your setup_widgets method, add this line:
        self.color_scheme.observe(self.on_color_scheme_change, names='value')

        self.plot_func_or_pro.observe(self.on_plot_type_change, names='value')
        #self.on_plot_func_or_pro_change(type('Change', (), {'new': 'No Filter'})())
   
        self.plot_type.observe(self.on_plot_type_change, names='value')
  
    def _on_plot_lock_change_handler(self, change):
        """Handle changes in Plot_lock state"""
        if change.new == False and change.old == True:
            self.merged_df = self.data_transformer.merged_df
            self.group_data_dict = self.data_transformer.group_data_dict
            self.protein_dict = self.data_transformer.protein_dict

            self.group_selector.disabled = False
            self.protein_selector.disabled = False
            self.function_selector.disabled = False
            self.plot_func_or_pro.disabled = False
            self.xlabel_widget.disabled = False
            self.ylabel_widget.disabled = False
            self.legend_widget.disabled = False
            self.title_widget.disabled = False
            self.color_scheme.disabled = False
            self.plot_type.disabled = False
            self.metric_type.disabled = False
            self.abs_or_count.disabled = False
            self.invert_plot.disabled = False
            self.correlation_type.disabled = False
            self.log_transform.disabled = False

            self.update_group_options(change)       
            self.populate_protein_selector()  
            self.on_data_loaded_func_and_color_gen()

    def update_group_options(self, change=None):
        """Update group selection options when data changes"""
        if self.merged_df is not None:
            # Get all Avg_ columns
            self.avg_columns = [col for col in self.merged_df.columns 
                        if col.startswith('Avg_')]
            # Use the stripped names but maintain original order
            self.stripped_columns = [col.replace('Avg_', '')  for col in self.avg_columns]
            # Update group selection options
            self.group_selector.options = self.stripped_columns
            # Select all groups by default
            self.group_selector.value = self.stripped_columns
    
    def on_data_loaded_func_and_color_gen(self, change=None):
        """Handle data loading and generate function colors"""
        # Process bioactive data and get both dictionaries
        self.calculate_bioactivt_count_and_dict()

        if self.unique_function_absorbance_dict:
            # Get all unique functions and their total absorbance
            function_totals_dict = {}
            for group_data_dict in self.unique_function_absorbance_dict.values():
                for function, absorbance in group_data_dict.items():
                    if function not in function_totals_dict:
                        function_totals_dict[function] = 0
                    function_totals_dict[function] += absorbance
            
            # Sort functions by total absorbance in descending order
            self.all_functions = [
                function for function, _ in sorted(
                    function_totals_dict.items(),
                    key=lambda x: x[1],
                    reverse=True
                )
            ]
            
            # Update function selector options first
            all_functions = [func for func in self.all_functions if func != 'Minor Functions' and func != 'Functional Peptides' and func != 'Non-Functional Peptides']
            self.function_selector.options = ['All Functional Peptides'] + all_functions
            # Then set the default value to 'All'
            self.function_selector.value = ('All Functional Peptides',)
            
            # Generate colors for functions
            self.function_colors = self.get_color_sequence(len(self.all_functions))
            
            # Create color mapping
            self.function_color_map = {
                function: color 
                for function, color in zip(self.all_functions, self.function_colors)
            }
            # Check if we should plot minor functions
            plot_minor = self.plot_minor.value
            if plot_minor:
                # Always set Minor Functions to grey
                if 'Minor Functions' not in self.function_color_map:
                    self.function_color_map['Minor Functions'] = '#808080'  # Grey color
        
    def populate_protein_selector(self, change=None):
        """Populate the protein selector with proteins ordered by their relative abundance across all samples"""
        
        # Check if data_transformer is available
        if not hasattr(self, 'data_transformer') or self.data_transformer is None:
            return
            
        # Use protein_dict (with 's') instead of protein_dic
        if not self.protein_dict:
            return
            
        try:
            # Calculate protein abundance across all samples
            protein_abundance = {}
            
            if self.merged_df  is not None:
                df = self.merged_df.copy()
                
                # Find all Avg_ columns for abundance data
                abundance_cols = self.avg_columns
                protein_col = 'Master Protein Accessions'
                
                if abundance_cols and protein_col in df.columns:
                    
                    # Process each row in the dataframe
                    for _, row in df.iterrows():
                        # Skip rows without protein information
                        if pd.isna(row[protein_col]) or row[protein_col] == '':
                            continue
                            
                        # Get proteins for this peptide
                        proteins = [p.strip() for p in str(row[protein_col]).split(';') if p.strip()]
                        
                        # Calculate total abundance across all samples for this peptide
                        total_abundance = 0
                        for col in abundance_cols:
                            try:
                                if pd.notna(row.get(col)):
                                    total_abundance += float(row.get(col, 0))
                            except (ValueError, TypeError) as e:
                                print(f"Error converting abundance value in column {col}: {str(e)}")
                                print(f"Value: {row.get(col)}, Type: {type(row.get(col))}")
                        
                        # If there are multiple proteins, divide the abundance equally among them
                        per_protein_abundance = total_abundance / len(proteins) if proteins else 0
                        
                        # Add to each protein's total
                        for protein in proteins:
                            if protein in protein_abundance:
                                protein_abundance[protein] += per_protein_abundance
                            else:
                                protein_abundance[protein] = per_protein_abundance
                    
            
            # Get the list of all proteins from protein_dict
            all_proteins = list(self.protein_dict.keys())
            self.all_proteins = []
            # Sort proteins by abundance (highest first)

            if protein_abundance:
                # Get proteins sorted by abundance
                sorted_proteins = sorted(all_proteins, 
                                        key=lambda p: protein_abundance.get(p, 0), 
                                        reverse=True)

                # Create options with protein ID and name
                options = []
                options.append('All Proteins (No Filter)')  # Add 'All' option first
                
                # Add each protein with its ID, name and abundance
                for protein_id in sorted_proteins:
                    protein_info = self.protein_dict.get(protein_id, {})
                    protein_name = protein_info.get('name', protein_id)
                    self.all_proteins.append(protein_name)
                    abundance = protein_abundance.get(protein_id, 0)

                    options.append(protein_name)
                # Update the protein selector with all options
                self.protein_selector.options = options

                self.all_proteins 
                # Automatically select the top 10 proteins (or fewer if less are available)
                num_proteins = min(10, len(options) - 1)  # -1 to account for 'All'
                if num_proteins > 0:
                    # Select top proteins (options[1:] to skip 'All')
                    self.protein_selector.value = tuple(options[1:num_proteins+1])
        
        except Exception as e:
            print(f"Error populating protein selector: {str(e)}")
            import traceback
            traceback.print_exc()
            
    def set_up_self_widget(self):
        
        # Create multi-select widget for groups
        self.group_selector = widgets.SelectMultiple(
            options=[],
            #description='Groups:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='300px', height='100px'),
            disabled=True
        )


        # Add Correlation specific widgets
        self.correlation_type = widgets.RadioButtons(
            options=['Pearson', 'Spearman'],
            description='Correlation Type:',
            value='Pearson',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='300px'),
            disabled=True
        )
        self.log_transform = widgets.Checkbox(
            value=True,
            description='Apply log10 transformation before correlation analysis',
            layout=widgets.Layout(width='350px'),
            indent=False,
            style={'description_width': 'initial'},
            disabled=True
        )
        
        # Add label customization widgets
        self.xlabel_widget = widgets.Text(
            description='X Label:',
            placeholder='Enter x-axis label',
            layout=widgets.Layout(width='300px'),
            disabled=True
        )
        self.ylabel_widget = widgets.Text(
            description='Y Label:',
            placeholder='Enter y-axis label',
            layout=widgets.Layout(width='300px'),
            disabled=True
        )
        self.legend_widget = widgets.Text(
            description='Legend Title',
            placeholder='Enter a custom legend title',
            layout=widgets.Layout(width='300px'),
            disabled=True
        )
        self.title_widget = widgets.Text(
            description='Plot Title',
            placeholder='Enter a custom plot title',
            layout=widgets.Layout(width='300px'),
            disabled=True
        )


        self.single_color_scheme = [
            '--- SINGLE COLORS OPTIONS ---',
            'red', 'green', 'blue', 'yellow', 'purple', 'orange', 'cyan', 
            'magenta', 'pink', 'brown', 'black', 'white', 'gray', 'darkblue',
            'darkgreen', 'darkred', 'darkorange', 'darkpurple', 'lightblue',
            'lightgreen', 'lightred', 'gold', 'silver', 'teal', 'navy', 'maroon',
            'olive', 'lime', 'aqua', 'indigo', 'violet', 'turquoise', 'coral',
            'crimson', 'salmon', 'sienna', 'tan', 'khaki', 'plum', 'orchid'
        ]
        # Update color scheme dropdown with categorized options
        color_palet = [
            '--- DEFAULT PALETTE(HSV) ---',
            'HSV',  # Default option
            '--- QUALITATIVE PALETTES (RECOMENDED) ---',
            'Plotly', 'D3', 'G10', 'T10', 'Alphabet', 
            'Set1', 'Set2', 'Set3', 'Pastel1', 'Pastel2', 'Paired',
            '--- SEQUENTIAL PALETTES ---',
            'Viridis', 'Cividis', 'Inferno', 'Magma', 'Plasma',
            'Hot', 'Jet', 'Blues', 'Greens', 'Reds', 'Purples', 'Oranges',
            '--- DIVERGING PALETTES ---',
            'Spectral', 'RdBu', 'RdYlBu', 'RdYlGn', 'PiYG', 'PRGn', 'BrBG', 'RdGy',
            '--- CYCLICAL PALETTES ---',
            'IceFire', 'Edge', 'Twilight',

        ]
        color_schemes = color_palet + self.single_color_scheme

        self.color_scheme = widgets.Dropdown(
            options=color_schemes,
            value='HSV',  # Default value
            description='Color Scheme:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='300px'),
            disabled=True
        )

        # Add an inversion toggle radio button
        self.invert_plot = widgets.RadioButtons(
            description='Plot Orientation:',
            options=['By Sample', 'By Protein', 'By Function'],
            value='By Sample',  # Default selection
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='300px', height='auto'),
            disabled=True,
            indent=True  # Keeps options aligned with description instead of appearing below
        )

        # Create a custom class for SelectMultiple with text-overflow ellipsis
        class EllipsisSelectMultiple(widgets.SelectMultiple):
            def __init__(self, **kwargs):
                super().__init__(**kwargs)
                self._dom_classes = ['ellipsis-select']
                
        # Apply CSS styling for ellipsis
        display(HTML("""
        <style>
        .ellipsis-select select option {
            text-overflow: ellipsis;
            overflow: hidden;
            white-space: nowrap;
        }
        </style>
        """))
        
        self.protein_selector = EllipsisSelectMultiple(
            options=['All Proteins (No Filter)'],
            value=['All Proteins (No Filter)'],  # Empty tuple - no selection by default
            #description='Proteins:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(
                width='300px',
                height='100px',
            ),
            disabled=True
        )

                     
       # Plot type selection
        self.plot_func_or_pro = widgets.Dropdown(
            options=['No Filter', 'Selected Protein(s)', 'Selected Function(s)', 'Both', 'Functional vs Non-Functional Peptides'],
            value='Both',
            description='Plot Filter:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='300px'),
            disabled=True
        )
        
        # Replace Dropdown with SelectMultiple for functions
        self.function_selector = widgets.SelectMultiple(
            options=['All Functional Peptides'],
            value=['All Functional Peptides'],
            #description='Functions:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(
                width='300px',                # Use 100% to take up entire allocated width
                height='100px',              # Fixed height
                overflow_y='hidden',           # Enable vertical scrollbar when needed
                overflow_x='hidden'          # Hide horizontal scrollbar
            ),
            disabled=True
        )
        self.function_selector.disabled = True

        self.profunc_grid = widgets.GridBox(
            [
                widgets.VBox(
                    [widgets.HTML("<u>Select Proteins:</u>"), self.protein_selector],
                    layout=widgets.Layout(width='100%', height='150px')
                ),
                widgets.VBox(
                    [widgets.HTML("<u>Select Functions:</u>"), self.function_selector],
                    layout=widgets.Layout(width='100%', height='150px')
                )
            ],
            layout=widgets.Layout(
                grid_template_columns="1fr 1fr",
                gap="10px",
                width="650px",
                height="auto",
                overflow="hidden"
            )
        )    
        
        self.groups_grid = widgets.GridBox(
            [
                widgets.VBox(
                    [widgets.HTML("<u>Select Groups:</u>"), self.group_selector],
                    layout=widgets.Layout(width='100%', height='150px')
                )
            ],
            layout=widgets.Layout(
                grid_template_columns="1fr 1fr",
                gap="10px",
                width="650px",
                height="auto",
                overflow="hidden"
            )
        )    
        
        # Selecte between relative and absolute plots
        self.metric_type = widgets.RadioButtons(
            description='Scale Absorbance:',
            options=['Absolute', 'Relative'],
            value='Absolute',  # Default selection
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='300px', height='auto'),
            disabled=True,
            indent=True  # Keeps options aligned with description instead of appearing below
        )                   
       

    
        # Create the checkbox with improved description
        self.plot_minor = widgets.Checkbox(
            description='Group Unselected Proteins or Functions',
            value=True,
            indent=False,
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='300px', height='30px'),
            disabled=True
        )


        # Create a help icon with explanatory tooltip
        

        # Combine checkbox and help icon into a horizontal layout
        self.minor_row = widgets.HBox([
            self.plot_minor, 
        ], layout=widgets.Layout(align_items='center'))

        # Add after creating the protein_selector widget
        self.populate_protein_selector()


        # Update plot type selection to remove 'All Plots'
        self.plot_type = widgets.RadioButtons(
            options=['Grouped Bar Plots','Stacked Bar Plots', 'Pie Charts', 'Corr. Scatter Plots'],
            value='Grouped Bar Plots',
            description='Plot Type:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='300px'),
            disabled=True
        )
        
        # Add bar plot type selection
        self.abs_or_count = widgets.RadioButtons(
            options=['Absorbance', 'Count'],
            value='Absorbance', 
            description='Data Type:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='300px'),
            disabled=True
        )

       
        # Add help tooltips
        plot_type_help = create_help_icon("Select whether to display data as a bar plot or pie chart")
        bar_plot_type_help = create_help_icon("Choose the type of values to display in the bar plot")
        plot_orientation_help = create_help_icon("Group data by sample or by protein")

        self.plot_type_row = widgets.HBox([self.plot_type,
                                           self.metric_type],
                                           layout=widgets.Layout(width='300px')
                                          )
        self.plot_type_row_two= widgets.HBox([self.abs_or_count,
                                           self.invert_plot],
                                           layout=widgets.Layout(width='300px')
                                          )

        self.corr_box = widgets.VBox([
                                    #widgets.HTML("<u>Correlation Settings:</u>"), 
                                    self.correlation_type,
                                    self.log_transform],
                                    layout=widgets.Layout(width='360px', height='100px', overflow='hidden', margin='0')
                                   )

    def populate_function_selector(self, change=None):
        """Rebuild the function selector's options and select the catch-all entry.

        The option list always starts with 'All Functional Peptides'. When
        ``self.all_functions`` is available its entries follow in their
        existing order, except for the aggregate labels 'Minor Functions',
        'Functional Peptides' and 'Non-Functional Peptides', which are left out.

        Args:
            change: Unused; accepted so the method can be wired up as a
                widget observer callback.
        """
        all_label = 'All Functional Peptides'
        # Aggregate rows that must never be offered as individual choices.
        hidden_labels = ('Minor Functions', 'Functional Peptides', 'Non-Functional Peptides')

        if self.all_functions is None:
            choices = [all_label]
        else:
            candidates = [all_label] + self.all_functions
            choices = [name for name in candidates if name not in hidden_labels]

        self.function_selector.options = choices
        self.function_selector.value = (all_label,)

    def on_plot_type_change(self, change):
        """Re-synchronise which controls are enabled after a plot-type or filter change.

        ``change.new`` is compared against the filter options ('Both',
        'Selected Function(s)', 'Selected Protein(s)', 'No Filter',
        'Functional vs Non-Functional Peptides'); the current plot type is read
        from ``self.plot_type.value``. A ``change.new`` that matches none of
        the filter options only triggers the generic enabling performed at the
        end of the non-correlation path.

        Args:
            change: Change notification exposing ``new`` and, optionally,
                ``old`` as attributes.

        Returns:
            None. Exceptions are caught, printed with a traceback and not
            re-raised.
        """
        # Filter values whose branches below explicitly disable the
        # "group unselected" checkbox; the generic enabling step must not
        # undo that decision.
        no_minor_grouping = ('No Filter', 'Functional vs Non-Functional Peptides')

        try:
            new_value = change.new
            old_value = change.old if hasattr(change, 'old') else None

            # Leaving 'No Filter': restore a baseline state first; the branch
            # for the new value then adjusts the selectors again.
            if old_value == 'No Filter':
                self.protein_selector.disabled = True
                self.function_selector.disabled = True
                if self.plot_type.value != 'Pie Charts':
                    self.metric_type.disabled = False
                self.plot_type.disabled = False
                self.invert_plot.disabled = False
                self.plot_minor.disabled = False

            if self.plot_type.value == 'Corr. Scatter Plots':
                # Correlation plots always use absolute absorbance and never
                # offer orientation or minor-grouping controls.
                self.invert_plot.disabled = True
                self.plot_minor.disabled = True
                self.plot_minor.value = False
                self.metric_type.disabled = True
                self.metric_type.value = 'Absolute'
                self.abs_or_count.disabled = True
                self.abs_or_count.value = 'Absorbance'

                # Only the selectors depend on the filter type here.
                if new_value == 'Both':
                    self.protein_selector.disabled = False
                    self.function_selector.disabled = False
                elif new_value == 'Selected Function(s)':
                    self.protein_selector.disabled = True
                    self.function_selector.disabled = False
                elif new_value == 'Selected Protein(s)':
                    self.protein_selector.disabled = False
                    self.function_selector.disabled = True
                elif new_value in no_minor_grouping:
                    self.protein_selector.disabled = True
                    self.function_selector.disabled = True

            else:
                if new_value == 'Both':
                    self.protein_selector.disabled = False
                    self.function_selector.disabled = False
                    if self.plot_type.value != 'Pie Charts':
                        self.metric_type.disabled = False
                    self.invert_plot.disabled = False
                    self.plot_type.disabled = False
                    self.plot_minor.disabled = False

                    self.populate_protein_selector()
                    self.populate_function_selector()

                elif new_value == 'Functional vs Non-Functional Peptides':
                    self.protein_selector.disabled = True
                    self.function_selector.disabled = True
                    self.plot_minor.disabled = True
                    self.plot_minor.value = False

                elif new_value == 'Selected Function(s)':
                    # Reset and lock the protein selector, unlock functions.
                    self.protein_selector.value = ['All Proteins (No Filter)']
                    self.protein_selector.disabled = True
                    self.function_selector.disabled = False
                    if self.plot_type.value != 'Pie Charts':
                        self.metric_type.disabled = False
                    self.plot_type.disabled = False
                    self.invert_plot.disabled = False
                    self.plot_minor.disabled = False

                    self.populate_function_selector()
                    # A per-protein orientation makes no sense without a
                    # protein selection.
                    if self.invert_plot.value == 'By Protein':
                        self.invert_plot.value = 'By Function'

                elif new_value == 'Selected Protein(s)':
                    # Reset and lock the function selector, unlock proteins.
                    self.function_selector.value = ['All Functional Peptides']
                    self.function_selector.disabled = True
                    self.protein_selector.disabled = False
                    if self.plot_type.value != 'Pie Charts':
                        self.metric_type.disabled = False
                    self.invert_plot.disabled = False
                    self.plot_type.disabled = False
                    self.plot_minor.disabled = False

                    self.populate_protein_selector()

                    if self.invert_plot.value == 'By Function':
                        self.invert_plot.value = 'By Protein'

                elif new_value == 'No Filter':
                    self.protein_selector.disabled = True
                    self.function_selector.disabled = True
                    self.plot_type.disabled = False
                    self.invert_plot.disabled = False
                    self.plot_minor.disabled = True
                    self.plot_minor.value = False
                    if self.plot_type.value != 'Pie Charts':
                        self.metric_type.disabled = False

                # Generic enabling for every non-correlation plot.
                self.abs_or_count.disabled = False
                self.invert_plot.disabled = False
                # Previously this unconditionally re-enabled the checkbox and
                # silently cancelled the two branches that disable it.
                if new_value not in no_minor_grouping:
                    self.plot_minor.disabled = False
                if self.plot_type.value != 'Pie Charts':
                    self.metric_type.disabled = False

            # Grey out whichever selector ended up disabled.
            self.protein_selector.style.text_color = 'grey' if self.protein_selector.disabled else 'black'
            self.function_selector.style.text_color = 'grey' if self.function_selector.disabled else 'black'

        except Exception as e:
            print(f"Error in on_plot_type_change: {str(e)}")
            import traceback
            traceback.print_exc()
                    
    def contains_function(self, func_string, target_function):
        """Tell whether ``target_function`` is one of the ';'-separated entries of ``func_string``.

        Each entry is compared after stripping surrounding whitespace; the
        match is exact (no substring matching).

        Args:
            func_string: Annotation text such as ``"ACE; Antimicrobial"``.
                Anything that is not a ``str`` (None, NaN, numbers) yields False.
            target_function: The entry to look for.

        Returns:
            bool: True when an entry equals ``target_function``.
        """
        # A str can never be NA, so the type check alone rejects missing values.
        if not isinstance(func_string, str):
            return False
        entries = tuple(piece.strip() for piece in func_string.split(';'))
        return target_function in entries

    def calculate_bioactivt_count_and_dict(self):
        """Aggregate absorbance sums and unique-peptide counts per group and function.

        For every ``(column, group)`` pair from ``self.avg_columns`` and
        ``self.stripped_columns``, rows of ``self.merged_df`` with a non-zero,
        non-missing value in ``column`` are split into peptides without a
        function annotation (missing or empty string) and annotated peptides.
        Annotated rows are tallied per individual function, where a row
        contributes to every ';'-separated function it lists.

        Results are stored on the instance:

        * ``unique_function_absorbance_dict``: ``{group: {function: summed value}}``
        * ``unique_function_counts_dict``: ``{group: {function: unique 'unique ID' count}}``
        * ``total_functional_peptides_dict``: ``{group: unique annotated peptides}``

        For groups currently selected in ``self.group_selector`` the two
        nested dictionaries also receive 'Non-Functional Peptides' and
        'Functional Peptides' totals. Each inner dictionary is ordered by value,
        largest first. A copy of the merged frame is kept in
        ``self.full_merged_df``.

        Returns:
            None. When ``self.merged_df`` is None nothing is computed or stored.
        """
        absorbance_by_group = {}
        counts_by_group = {}
        functional_totals = {}
        # Totals for peptides without / with a function annotation.
        unannotated_absorbance = {}
        unannotated_counts = {}
        annotated_absorbance = {}
        annotated_counts = {}

        # Selected groups without repeats, first occurrence wins.
        chosen_groups = []
        for candidate in self.group_selector.value:
            if candidate not in chosen_groups:
                chosen_groups.append(candidate)

        if self.merged_df is None:
            return
        frame = self.merged_df.copy()
        self.full_merged_df = frame.copy()

        for value_col, group_name in zip(self.avg_columns, self.stripped_columns):
            # Every processed group gets zeroed totals, even if it has no data.
            for totals in (functional_totals, unannotated_absorbance, unannotated_counts,
                           annotated_absorbance, annotated_counts):
                totals[group_name] = 0

            subset = frame[['unique ID', 'function', value_col]].copy()
            measured = subset[(subset[value_col] != 0) & subset[value_col].notna()]
            if measured.empty:
                continue

            # Missing or empty annotation means "non-functional".
            lacks_function = measured['function'].isna() | (measured['function'] == '')
            unannotated = measured[lacks_function]
            if not unannotated.empty:
                unannotated_absorbance[group_name] = unannotated[value_col].sum()
                unannotated_counts[group_name] = unannotated['unique ID'].nunique()

            annotated = measured[~lacks_function & measured['function'].notna()]
            if annotated.empty:
                continue

            annotated_absorbance[group_name] = annotated[value_col].sum()
            annotated_counts[group_name] = annotated['unique ID'].nunique()

            # Counting works on one row per peptide; absorbance uses all rows.
            one_row_per_peptide = annotated.drop_duplicates(subset='unique ID')
            functional_totals[group_name] = one_row_per_peptide['unique ID'].nunique()

            tally_passes = (
                (annotated, absorbance_by_group, lambda rows: rows[value_col].sum()),
                (one_row_per_peptide, counts_by_group, lambda rows: rows['unique ID'].nunique()),
            )
            for rows_in, target, reduce_rows in tally_passes:
                # Collect every distinct, non-empty function name in this frame.
                function_names = set()
                for raw in rows_in['function'].dropna():
                    if isinstance(raw, str):
                        function_names.update([part.strip() for part in raw.split(';') if part.strip()])

                for name in function_names:
                    is_hit = rows_in['function'].apply(
                        lambda cell: self.contains_function(cell, name)
                    )
                    hits = rows_in[is_hit]
                    if hits.empty:
                        continue
                    target.setdefault(group_name, {})[name] = reduce_rows(hits)

        # Attach the aggregate rows, but only for groups the user selected.
        aggregate_rows = (
            (unannotated_absorbance, absorbance_by_group, 'Non-Functional Peptides'),
            (unannotated_counts, counts_by_group, 'Non-Functional Peptides'),
            (annotated_absorbance, absorbance_by_group, 'Functional Peptides'),
            (annotated_counts, counts_by_group, 'Functional Peptides'),
        )
        for group in chosen_groups:
            for source, target, label in aggregate_rows:
                if group in source:
                    target.setdefault(group, {})[label] = source[group]

        # Order each group's entries from largest to smallest value.
        for nested in (counts_by_group, absorbance_by_group):
            for group in list(nested):
                ranked = sorted(nested[group].items(), key=lambda item: item[1], reverse=True)
                nested[group] = dict(ranked)

        self.unique_function_absorbance_dict = absorbance_by_group
        self.unique_function_counts_dict = counts_by_group
        self.total_functional_peptides_dict = functional_totals

    def get_single_color(self):
        """Return the colour to use when a plot needs exactly one colour.

        The selected scheme (``self.color_scheme.value``) is returned unchanged
        when its lower-cased form is listed in ``self.single_color_scheme``.
        If a scheme is selected but is not a single colour, a warning banner is
        displayed and Carolina Blue is used. If no scheme is selected at all
        (the widget is missing or its value is empty) Carolina Blue is returned
        without any message.

        Returns:
            str: The selected single-colour scheme, or '#7BAFD4' as fallback.
            Unexpected errors are printed and also fall back to '#7BAFD4'.
        """
        carolina_blue = '#7BAFD4'

        try:
            # Default to "nothing selected" so the name is always bound;
            # previously a missing selection surfaced as an UnboundLocalError
            # that was only masked by the broad except below.
            scheme = None
            if hasattr(self, 'color_scheme') and self.color_scheme.value:
                scheme = self.color_scheme.value

            if scheme is None:
                return carolina_blue

            # Check for single color schemes
            if scheme.lower() not in self.single_color_scheme:
                # Display error message to notify user to choose new options
                warning_html = f"""
                <div style='color: #856404; background-color: #fff3cd; border: 1px solid #ffeeba; border-radius: 4px; padding: 10px; margin: 10px 0;'>
                    <strong>Warning:</strong> Please choose a single color scheme otherwise Carolina Blue will be used by defualt.<br>
                </div>
                """  
                display(HTML(warning_html))
                return carolina_blue

            return scheme

        except Exception as e:
            print(f"Error getting color: {e}")
            return carolina_blue
    
    def get_color_sequence(self, n_colors, ncolor=None):
        """Build a list of colours from the currently selected scheme.

        Args:
            n_colors: How many colours to return.
            ncolor: Legacy alias; when given it takes precedence over
                ``n_colors``.

        Returns:
            list[str]: ``[]`` for a non-positive count. For 'rainbow'/'hsv',
            header entries starting with '---', unknown scheme names, or any
            error, evenly spaced ``hsl(h,70%,60%)`` strings with hues from 0
            to 330. Otherwise entries sampled at evenly spaced positions from
            the matching plotly colour list (colours repeat when more are
            requested than the list holds).
        """
        count = n_colors if ncolor is None else ncolor
        if count <= 0:
            return []

        def hsv_wheel():
            # Shared fallback: hues spread evenly over 0..330 degrees.
            return [f'hsl({h},70%,60%)' for h in np.linspace(0, 330, count)]

        try:
            if hasattr(self, 'color_scheme') and self.color_scheme.value:
                scheme = self.color_scheme.value
            else:
                scheme = 'HSV'

            if scheme.lower() in self.single_color_scheme:
                # A single colour cannot provide a sequence; warn and carry on
                # (the lookup below finds nothing and ends in the HSV fallback).
                warning_html = f"""
                <div style='color: #856404; background-color: #fff3cd; border: 1px solid #ffeeba; border-radius: 4px; padding: 10px; margin: 10px 0;'>
                    <strong>Warning:</strong> Please choose from a color pallette not a single color. Otherwise 'HSV' pallette will be used by defualt.<br>
                </div>
                """  
                display(HTML(warning_html))

            # Section headers in the dropdown are not real schemes.
            if scheme.startswith('---'):
                scheme = 'HSV'

            if scheme.lower() in ['rainbow', 'hsv']:
                return hsv_wheel()

            # Look the name up in plotly's colour families, categorical first.
            palette = None
            for family in ('qualitative', 'sequential', 'diverging', 'cyclical'):
                palette = getattr(getattr(px.colors, family), scheme, None)
                if palette is not None:
                    break

            if palette:
                # Evenly spaced positions, truncated to whole indices.
                last_index = len(palette) - 1
                return [palette[int(pos)] for pos in np.linspace(0, last_index, count)]

            return hsv_wheel()

        except Exception as e:
            print(f"Error generating colors: {e}")
            return hsv_wheel()
    
    def calculate_group_metrics(self):
        """Compute total and relative abundance / peptide counts per group.

        Group columns are the columns of ``self.filtered_df`` whose label
        starts with 'Avg_'; the group name is the label with that prefix
        removed. When the filter mode is 'No Filter', ``self.filtered_df`` is
        first replaced by a copy of ``self.merged_df``.

        Returns:
            dict | None: ``{group: {'total_abundance', 'unique_peptides',
            'relative_abundance', 'relative_peptides'}}`` where the relative
            values are percentages of the sum over all groups (0 when that sum
            is not positive). None when no 'Avg_' column exists or when an
            error occurs (the error is printed with a traceback).
        """
        prefix = 'Avg_'

        try:
            # Inside the try so that a missing merged frame is reported like
            # every other failure instead of raising to the caller.
            if self.plot_func_or_pro.value == 'No Filter':
                self.filtered_df = self.merged_df.copy()

            frame = self.filtered_df

            # Non-string labels cannot be group columns; skip them rather
            # than failing on .startswith.
            group_cols = [
                col for col in frame.columns
                if isinstance(col, str) and col.startswith(prefix)
            ]
            if not group_cols:
                print("No group columns (Avg_*) found in dataframe")
                return None

            group_metrics = {}
            for col in group_cols:
                # Strip only the leading prefix so the name round-trips with
                # f'Avg_{group}' even if the group itself contains 'Avg_'.
                group_name = col[len(prefix):]
                values = frame[col]

                detected = frame[values.notna() & (values > 0)]['unique ID'].nunique()
                group_metrics[group_name] = {
                    'total_abundance': values.sum(),
                    'unique_peptides': detected
                }

            total_abundance_all = sum(m['total_abundance'] for m in group_metrics.values())
            total_peptides_all = sum(m['unique_peptides'] for m in group_metrics.values())

            for metrics in group_metrics.values():
                metrics['relative_abundance'] = (metrics['total_abundance'] / total_abundance_all * 100
                                                 if total_abundance_all > 0 else 0)
                metrics['relative_peptides'] = (metrics['unique_peptides'] / total_peptides_all * 100
                                                if total_peptides_all > 0 else 0)

            return group_metrics

        except Exception as e:
            print(f"Error calculating group metrics: {str(e)}")
            traceback.print_exc()
            return None

    def create_function_df(self):
        """
        Creates a DataFrame for functions similar to protein_df when by_sample is selected.
        Updates self.function_df with the results.
        """
        # Get all unique functions and groups

        selected_groups = self.group_selector.value

        
        # Create initial data structure for DataFrame
        data = []
        
        if self.plot_func_or_pro.value == 'Functional vs Non-Functional Peptides':
            all_functions = ['Functional Peptides', 'Non-Functional Peptides']           
        else:
            all_functions = [func for func in self.all_functions if func != 'Functional Peptides' and func != 'Non-Functional Peptides']
        # Process each function
        for function in all_functions:
            row_data = {
                'Description': function  # Similar to Protein_ID in protein_df
            }
            
            # Add absorbance columns and their relative values
            for group in selected_groups:
                # Absorbance columns
                avg_col = f'Avg_{group}'
                rel_avg_col = f'Rel_Avg_{group}'
                # Get absorbance value
                absorbance = self.unique_function_absorbance_dict.get(group, {}).get(function, 0)
                
                row_data[avg_col] = absorbance
                
                # Calculate relative absorbance (will update after collecting all data)
                row_data[rel_avg_col] = 0.0
                
                # Count columns
                count_col = f'Count_{group}'
                rel_count_col = f'Rel_Count_{group}'
                
                # Get count value
                count = self.unique_function_counts_dict.get(group, {}).get(function, 0)
                row_data[count_col] = count
                
                # Calculate relative count (will update after collecting all data)
                row_data[rel_count_col] = 0.0
            
            data.append(row_data)
        # Create DataFrame
        self.function_df = pd.DataFrame(data)
        
        # Calculate relative values for each group
        for group in selected_groups:
            avg_col = f'Avg_{group}'
            rel_avg_col = f'Rel_Avg_{group}'
            count_col = f'Count_{group}'
            rel_count_col = f'Rel_Count_{group}'
            
            # Calculate total absorbance and count for this group
            total_absorbance = self.function_df[avg_col].sum()
            total_count = self.function_df[count_col].sum()
            
            # Calculate relative values
            if total_absorbance > 0:
                self.function_df[rel_avg_col] = (self.function_df[avg_col] / total_absorbance * 100).round(6)
            
            if total_count > 0:
                self.function_df[rel_count_col] = (self.function_df[count_col] / total_count * 100).round(6)
        
        # Sort by total absorbance across all groups (similar to protein_df)
        absorbance_cols = [col for col in self.function_df.columns if col.startswith('Avg_')]
        self.function_df['avg_absorbance_all'] = (
            self.function_df[absorbance_cols].sum(axis=1) / 
            self.function_df[absorbance_cols].sum().sum() * 100
        ).round(6)
        
        # Sort by total absorbance
        self.function_df = self.function_df.sort_values('avg_absorbance_all', ascending=False)
        
        use_all_functions = False
        # Handle Minor Functions if needed
        selected_functions = list(self.function_selector.value)
        selected_functions = [f for f in selected_functions if f != '---Select Individual Functions---']
        if self.plot_type.value != 'Functional vs Non-Functional Peptides':
            if self.plot_minor.value  == True:
                if 'All Functional Peptides' in selected_functions:
                    selected_functions = self.all_functions
                    use_all_functions = 'All Functional Peptides' in selected_functions

                selected_functions = selected_functions.remove('All Functional Peptides') if 'All Functional Peptides' in selected_functions else selected_functions
                if len(selected_functions) != self.all_functions:
                    self.selected_functions = selected_functions.append('Minor Functions')

            else: # if plot_minor is False

                if 'All Functional Peptides' in selected_functions:
                    selected_functions = self.all_functions
                    use_all_functions = True
                    selected_functions = [f for f in selected_functions if f != 'All Functional Peptides'] if 'All Functional Peptides' in selected_functions else selected_functions
                selected_functions = [f for f in selected_functions if f != 'Minor Functions'] if 'Minor Functions' in selected_functions else selected_functions
            if self.plot_func_or_pro.value == 'No Filter':
                selected_functions = self.all_functions
            if (len(selected_functions) - 1) == len(self.all_functions):
                selected_functions = [f for f in selected_functions if f != 'Minor Functions'] if 'Minor Functions' in selected_functions else selected_functions
        else:
            selected_functions = ['All Functional Peptides', 'Non-Functional Peptides']
        # Preserve order but remove redundant minor function occurrences
        self.selected_functions = []
        for func in selected_functions:
            if func not in self.selected_functions:
                self.selected_functions.append(func)
        
        if self.plot_func_or_pro.value != 'No Filter' or self.plot_func_or_pro.value != 'Functional vs Non-Functional Peptides':
        
            if not use_all_functions:
                major_functions = [f for f in selected_functions if f != 'All Functional Peptides']
                major_functions.remove('Minor Functions') if 'Minor Functions' in major_functions else major_functions
                minor_functions_data = {
                    'Description': 'Minor Functions'
                }
                # Calculate aggregated values for minor functions
                for group in selected_groups:
                    avg_col = f'Avg_{group}'
                    rel_avg_col = f'Rel_Avg_{group}'
                    count_col = f'Count_{group}'
                    rel_count_col = f'Rel_Count_{group}'
                    
                    # Filter minor functions
                    minor_mask = ~self.function_df['Description'].isin(major_functions)
                    minor_functions = self.function_df[minor_mask]
                    
                    # Sum values for minor functions
                    minor_functions_data[avg_col] = minor_functions[avg_col].sum()
                    minor_functions_data[count_col] = minor_functions[count_col].sum()
                    
                    # Calculate relative values
                    total_absorbance = self.function_df[avg_col].sum()
                    total_count = self.function_df[count_col].sum()
                    
                    if total_absorbance > 0:
                        minor_functions_data[rel_avg_col] = (minor_functions_data[avg_col] / total_absorbance * 100)
                    else:
                        minor_functions_data[rel_avg_col] = 0.0
                        
                    if total_count > 0:
                        minor_functions_data[rel_count_col] = (minor_functions_data[count_col] / total_count * 100)
                    else:
                        minor_functions_data[rel_count_col] = 0.0
                
                # Calculate avg_absorbance_all for Minor Functions
                absorbance_sum = sum(minor_functions_data[col] for col in absorbance_cols)
                total_absorbance_sum = self.function_df[absorbance_cols].sum().sum()
                if total_absorbance_sum > 0:
                    minor_functions_data['avg_absorbance_all'] = (absorbance_sum / total_absorbance_sum * 100)
                else:
                    minor_functions_data['avg_absorbance_all'] = 0.0
                
                # Remove minor functions from main DataFrame and add aggregated Minor Functions row
                self.function_df = self.function_df[self.function_df['Description'].isin(major_functions)]
                self.function_df = safe_concat([
                    self.function_df,
                    pd.DataFrame([minor_functions_data])
                ], ignore_index=True)
        
        elif self.plot_type.value == 'Functional vs Non-Functional Peptides':
            # Create a simplified DataFrame with just two rows: functional and non-functional
            func_data = []
            self.function_df = pd.DataFrame()
            for group in selected_groups:
                # Extract the values from dictionaries
                if group in self.unique_function_absorbance_dict and group in self.unique_function_counts_dict:
                    # Get non-bioactive values
                    non_bioactive_abs = self.unique_function_absorbance_dict[group].get('Non-Functional Peptides', 0)
                    non_bioactive_count = self.unique_function_counts_dict[group].get('Non-Functional Peptides', 0)
                    
                    # Get all bioactive values
                    all_bioactive_abs = self.unique_function_absorbance_dict[group].get('Functional Peptides', 0)
                    all_bioactive_count = self.unique_function_counts_dict[group].get('Functional Peptides', 0)
                    
                    # Add functional row
                    func_row = {
                        'Description': 'All Functional Peptides',
                        f'Avg_{group}': all_bioactive_abs,
                        f'Count_{group}': all_bioactive_count
                    }
                    
                    # Add non-functional row
                    non_func_row = {
                        'Description': 'Non-Functional Peptides',
                        f'Avg_{group}': non_bioactive_abs,
                        f'Count_{group}': non_bioactive_count
                    }
                    
                    # Add rows if they don't exist yet
                    if not func_data:
                        func_data.append(func_row)
                        func_data.append(non_func_row)
                    else:
                        # Update existing rows with additional group data
                        func_data[0].update({
                            f'Avg_{group}': all_bioactive_abs,
                            f'Count_{group}': all_bioactive_count
                        })
                        func_data[1].update({
                            f'Avg_{group}': non_bioactive_abs,
                            f'Count_{group}': non_bioactive_count
                        })
            
            # Create new DataFrame with just these two rows
            self.function_df = pd.DataFrame(func_data)
            
            # Calculate relative values for each group
            for group in selected_groups:
                avg_col = f'Avg_{group}'
                rel_avg_col = f'Rel_Avg_{group}'
                count_col = f'Count_{group}'
                rel_count_col = f'Rel_Count_{group}'
                
                # Calculate total absorbance and count for this group
                total_absorbance = self.function_df[avg_col].sum()
                total_count = self.function_df[count_col].sum()
                
                # Calculate relative values
                if total_absorbance > 0:
                    self.function_df[rel_avg_col] = (self.function_df[avg_col] / total_absorbance * 100).round(6)
                
                if total_count > 0:
                    self.function_df[rel_count_col] = (self.function_df[count_col] / total_count * 100).round(6)
            
            # Calculate average absorbance across all groups for sorting
            absorbance_cols = [col for col in self.function_df.columns if col.startswith('Avg_')]
            if not self.function_df.empty and absorbance_cols:
                total_sum = self.function_df[absorbance_cols].sum().sum()
                if total_sum > 0:
                    self.function_df['avg_absorbance_all'] = (
                        self.function_df[absorbance_cols].sum(axis=1) / total_sum * 100
                    ).round(6)
                else:
                    self.function_df['avg_absorbance_all'] = 0
            
            # Set the selected functions list for proper filtering and legend
            self.selected_functions = ['All Functional Peptides', 'Non-Functional Peptides']            
        
        return self.function_df
    
    def get_selected_proteins(self):
        """Resolve the proteins to plot from the current widget state.

        The explicit protein selection is honoured only when the
        ``plot_func_or_pro`` mode is 'Selected Protein(s)' or 'Both', a
        ``protein_selector`` widget exists and it has a non-empty value that
        does not contain 'All Proteins (No Filter)'. In every other case the
        full ``self.all_proteins`` list is used. When an explicit selection is
        used, ``plot_minor`` is enabled and the selection differs in length
        from the full list, 'Minor Proteins' is appended.

        The de-duplicated result (first occurrence wins, order preserved) is
        stored on ``self.selected_proteins`` and returned.
        """
        every_protein = self.all_proteins

        # Explicit picks only count in the two protein-aware modes
        explicit_picks = None
        if self.plot_func_or_pro.value in ('Selected Protein(s)', 'Both'):
            if hasattr(self, 'protein_selector') and self.protein_selector.value:
                explicit_picks = list(self.protein_selector.value)

        if explicit_picks is None or 'All Proteins (No Filter)' in explicit_picks:
            # No usable selection, or the 'All' sentinel was chosen
            candidates = every_protein.copy()
        else:
            candidates = explicit_picks
            # A partial selection optionally gets the aggregated bucket
            wants_minor_bucket = self.plot_minor.value and len(candidates) != len(every_protein)
            if wants_minor_bucket and 'Minor Proteins' not in candidates:
                candidates.append('Minor Proteins')

        # Keep only the first occurrence of each name, preserving order
        self.selected_proteins = [
            name for position, name in enumerate(candidates)
            if name not in candidates[:position]
        ]

        return self.selected_proteins  # Return the list for immediate use
    
    def process_protein_data(self):
        """Aggregate peptide rows into per-protein absorbance and count tables.

        Reads ``self.merged_df`` (one row per peptide; the columns
        'Master Protein Accessions', 'unique ID' and one ``Avg_<group>``
        column per selected group are used) and populates:

        * ``self.protein_df`` - one row per accession string with
          ``Avg_<group>``, ``Rel_Avg_<group>``, ``Count_<group>``,
          ``Rel_Count_<group>``, 'Description' and 'avg_absorbance_all',
          restricted at the end to the selected proteins.
        * ``self.protein_df_full`` - the same table before that restriction.
        * ``self.sum_df`` - total absorbance per selected ``Avg_`` column.
        * ``self.protein_count_bysample_dict`` - number of peptide rows with
          absorbance > 0 per group.
        * ``self.protein_count_byprotein_dict`` - unique peptide count per
          protein description (plus 'Minor Proteins').
        * ``self.protein_sample_distribution_dict`` - per-protein distribution
          of counts and absorbance across groups (plus 'Minor Proteins').

        Returns:
            bool: False when there is no merged data, no protein dictionary or
            no selected group; True once all tables have been rebuilt.
        """
        if self.merged_df is None or not self.protein_dict:
            return False

        # Refresh the protein selection first (it is also stored on self.selected_proteins)
        selected_proteins = self.get_selected_proteins()

        selected_groups = self.group_selector.value
        if not selected_groups:
            # Without groups there is nothing to aggregate. The Avg_ column list
            # used to be defined only for a non-empty selection, so this path
            # previously ended in a NameError.
            return False

        absorbance_cols = [f'Avg_{group}' for group in selected_groups]

        # Work on a copy that only keeps the Avg_ columns of the selected groups
        df = self.merged_df.copy()
        unselected_cols = [
            col for col in self.avg_columns
            if col not in absorbance_cols and col in df.columns
        ]
        df = df.drop(columns=unselected_cols)

        # Discard every unique ID that has a row with zero total absorbance.
        # The float sum is compared directly: truncating it to int first would
        # also discard peptides whose total lies strictly between -1 and 1.
        total_absorbance = df[absorbance_cols].sum(axis=1)
        zero_ids = df.loc[total_absorbance == 0, 'unique ID']
        peptides_df = df[~df['unique ID'].isin(zero_ids)]

        # One row per accession string, absorbance summed over its peptides
        self.protein_df = (
            peptides_df
            .groupby('Master Protein Accessions', as_index=False)[absorbance_cols]
            .sum()
        )

        # Relative absorbance of each protein within a group
        for col in absorbance_cols:
            col_sum = self.protein_df[col].sum()
            if col_sum > 0:  # Avoid division by zero
                self.protein_df[f'Rel_{col}'] = (self.protein_df[col] / col_sum) * 100
            else:
                self.protein_df[f'Rel_{col}'] = 0

        # Total absorbance per selected group
        self.sum_df = pd.DataFrame({
            'Sample': absorbance_cols,
            'Total_Sum': [self.protein_df[col].sum() for col in absorbance_cols]
        })
        group_totals = dict(zip(self.sum_df['Sample'], self.sum_df['Total_Sum']))

        # Resolve a display name for every accession string.
        # NOTE(review): combined accessions are split on ',' here but on ';' in
        # the peptide counting below - confirm which separator the data uses.
        name_list = []
        for protein_id in self.protein_df['Master Protein Accessions']:
            if ',' in protein_id:
                lookup_key = '; '.join(protein_id.split(','))
            else:
                lookup_key = protein_id
            name_list.append(self._fetch_protein_names(lookup_key))

        self.protein_df['Description'] = name_list
        # Strip list punctuation left over from stringifying the fetched names
        self.protein_df['Description'] = self.protein_df['Description'].astype(str).str.replace(r"['\['\]]", "", regex=True)

        # Share of the overall absorbance contributed by each protein (used for sorting)
        total_sum = self.protein_df[absorbance_cols].sum().sum()
        row_sums = self.protein_df[absorbance_cols].sum(axis=1)
        if total_sum != 0:
            self.protein_df['avg_absorbance_all'] = (row_sums / total_sum * 100).round(2)
        else:
            # Avoid a NaN column when nothing was measured
            self.protein_df['avg_absorbance_all'] = 0.0

        # Sort proteins by abundance for consistent ordering
        self.protein_df = self.protein_df.sort_values('avg_absorbance_all', ascending=False)

        # Peptide rows per group, and unique peptides per protein
        self.protein_count_bysample_dict = {}
        self.protein_count_byprotein_dict = {}

        # Unique peptide IDs attributed to each protein description
        protein_to_peptides = defaultdict(set)

        # Count columns start at 0.0 so they are float typed
        for group in selected_groups:
            self.protein_df[f'Count_{group}'] = 0.0
            self.protein_df[f'Rel_Count_{group}'] = 0.0

        # Accession string -> row label / description in protein_df
        accession_to_idx = dict(zip(self.protein_df['Master Protein Accessions'], self.protein_df.index))
        accession_to_description = dict(zip(self.protein_df['Master Protein Accessions'], self.protein_df['Description']))

        for group in selected_groups:
            count_col = f'Count_{group}'
            rel_count_col = f'Rel_Count_{group}'

            # Peptides that are present in this group
            group_peptides = df[df[f'Avg_{group}'] > 0]
            self.protein_count_bysample_dict[group] = len(group_peptides)

            # Each unique ID is counted at most once per group
            counted_peptides = set()

            for accession, peptide_id in zip(group_peptides['Master Protein Accessions'],
                                             group_peptides['unique ID']):
                # Rows without an accession or without a unique ID cannot be attributed
                if pd.isna(accession) or pd.isna(peptide_id):
                    continue
                if peptide_id in counted_peptides:
                    continue

                # A peptide mapping to several proteins is counted only for the
                # first accession that has a row in protein_df.
                # NOTE(review): a combined accession string is therefore never
                # matched against its own combined row - confirm this is intended.
                if ';' in accession:
                    candidates = [acc.strip() for acc in accession.split(';') if acc.strip()]
                else:
                    candidates = [accession]
                matched = next((acc for acc in candidates if acc in accession_to_idx), None)
                if matched is None:
                    continue  # No matching protein row

                self.protein_df.at[accession_to_idx[matched], count_col] += 1
                protein_desc = accession_to_description.get(matched, matched)
                protein_to_peptides[protein_desc].add(peptide_id)
                counted_peptides.add(peptide_id)

            # Relative counts as percentages of all peptide rows in the group
            total_value = self.protein_count_bysample_dict[group]
            if total_value > 0:
                self.protein_df[rel_count_col] = (self.protein_df[count_col] / total_value) * 100

        # Number of unique peptides per protein
        for protein, peptides in protein_to_peptides.items():
            self.protein_count_byprotein_dict[protein] = len(peptides)

        # Decide once which metric backs the legacy 'total'/'values'/'relative'
        # keys. This used to be assigned inside the per-protein loop, which left
        # it undefined when that loop had nothing to iterate over.
        use_count = False
        if hasattr(self, 'abs_or_count'):
            metric_selector = self.abs_or_count
            use_count = ('count' in metric_selector.value.lower()
                         if hasattr(metric_selector, 'value') else True)

        # Proteins that keep their own entry; everything else is pooled as 'Minor Proteins'
        major_proteins = []
        if hasattr(self, 'selected_proteins') and selected_proteins:
            major_proteins = [name for name in selected_proteins if name != 'Minor Proteins']

        self.protein_sample_distribution_dict = {}

        # Aggregated values of all proteins that are not major
        minor_data = {
            'counts': {group: 0 for group in selected_groups},
            'count_relative': {group: 0 for group in selected_groups},
            'absorbance': {group: 0 for group in selected_groups},
            'absorbance_relative': {group: 0 for group in selected_groups},
            'unique_peptide_count': 0,
            'total_value': 0,
            'total_absorbance': 0,
            'total_count': 0
        }
        minor_peptides = set()

        for _, row in self.protein_df.iterrows():
            protein_name = row['Description']

            # Skip if protein name is empty or NaN
            if pd.isna(protein_name) or not protein_name:
                continue

            count_values = {group: row[f'Count_{group}'] for group in selected_groups}
            absorbance_values = {group: row[f'Avg_{group}'] for group in selected_groups}

            # Totals are sums across the selected groups
            protein_total_count = sum(count_values.values())
            protein_total_absorbance = sum(absorbance_values.values())

            # Share of this protein's own total that falls into each group
            if protein_total_count > 0:
                count_relative = {
                    group: (count / protein_total_count) * 100
                    for group, count in count_values.items()
                }
            else:
                count_relative = {group: 0 for group in selected_groups}

            if protein_total_absorbance > 0:
                absorbance_relative = {
                    group: (absorbance / protein_total_absorbance) * 100
                    for group, absorbance in absorbance_values.items()
                }
            else:
                absorbance_relative = {group: 0 for group in selected_groups}

            protein_data = {
                'counts': count_values,
                'count_relative': count_relative,
                'absorbance': absorbance_values,
                'absorbance_relative': absorbance_relative,
                'unique_peptide_count': len(protein_to_peptides[protein_name])
                if protein_name in protein_to_peptides else 0,
                'total_count': protein_total_count,
                'total_absorbance': protein_total_absorbance,
            }

            # Backward-compatible aliases for the active metric
            if use_count:
                protein_data['total'] = protein_total_count
                protein_data['values'] = count_values
                protein_data['relative'] = count_relative
            else:
                protein_data['total'] = protein_total_absorbance
                protein_data['values'] = absorbance_values
                protein_data['relative'] = absorbance_relative

            if major_proteins and protein_name not in major_proteins:
                # Minor protein: fold its values into the aggregate
                for group in selected_groups:
                    minor_data['counts'][group] += count_values[group]
                    minor_data['absorbance'][group] += absorbance_values[group]
                if protein_name in protein_to_peptides:
                    minor_peptides.update(protein_to_peptides[protein_name])
                minor_data['total_count'] += protein_total_count
                minor_data['total_absorbance'] += protein_total_absorbance
            else:
                # Major protein: keep its individual data
                self.protein_sample_distribution_dict[protein_name] = protein_data

        minor_data['unique_peptide_count'] = len(minor_peptides)

        # Relative distributions for the minor aggregate
        if minor_data['total_count'] > 0:
            for group in selected_groups:
                minor_data['count_relative'][group] = (minor_data['counts'][group] / minor_data['total_count'] * 100)

        if minor_data['total_absorbance'] > 0:
            for group in selected_groups:
                minor_data['absorbance_relative'][group] = (minor_data['absorbance'][group] / minor_data['total_absorbance'] * 100)

        # Backward-compatible aliases for the minor aggregate
        if use_count:
            minor_data['total'] = minor_data['total_count']
            minor_data['values'] = minor_data['counts']
            minor_data['relative'] = minor_data['count_relative']
        else:
            minor_data['total'] = minor_data['total_absorbance']
            minor_data['values'] = minor_data['absorbance']
            minor_data['relative'] = minor_data['absorbance_relative']

        self.protein_sample_distribution_dict['Minor Proteins'] = minor_data
        self.protein_count_byprotein_dict['Minor Proteins'] = minor_data['unique_peptide_count']

        # Add a row for "Minor Proteins" to the protein_df if not already present
        if 'Minor Proteins' not in self.protein_df['Description'].values and major_proteins:
            minor_row = {
                'Description': 'Minor Proteins',
                'Master Protein Accessions': 'Minor Proteins'
            }
            for group in selected_groups:
                avg_col = f'Avg_{group}'
                minor_row[f'Count_{group}'] = float(minor_data['counts'][group])
                minor_row[f'Rel_Count_{group}'] = 0.0  # Recalculated below
                minor_row[avg_col] = float(minor_data['absorbance'][group])

                group_total = group_totals[avg_col]
                if group_total > 0:
                    minor_row[f'Rel_{avg_col}'] = float((minor_data['absorbance'][group] / group_total) * 100)
                else:
                    minor_row[f'Rel_{avg_col}'] = 0.0

            # safe_concat keeps only the columns shared by all frames, so the
            # minor row must carry 'avg_absorbance_all' as well; otherwise that
            # column is silently removed from the whole table.
            if total_sum != 0:
                minor_row['avg_absorbance_all'] = round(float(minor_data['total_absorbance'] / total_sum * 100), 2)
            else:
                minor_row['avg_absorbance_all'] = 0.0

            self.protein_df = safe_concat([self.protein_df, pd.DataFrame([minor_row])], ignore_index=True)

            # Recalculate relative counts for all proteins, including the new row
            for group in selected_groups:
                total_count = self.protein_count_bysample_dict[group]
                if total_count > 0:
                    self.protein_df[f'Rel_Count_{group}'] = (self.protein_df[f'Count_{group}'] / total_count) * 100

        # Keep the unfiltered table before restricting to the selection
        self.protein_df_full = self.protein_df.copy()

        # Keep only rows whose description is among the selected proteins
        keep_mask = self.protein_df['Description'].isin(selected_proteins)
        self.protein_df = self.protein_df.loc[keep_mask].copy()

        return True

    def reorganize_by_function(self):
        """
        Reorganize the bioactivity metrics by function instead of by protein.

        Calls ``self.process_bioactive_data()`` first and then rebuilds
        ``self.function_distribution_dict``. One entry is created for every row
        of ``self.function_df`` whose ``'Description'`` is in
        ``self.selected_functions``. When ``self.plot_minor.value`` is truthy an
        aggregated ``'Minor Functions'`` entry is added that sums the metrics of
        every other function found in ``self.merged_df['function']``.

        Every entry contains ``counts``, ``count_relative``, ``absorbance`` and
        ``absorbance_relative`` (dicts keyed by group) plus
        ``unique_peptide_count``, ``total_count`` and ``total_absorbance``.
        Entries that could be populated from ``self.function_group_metrics_dict``
        (and the 'Minor Functions' entry) also get the legacy aliases
        ``values``, ``relative`` and ``total``; these point at the count variant
        when ``self.abs_or_count`` exists and is set to "count"
        (case-insensitive), otherwise at the absorbance variant.

        Side effects:
            Sets ``self.function_distribution_dict``, ``self.value_cols`` and
            ``self.rel_cols``.

        Returns:
            None
        """
        self.process_bioactive_data()

        avg_prefix = 'Avg_'

        # Determine whether to use count or absorbance as primary metric
        use_count = hasattr(self, 'abs_or_count') and self.abs_or_count.value.lower() == 'count'

        # Groups are derived from the 'Avg_<group>' columns. Only the leading
        # prefix is stripped so a group name that itself contains 'Avg_' is
        # not mangled.
        all_groups = [col[len(avg_prefix):] for col in self.function_df.columns
                      if col.startswith(avg_prefix)]

        # Column-name lookups for the active metric
        column_kind = 'Count' if use_count else 'Avg'
        self.value_cols = {group: f'{column_kind}_{group}' for group in all_groups}
        self.rel_cols = {group: f'Rel_{column_kind}_{group}' for group in all_groups}

        # process_bioactive_data() returns early without storing metrics when
        # the merged data is unusable, so tolerate a missing attribute here.
        metrics_by_function = getattr(self, 'function_group_metrics_dict', None) or {}

        def add_legacy_aliases(entry):
            """Expose the active metric under the backward-compatible keys."""
            if use_count:
                entry['values'] = entry['counts']
                entry['relative'] = entry['count_relative']
                entry['total'] = entry['total_count']
            else:
                entry['values'] = entry['absorbance']
                entry['relative'] = entry['absorbance_relative']
                entry['total'] = entry['total_absorbance']

        # Initialize the result dictionary
        self.function_distribution_dict = {}

        # Process each selected function listed in the function DataFrame
        for _, row in self.function_df.iterrows():
            function = row['Description']
            if function not in self.selected_functions:
                continue

            entry = {
                'counts': {},
                'count_relative': {},
                'absorbance': {},
                'absorbance_relative': {},
                'unique_peptide_count': 0,
                'total_count': 0,
                'total_absorbance': 0,
            }
            self.function_distribution_dict[function] = entry

            # Leave the empty skeleton in place when no metrics are available
            if function not in metrics_by_function:
                continue
            function_metrics = metrics_by_function[function]

            entry['total_count'] = function_metrics.get('total_count', 0)
            entry['total_absorbance'] = function_metrics.get('total_absorbance', 0)
            entry['unique_peptide_count'] = function_metrics.get('unique_peptide_count', 0)

            for group in all_groups:
                # Skip groups for which no metrics were computed
                if group not in function_metrics:
                    continue
                group_metrics = function_metrics[group]
                entry['counts'][group] = group_metrics.get('count', 0)
                entry['count_relative'][group] = group_metrics.get('rel_count', 0)
                entry['absorbance'][group] = group_metrics.get('absorbance', 0)
                entry['absorbance_relative'][group] = group_metrics.get('rel_absorbance', 0)

            add_legacy_aliases(entry)

        # After processing main functions, handle Minor Functions
        if self.plot_minor.value:
            minor_functions = {
                group: {'count': 0, 'absorbance': 0, 'rel_count': 0, 'rel_absorbance': 0}
                for group in all_groups
            }

            # Collect every function name present in the merged data. The
            # column check avoids a KeyError in the case that
            # process_bioactive_data() only reports with a printed error.
            all_functions = set()
            if (hasattr(self, 'data_transformer') and self.merged_df is not None
                    and 'function' in self.merged_df.columns):
                for func_str in self.merged_df['function'].dropna():
                    if isinstance(func_str, str):
                        all_functions.update(f.strip() for f in func_str.split(';'))

            minor_function_list = [f for f in all_functions if f not in self.selected_functions]

            # Accumulate per-group and overall totals of the minor functions
            total_count = 0
            total_absorbance = 0
            for function in minor_function_list:
                if function not in metrics_by_function:
                    continue
                metrics = metrics_by_function[function]
                for group in all_groups:
                    if group not in metrics:
                        continue
                    group_metrics = metrics[group]
                    minor_functions[group]['count'] += group_metrics.get('count', 0)
                    minor_functions[group]['absorbance'] += group_metrics.get('absorbance', 0)
                    total_count += group_metrics.get('count', 0)
                    total_absorbance += group_metrics.get('absorbance', 0)

            # Relative values: each group's share of the minor-function total
            if total_count > 0:
                for group in all_groups:
                    minor_functions[group]['rel_count'] = (minor_functions[group]['count'] / total_count) * 100

            if total_absorbance > 0:
                for group in all_groups:
                    minor_functions[group]['rel_absorbance'] = (minor_functions[group]['absorbance'] / total_absorbance) * 100

            # Unique peptides (from the filtered data) carrying any minor function.
            # self.filtered_df is only touched when there is something to look up.
            minor_unique_peptides = set()
            for func in minor_function_list:
                peptides_with_func = self.filtered_df[
                    self.filtered_df['function'].apply(
                        lambda cell, func=func: self.contains_function(cell, func)
                    )
                ]
                minor_unique_peptides.update(peptides_with_func['unique ID'].unique())

            # Populate Minor Functions in function_distribution_dict
            minor_entry = {
                'counts': {group: minor_functions[group]['count'] for group in all_groups},
                'count_relative': {group: minor_functions[group]['rel_count'] for group in all_groups},
                'absorbance': {group: minor_functions[group]['absorbance'] for group in all_groups},
                'absorbance_relative': {group: minor_functions[group]['rel_absorbance'] for group in all_groups},
                'unique_peptide_count': len(minor_unique_peptides),
                'total_count': total_count,
                'total_absorbance': total_absorbance,
            }
            add_legacy_aliases(minor_entry)
            self.function_distribution_dict['Minor Functions'] = minor_entry
        
    def process_total_peptide_data_and_filter_dataframe(self):
        """
        Filter the merged data by the current selections and summarise each group.

        1. Copies ``self.merged_df`` into ``self.filtered_df`` and filters it
           according to ``self.plot_func_or_pro.value``: 'Both' (protein AND
           function mask), 'Selected Protein(s)', 'Selected Function(s)', or
           anything else (no filtering).
        2. For every group of ``self.group_data_dict`` that is selected in
           ``self.group_selector.value``, computes peptide counts, summed mean
           abundance and SEM values over the group's replicate columns.
        3. Stores the per-group results in ``self.total_peptide_results_dict``
           and refreshes ``self.sample_distribution_summary_df`` through
           ``self.create_sample_summary_df``.

        Each result has the keys ``unique_peptides``, ``total_Absorbance``,
        ``total_sem``, ``abundance_sem``, ``count_sem`` and ``replicate_data``
        (``abundance_columns``, ``replicate_counts``, ``replicate_abundances``).
        Groups without any usable value get a zero-filled result; groups whose
        columns are all absent from the data are skipped with a warning.

        Returns:
            None. Returns immediately, without setting any attribute, when
            ``self.merged_df`` or ``self.group_data_dict`` is None.
        """
        if self.merged_df is None or self.group_data_dict is None:
            return None

        # Work on a copy so the merged data stays untouched
        self.filtered_df = self.merged_df.copy()

        # Default masks keep every row
        protein_mask = pd.Series(True, index=self.filtered_df.index)
        function_mask = pd.Series(True, index=self.filtered_df.index)

        protein_col = 'protein_name'

        # ---- Protein mask -------------------------------------------------
        if self.protein_selector and 'All Proteins (No Filter)' not in self.protein_selector.value:
            selected_proteins = self.get_selected_proteins()
            protein_names = self.filtered_df[protein_col].fillna('')

            # Progressively looser matching: exact name, then substring, then
            # case-insensitive substring. The first strategy that matches at
            # least one row wins; otherwise the last (all-False) mask is kept.
            matchers = (
                lambda name: any(protein == name for protein in selected_proteins),
                lambda name: any(protein in name for protein in selected_proteins),
                lambda name: any(protein.lower() in name.lower() for protein in selected_proteins),
            )
            for matcher in matchers:
                protein_mask = protein_names.apply(matcher)
                if protein_mask.sum() != 0:
                    break

        # ---- Function mask ------------------------------------------------
        if self.function_selector:
            function_col = next(
                (name for name in ('function', 'Function', 'FUNCTION')
                 if name in self.filtered_df.columns),
                None,
            )
            if function_col:
                selection = self.function_selector.value
                if 'All Functional Peptides' in selection:
                    # Any peptide that carries at least one known function
                    wanted_functions = self.all_functions
                    function_mask = self.filtered_df[function_col].apply(
                        lambda cell: any(self.contains_function(cell, func) for func in wanted_functions)
                    )
                elif 'Non-Functional Peptides' in selection:
                    # Peptides without any function annotation
                    function_mask = self.filtered_df[function_col].isna()
                elif 'All Peptides (No Filter)' in selection:
                    function_mask = pd.Series(True, index=self.filtered_df.index)
                else:
                    # Individually selected functions
                    wanted_functions = selection
                    function_mask = self.filtered_df[function_col].apply(
                        lambda cell: any(self.contains_function(cell, func) for func in wanted_functions)
                    )

        # ---- Combine according to the chosen plot filter --------------------
        filter_mode = self.plot_func_or_pro.value
        if filter_mode == 'Both':
            combined_mask = protein_mask & function_mask
        elif filter_mode == 'Selected Protein(s)':
            combined_mask = protein_mask
        elif filter_mode == 'Selected Function(s)':
            combined_mask = function_mask
        else:
            combined_mask = pd.Series(True, index=self.filtered_df.index)
        self.filtered_df = self.filtered_df[combined_mask]

        # ---- Per-group statistics -----------------------------------------
        total_peptide_results_dict = {}
        for group_name, abundance_columns in self.group_data_dict.items():
            if group_name not in self.group_selector.value:
                continue

            # A group may map to a single column name or to a collection of them
            if isinstance(abundance_columns, str):
                abundance_columns = [abundance_columns]
            else:
                abundance_columns = list(abundance_columns)

            # Keep only the replicate columns that exist in the filtered data
            valid_abundance_cols = [col for col in abundance_columns if col in self.filtered_df.columns]
            if not valid_abundance_cols:
                print(f"Warning: No valid abundance columns found for group {group_name}")
                continue

            temp_df = self.filtered_df[['unique ID'] + valid_abundance_cols].copy()

            # Convert abundance columns to numeric, forcing non-numeric values to NaN
            for col in valid_abundance_cols:
                temp_df[col] = pd.to_numeric(temp_df[col], errors='coerce')

            # A row is usable when at least one replicate is both present and
            # non-zero. The two conditions must be combined per cell: NaN != 0
            # evaluates to True, so testing them separately would accept rows
            # such as [NaN, 0] that carry no signal at all.
            replicate_values = temp_df[valid_abundance_cols]
            has_signal = (replicate_values.notna() & (replicate_values != 0)).any(axis=1)
            temp_df = temp_df[has_signal & temp_df['unique ID'].notna()]

            if temp_df.empty:
                print(f"Warning: No valid data for group {group_name}")
                # Add empty results to maintain group in output
                total_peptide_results_dict[group_name] = {
                    'unique_peptides': 0,
                    'total_Absorbance': 0,
                    'total_sem': 0,
                    'abundance_sem': 0,
                    'count_sem': 0,
                    'replicate_data': {
                        'abundance_columns': valid_abundance_cols,
                        'replicate_counts': [0] * len(valid_abundance_cols),
                        'replicate_abundances': [0] * len(valid_abundance_cols)
                    }
                }
                continue

            # Unique peptides detected (present and non-zero) in each replicate
            replicate_counts = [
                temp_df.loc[temp_df[col].notna() & (temp_df[col] != 0), 'unique ID'].nunique()
                for col in valid_abundance_cols
            ]

            # SEM of the peptide count across replicates (sample std, ddof=1)
            if len(replicate_counts) > 1:
                count_sem = np.std(replicate_counts, ddof=1) / np.sqrt(len(replicate_counts))
            else:
                count_sem = 0

            # Total abundance = sum over peptides of the mean across replicates
            abundances = temp_df[valid_abundance_cols].values.astype(float)
            peptide_means = np.nanmean(abundances, axis=1)
            total_abundance = np.nansum(peptide_means)

            # Per-peptide SEMs are combined in quadrature.
            # NOTE(review): this uses the population std (ddof=0) divided by the
            # number of replicate columns rather than the number of non-NaN
            # values, unlike count_sem above (ddof=1) - confirm this is intended.
            peptide_sems = np.nanstd(abundances, axis=1) / np.sqrt(abundances.shape[1])
            total_sem = np.sqrt(np.nansum(peptide_sems ** 2))

            # Unique peptides with a positive value in at least one replicate
            all_unique_peptides = temp_df[
                (temp_df[valid_abundance_cols] > 0).any(axis=1)
            ]['unique ID'].nunique()

            total_peptide_results_dict[group_name] = {
                'unique_peptides': all_unique_peptides,
                'total_Absorbance': total_abundance,
                'total_sem': total_sem,
                'abundance_sem': total_sem,
                'count_sem': count_sem,
                'replicate_data': {
                    'abundance_columns': valid_abundance_cols,
                    'replicate_counts': replicate_counts,
                    'replicate_abundances': [temp_df[col].replace(0, np.nan).sum() for col in valid_abundance_cols]
                }
            }

        self.total_peptide_results_dict = total_peptide_results_dict

        # Rebuild the sample summary for the active data type
        use_count = self.abs_or_count.value == 'Count'
        self.sample_distribution_summary_df = self.create_sample_summary_df(use_count=use_count)
                        
    def process_bioactive_data(self):
        """
        Compute per-function metrics for the selected sample groups.

        Works on a copy of ``self.merged_df`` restricted to the ``Avg_<group>``
        columns of the groups selected in ``self.group_selector.value``. The
        set of functions is either every ';'-separated entry found in the
        ``'function'`` column, or the two fixed categories 'Functional Peptides'
        / 'Non-Functional Peptides' when ``self.plot_func_or_pro.value`` is
        'Functional vs Non-Functional Peptides'.

        On success the following attributes are set:

        * ``self.function_count_totals_dict`` - per group, the number of
          distinct 'unique ID' rows that have a function and a non-zero,
          non-NaN value (groups without such rows are omitted).
        * ``self.function_absorbance_totals_dict`` - per group, the summed
          value of those rows.
        * ``self.function_group_metrics_dict`` - per function:
          ``total_absorbance``, ``total_count``, ``unique_peptide_count`` and,
          per group, ``absorbance``, ``count``, ``rel_absorbance``,
          ``rel_count`` (percent of the function's total across the selected
          groups) and ``total_absorbance`` (sum of the positive values of the
          peptides carrying the function). Absorbance and count values are read
          from ``self.unique_function_absorbance_dict`` and
          ``self.unique_function_counts_dict``, which must be populated
          beforehand.

        Returns:
            None on success. When ``self.merged_df`` is None or has no
            ``'function'`` column, an error is printed, no attribute is
            changed and ``(None, None, None, None)`` is returned.
        """
        # De-duplicate the selection while keeping the user's order
        selected_groups = list(dict.fromkeys(self.group_selector.value))

        # Validate before touching the columns; the dataframe may not be loaded yet
        if self.merged_df is None or 'function' not in self.merged_df.columns:
            print("Error: No valid dataframe or missing 'function' column")
            return None, None, None, None

        # Drop the Avg_ columns of unselected groups (drop() returns a new
        # frame, so self.merged_df itself is never modified)
        selected_avg_cols = {f'Avg_{group}' for group in selected_groups}
        unselected_avg_cols = [
            col for col in self.merged_df.columns
            if isinstance(col, str) and col.startswith('Avg_') and col not in selected_avg_cols
        ]
        df = self.merged_df.drop(columns=unselected_avg_cols)

        split_by_functionality = (
            self.plot_func_or_pro.value == 'Functional vs Non-Functional Peptides'
        )
        if split_by_functionality:
            all_functions = ['Functional Peptides', 'Non-Functional Peptides']
        else:
            # Collect every individual function named in the data; sorted so
            # the resulting dictionaries have a reproducible order
            function_names = set()
            for func_str in df['function'].dropna():
                if isinstance(func_str, str):
                    function_names.update(f.strip() for f in func_str.split(';'))
            all_functions = sorted(function_names)

        # Unique peptide count and summed positive absorbance for each function
        function_unique_peptides = {}
        function_unique_absorbance = {}
        for function in all_functions:
            if split_by_functionality:
                if function == 'Functional Peptides':
                    function_mask = df['function'].notna()
                else:
                    function_mask = df['function'].isna()
            else:
                function_mask = df['function'].apply(
                    lambda cell, function=function: self.contains_function(cell, function)
                )

            function_data = df[function_mask]
            function_unique_peptides[function] = function_data['unique ID'].nunique()

            function_unique_absorbance[function] = {}
            for group in selected_groups:
                column = f'Avg_{group}'
                if column in df.columns:
                    # NaN > 0 is False, so missing values are excluded as well
                    positive_rows = function_data[column] > 0
                    function_unique_absorbance[function][group] = function_data.loc[positive_rows, column].sum()

        # Totals per group over all peptides that carry any function
        function_count_totals_dict = {}
        function_absorbance_totals_dict = {}
        for group in selected_groups:
            column = f'Avg_{group}'
            if column not in df.columns:
                continue
            usable_rows = (df[column] != 0) & df[column].notna() & df['function'].notna()
            group_rows = df.loc[usable_rows, ['unique ID', column]]
            if group_rows.empty:
                continue
            function_count_totals_dict[group] = len(group_rows.drop_duplicates(subset='unique ID'))
            function_absorbance_totals_dict[group] = group_rows[column].sum()

        # Distribution of every function across the selected groups
        function_group_metrics_dict = {}
        for function in all_functions:
            per_group = [
                (
                    group,
                    self.unique_function_absorbance_dict.get(group, {}).get(function, 0),
                    self.unique_function_counts_dict.get(group, {}).get(function, 0),
                )
                for group in selected_groups
            ]
            function_total_absorbance = sum(absorbance for _, absorbance, _ in per_group)
            function_total_count = sum(count for _, _, count in per_group)

            function_metrics = {
                'total_absorbance': function_total_absorbance,
                'total_count': function_total_count,
                'unique_peptide_count': function_unique_peptides.get(function, 0),
            }

            for group, absorbance, count in per_group:
                # Relative metrics: share of the function's total, in percent
                rel_absorbance = 0
                if function_total_absorbance > 0:
                    rel_absorbance = (absorbance / function_total_absorbance) * 100

                rel_count = 0
                if function_total_count > 0:
                    rel_count = (count / function_total_count) * 100

                function_metrics[group] = {
                    'absorbance': absorbance,
                    'count': count,
                    'rel_absorbance': rel_absorbance,
                    'rel_count': rel_count,
                    # The group's summed positive absorbance for this function
                    'total_absorbance': function_unique_absorbance.get(function, {}).get(group, 0),
                }

            function_group_metrics_dict[function] = function_metrics

        # Save all calculated dictionaries as instance attributes
        self.function_count_totals_dict = function_count_totals_dict
        self.function_absorbance_totals_dict = function_absorbance_totals_dict
        self.function_group_metrics_dict = function_group_metrics_dict

    def on_color_scheme_change(self, change):
        """Refresh the colors and redraw the plot after a color scheme change.

        Does nothing unless a figure has already been drawn
        (``self.current_fig`` is not None) and the instance has a
        ``plot_button`` attribute.
        """
        nothing_to_redraw = self.current_fig is None or not hasattr(self, 'plot_button')
        if nothing_to_redraw:
            return

        # Regenerate the colors first, then replot as if the plot button was clicked
        self.on_data_loaded_func_and_color_gen(change)
        self.on_plot_button_click(None)
        
    def _initialize_instructions(self):
        self.steptwo_output_html_message = """
        <div style='padding: 10px; background-color: #f8f9fa; border-left: 5px solid #007bff; margin: 10px 0;'>
            <h3>Step 2: Select Data to Visualize</h3>
            <p>Choose which data to include in your visualization:</p>
            
            <details>
                <summary style="font-weight: bold; cursor: pointer; padding: 5px; border-bottom: 1px solid #eee;">Sample Groups</summary>
                <ul style='list-style-type: none; margin-left: 20px;'>
                    <li><b>Select Groups:</b> Choose which sample groups to include in your visualization</li>
                    <li><b>Multi-select:</b> You can select multiple groups to compare in the same plot</li>
                </ul>
            </details>
            
            <details>
                <summary style="font-weight: bold; cursor: pointer; padding: 5px; border-bottom: 1px solid #eee;">Proteins & Functions</summary>
                <ul style='list-style-type: none; margin-left: 20px;'>
                    <li><b>Select Proteins:</b> Choose specific proteins to visualize from your dataset. Defualt option is top 10 proteins by Absorbance</li>
                    <li><b>Select Functions:</b> Choose specific bioactivitiesto analyze. Default option is all bioactivities</li>
                    <li><b>Multi-select:</b> You can select multiple items to include in your visualization</li>
                </ul>
            </details>
            
            <details>
                <summary style="font-weight: bold; cursor: pointer; padding: 5px; border-bottom: 1px solid #eee;">Plot Filter</summary>
                <ul style='list-style-type: none; margin-left: 20px;'>
                    <li><b>No Filter:</b> Visualize all data without filtering</li>
                    <li><b>Selected Protein(s):</b> Focus visualization on chosen proteins only</li>
                    <li><b>Selected Function(s):</b> Focus visualization on chosen bioactivitiesonly</li>
                    <li><b>Both:</b> Apply both protein and function filters simultaneously</li>
                    <li><b>Functional vs Non-Functional:</b> Compare peptides by categorizing them based on presence or absence of bioactive functions</li>                </ul>
            </details>
        </div>
        """
        
        self.steptwo_status_output = widgets.Output(
            layout=widgets.Layout(
                max_width='1000px',
                width='100%'
            )
        )

        with self.steptwo_status_output:
            display(HTML(self.steptwo_output_html_message))

        self.stepthree_output_html_message = """
        <div style='padding: 10px; background-color: #f8f9fa; border-left: 5px solid #007bff; margin: 10px 0;'>
            <h3>Step 3: Visualization Options</h3>
            <p>Choose how to visualize your selected data:</p>
            
            <!-- Primary Dropdown 1: Visualization Settings -->
            <details style="margin-bottom: 10px;">
                <summary style="font-weight: bold; cursor: pointer; padding: 5px; border-bottom: 1px solid #eee; background-color: #f8f9fa;">
                    Visualization Settings
                </summary>
                
                <!-- Plot Type Options -->
                <details style="margin-left: 20px; margin-top: 8px;">
                    <summary style="font-weight: bold; cursor: pointer; padding: 5px; border-bottom: 1px solid #eee;">Plot Type</summary>
                    <ul style='list-style-type: none; margin-left: 20px;'>
                        <li><b>Grouped Bar Plots:</b> Compare data across categories with bars grouped by category</li>
                        <li><b>Stacked Bar Plots:</b> Show composition of each category with stacked segments</li>
                        <li><b>Pie Charts:</b> Display proportion of each component as a slice of the whole</li>
                        <li><b>Correlation Scatter Plots:</b> Visualize relationships between sample groups</li>
                    </ul>
                </details>
                
                <!-- Scale Absorbance Options -->
                <details style="margin-left: 20px; margin-top: 8px;">
                    <summary style="font-weight: bold; cursor: pointer; padding: 5px; border-bottom: 1px solid #eee;">Scale Absorbance</summary>
                    <ul style='list-style-type: none; margin-left: 20px;'>
                        <li><b>Absolute:</b> Display raw values of either abudnance or peptide count on the y-axis</li>
                        <li><b>Relative:</b> Display as percentages of the total on the y-axis, with a scale of 0-100</li>
                    </ul>
                </details>
                
                <!-- Data Type Options -->
                <details style="margin-left: 20px; margin-top: 8px;">
                    <summary style="font-weight: bold; cursor: pointer; padding: 5px; border-bottom: 1px solid #eee;">Data Type</summary>
                    <ul style='list-style-type: none; margin-left: 20px;'>
                        <li><b>Absorbance:</b> Use absorbance values for visualization</li>
                        <li><b>Count:</b> Use peptide count values for visualization</li>
                    </ul>
                </details>
                
                <!-- Plot Orientation Options -->
                <details style="margin-left: 20px; margin-top: 8px;">
                    <summary style="font-weight: bold; cursor: pointer; padding: 5px; border-bottom: 1px solid #eee;">Plot Orientation</summary>
                    <ul style='list-style-type: none; margin-left: 20px;'>
                        <li><b>By Sample:</b> Organize data with samples as primary grouping and samples plotted on the x-axis</li>
                        <li><b>By Protein:</b> Organize data with proteins as primary grouping and proteins plotted on the x-axis</li>
                        <li><b>By Function:</b> Organize data with functions as primary grouping and functions plotted on the x-axis</li>
                    </ul>
                </details>
                
                <!-- Group Unselected Options -->
                <details style="margin-left: 20px; margin-top: 8px;">
                    <summary style="font-weight: bold; cursor: pointer; padding: 5px; border-bottom: 1px solid #eee;">Group Unselected Items</summary>
                    <ul style='list-style-type: none; margin-left: 20px;'>
                        <li><b>Group Unselected Proteins or Functions:</b> Combine unselected proteins or functions into a "Minor" category to deal with insignificant data</li>
                    </ul>
                </details>
                
                <!-- Correlation Type Options -->
                <details style="margin-left: 20px; margin-top: 8px;">
                    <summary style="font-weight: bold; cursor: pointer; padding: 5px; border-bottom: 1px solid #eee;">Correlation Type</summary>
                    <ul style='list-style-type: none; margin-left: 20px;'>
                        <li><b>Pearson:</b> Linear correlation between variables</li>
                        <li><b>Spearman:</b> Rank-based correlation that detects monotonic relationships</li>
                        <li><b>Log Transform:</b> Apply log10 transformation before correlation analysis</li>
                    </ul>
                </details>
            </details>
            
            <!-- Primary Dropdown 2: Appearance Settings -->
            <details style="margin-bottom: 10px;">
                <summary style="font-weight: bold; cursor: pointer; padding: 5px; border-bottom: 1px solid #eee; background-color: #f8f9fa;">
                    Appearance Settings
                </summary>
                
                <!-- Labels -->
                <details style="margin-left: 20px; margin-top: 8px;">
                    <summary style="font-weight: bold; cursor: pointer; padding: 5px; border-bottom: 1px solid #eee;">Labels and Titles</summary>
                    <ul style='list-style-type: none; margin-left: 20px;'>
                        <li><b>X Label:</b> Enter a custom label for the x-axis otherwise a default label will be used</li>
                        <li><b>Y Label:</b> Enter a custom label for the y-axis otherwise a default label will be used</li>
                        <li><b>Legend Title:</b> Enter a custom title for the plot legend otherwise a default title will be used</li>
                        <li><b>Plot Title:</b> Enter a custom title for the entire plot otherwise a default title will be used</li> 
                    </ul>
                </details>
                
                <!-- Color Scheme -->
                <details style="margin-left: 20px; margin-top: 8px;">
                    <summary style="font-weight: bold; cursor: pointer; padding: 5px; border-bottom: 1px solid #eee;">Color Scheme</summary>
                    <ul style='list-style-type: none; margin-left: 20px;'>
                        <li><b>DEFAULT PALETTE (HSV):</b> Standard color rotation for visualizations</li>
                        <li style="margin-top: 10px;"><b>QUALITATIVE PALETTES (RECOMMENDED):</b> Distinct colors for categorical data
                            <ul style='list-style-type: none; margin-left: 15px;'>
                                <li><i>Plotly, D3, G10, T10, Alphabet, Set1, Set2, Set3, Pastel1, Pastel2, Paired</i></li>
                                <li>Best for distinguishing between different categories</li>
                            </ul>
                        </li>
                        <li style="margin-top: 10px;"><b>SEQUENTIAL PALETTES:</b> Gradient from light to dark
                            <ul style='list-style-type: none; margin-left: 15px;'>
                                <li><i>Viridis, Cividis, Inferno, Magma, Plasma, Hot, Jet, Blues, Greens, Reds, etc.</i></li>
                                <li>Best for showing intensity or magnitude</li>
                            </ul>
                        </li>
                        <li style="margin-top: 10px;"><b>DIVERGING PALETTES:</b> Two contrasting colors with neutral middle
                            <ul style='list-style-type: none; margin-left: 15px;'>
                                <li><i>Spectral, RdBu, RdYlBu, RdYlGn, PiYG, PRGn, BrBG, RdGy</i></li>
                                <li>Best for data with meaningful midpoint (like correlations)</li>
                            </ul>
                        </li>
                        <li style="margin-top: 10px;"><b>CYCLICAL PALETTES:</b> Colors that loop smoothly
                            <ul style='list-style-type: none; margin-left: 15px;'>
                                <li><i>IceFire, Edge, Twilight</i></li>
                                <li>Best for circular or periodic data</li>
                            </ul>
                        </li>
                        <li style="margin-top: 10px;"><b>SINGLE COLORS:</b> Individual color options
                            <ul style='list-style-type: none; margin-left: 15px;'>
                                <li><i>red, green, blue, yellow, purple, orange, cyan, etc.</i></li>
                                <li>Used for correlation plots and some grouped bar plots</li>
                                <li>Applied to all elements or as a base color</li>
                            </ul>
                        </li>
                    </ul>
                </details>
            </details>
            
            <p style="margin-top: 10px;">Configure your visualization options and click "Generate/Update Data" to create your plots.</p>
        </div>
        """
        
        self.stepthree_status_output = widgets.Output(
            layout=widgets.Layout(
                max_width='1000px',
                width='100%'
            )
        )
        
        with self.stepthree_status_output:
            display(HTML(self.stepthree_output_html_message))

    def display_handler(self):
        """Render the protein analysis interface.

        Three things are displayed, in this order: the selector box (also
        stored on ``self.handler_widget_box``), the step-three status output,
        and a one-row, two-column grid with the visualization settings on the
        left and the appearance settings on the right.
        """

        def flat_layout(**overrides):
            # All containers here share zero margin/padding and clipped overflow.
            return widgets.Layout(margin='0px', padding='0px', overflow='hidden', **overrides)

        # Top box: status output followed by the grouping/selection widgets.
        selector_children = [
            self.steptwo_status_output,
            self.groups_grid,
            self.profunc_grid,
            self.plot_func_or_pro,
        ]
        self.handler_widget_box = widgets.VBox(
            selector_children,
            layout=flat_layout(width='1000px', height='auto'),
        )
        display(self.handler_widget_box)
        display(self.stepthree_status_output)

        # 1 row x 2 columns container for the two settings panels.
        settings_grid = GridspecLayout(
            1, 2,
            width='750px',
            height='auto',
            overflow='hidden',
            grid_gap='0px',
        )

        visualization_panel = VBox(
            [
                widgets.HTML("<h3><u>Visualization Settings:</u></h3>"),
                self.plot_type_row,
                self.plot_type_row_two,
                self.minor_row,
                self.corr_box,
            ],
            layout=flat_layout(width='350px'),
        )

        appearance_panel = VBox(
            [
                widgets.HTML("<h3><u>Appearance Settings:</u></h3>"),
                self.xlabel_widget,
                self.ylabel_widget,
                self.legend_widget,
                self.title_widget,
                self.color_scheme,
            ],
            layout=flat_layout(width='400px'),
        )

        settings_grid[0, 0] = visualization_panel   # left cell
        settings_grid[0, 1] = appearance_panel      # right cell

        display(settings_grid)

    def _fetch_protein_names(self, accession_str):
        """
        Fetch protein names from the proteins dictionary.
        Returns a list of protein names, using the full protein name.
        """
        names = []
        for acc in accession_str.split('; '):
            if acc in self.protein_dict:
                # Use the full protein name instead of splitting it
                name = self.protein_dict[acc]['name']
                names.append(name)
            else:
                names.append(acc)
        return names

  
    def create_sample_summary_df(self, use_count=False):
        """
        Create a summary DataFrame from total_peptide_results_dict.

        One row is produced per sample (sorted by sample name) followed by a
        final 'Total' row. Columns, in order: 'Sample', the value column, its
        SEM column, the relative percentage of the value, 'Replicates', and
        'CV%' when at least one sample carries 'replicate_data'.

        Args:
            use_count (bool): If True, use peptide counts, otherwise use abundance values

        Returns:
            pd.DataFrame: DataFrame with sample metrics including relative values,
            or None when total_peptide_results_dict is missing/empty or an
            error occurs (the error is printed together with its traceback).
        """
        try:
            # Check if we have the required dictionary
            results = getattr(self, 'total_peptide_results_dict', None)
            if not results:
                print("Missing total_peptide_results_dict")
                return None

            # Determine which values to use based on use_count parameter
            if use_count:
                value_key = 'unique_peptides'
                error_key = 'count_sem'
                value_column = 'Peptide_Count'
                error_column = 'Count_SEM'
                replicate_key = 'replicate_counts'
            else:
                value_key = 'total_Absorbance'
                error_key = 'abundance_sem'
                value_column = 'Total_Abundance'
                error_column = 'Abundance_SEM'
                replicate_key = 'replicate_abundances'
            relative_column = f'Relative_{value_column}'

            # Calculate total for relative values
            total_value = sum(group_data[value_key] for group_data in results.values())

            # Create a row for each sample
            data_rows = []
            for sample, data in results.items():
                value = data[value_key]
                error = data[error_key]

                row = {
                    'Sample': sample,
                    value_column: value,
                    error_column: error,
                    # Share of the grand total, guarded against a zero total
                    relative_column: (value / total_value * 100) if total_value > 0 else 0,
                    # Replicate count is always taken from 'replicate_counts'
                    'Replicates': len(data.get('replicate_data', {}).get('replicate_counts', [])),
                }

                # Add CV% when replicate-level values are available
                if 'replicate_data' in data:
                    replicate_values = data['replicate_data'].get(replicate_key, [])
                    if replicate_values:
                        mean = sum(replicate_values) / len(replicate_values)
                        # Recover SD from SEM: SD = SEM * sqrt(n)
                        sd = error * (len(replicate_values) ** 0.5)
                        row['CV%'] = (sd / mean * 100) if mean > 0 else 0

                data_rows.append(row)

            # Create DataFrame, sorted by sample name for consistency
            df = pd.DataFrame(data_rows).sort_values('Sample')

            # Format numeric columns
            df[relative_column] = df[relative_column].round(2)
            if 'CV%' in df.columns:
                df['CV%'] = df['CV%'].round(2)

            if not use_count:
                # Format abundance values in scientific notation
                df[value_column] = df[value_column].apply(lambda x: f"{x:.2e}")
                df[error_column] = df[error_column].apply(lambda x: f"{x:.2e}")
            else:
                # Format count values as integers
                df[value_column] = df[value_column].astype(int)
                df[error_column] = df[error_column].round(2)

            # Add a Total row
            total_row = {
                'Sample': 'Total',
                value_column: total_value if use_count else f"{total_value:.2e}",
                error_column: None,  # Can't meaningfully combine SEMs
                relative_column: 100.0,
                'Replicates': sum(row['Replicates'] for row in data_rows)
            }

            if 'CV%' in df.columns:
                total_row['CV%'] = None  # Can't meaningfully combine CVs

            # Append the Total row by rebuilding the frame from records.
            # safe_concat is deliberately not used here: it drops all-NA
            # columns and keeps only the columns common to both frames, so the
            # empty SEM / CV% cells of the Total row used to remove those
            # columns from the whole summary (and scrambled the column order).
            records = df.to_dict('records')
            records.append(total_row)
            return pd.DataFrame(records, columns=list(df.columns))

        except Exception as e:
            print(f"Error creating sample summary DataFrame: {str(e)}")
            traceback.print_exc()
            return None

In [6]:
class Plotter:
    def __init__(self, data_transformer, data_handler):
        """Set up state, output areas and buttons, and hook up event handlers.

        Args:
            data_transformer: object exposing a ``plot_lock`` widget whose
                ``value`` changes are observed here.
            data_handler: object whose methods named below are bound onto
                this plotter instance.
        """
        self.data_transformer = data_transformer
        self.data_handler = data_handler
        self.state_manager = PlotState()

        # Output areas: one for rendered plots, one for download links.
        self.plot_output = widgets.Output()
        self.export_output = widgets.Output()

        # Nothing has been generated yet.
        self.current_fig = None
        self.protein_df = None
        self.sum_df = None

        # Buttons must exist before their click handlers can be attached.
        self._create_buttons()
        self.plot_button.on_click(self.on_plot_button_click)
        self.download_plot_button.on_click(self.on_download_plot_click)

        # Names of data_handler methods that are re-exposed on this instance.
        processing_methods = [
            'process_total_peptide_data_and_filter_dataframe',
            'process_protein_data',
            'process_bioactive_data',
            'calculate_bioactivt_count_and_dict',
            'calculate_group_metrics',
            'create_function_df',
            'reorganize_by_function',
        ]
        helper_methods = [
            'get_selected_proteins',
            'contains_function',
            'get_color_sequence',
            '_fetch_protein_names',
        ]
        ui_methods = [
            'on_color_scheme_change',
            'get_single_color',
        ]
        self._import_methods_from_data_handler(
            processing_methods + helper_methods + ui_methods
        )

        # Follow the transformer's plot lock so the plot button can be re-enabled.
        self.data_transformer.plot_lock.observe(
            self._on_plot_lock_change_plotter, names='value'
        )

    def _on_plot_lock_change_plotter(self, change):
        """Handle changes in Plot_lock state"""
        if change.new == False and change.old == True:
            self.plot_button.disabled = self.data_transformer.plot_lock.value
       
    def _import_methods_from_data_handler(self, method_list):
        """Import specific methods from data_handler"""
        for method_name in method_list:
            if hasattr(self.data_handler, method_name):
                setattr(self, method_name, getattr(self.data_handler, method_name))
                #print(f"Imported {method_name} from data_handler")
            else:
                print(f"WARNING: {method_name} not found in data_handler")
     
    def _create_buttons(self):
        """Create the download and generate/update buttons.

        The download button starts disabled; the plot button starts in
        whatever state the transformer's plot lock currently holds.
        """

        def make_button(**traits):
            # Each button gets its own 200px-wide Layout instance.
            return widgets.Button(layout=widgets.Layout(width='200px'), **traits)

        self.download_plot_button = make_button(
            description='Download Interactive Plot',
            button_style='info',
            icon='file',
            disabled=True,
        )

        self.plot_button = make_button(
            description='Generate/Update Data',
            button_style='success',
            icon='refresh',
            disabled=self.data_transformer.plot_lock.value,
        )

        
    def get_data_attribute(self, attr_name, default=None):
        """Look up ``attr_name`` on data_handler, data_transformer, then self.

        The first owner that has the attribute wins. ``default`` is returned
        when none of the three has it.
        """
        # Owners are resolved one at a time so a later one is only touched
        # when the earlier ones do not provide the attribute.
        for owner_name in ('data_handler', 'data_transformer'):
            owner = getattr(self, owner_name)
            if hasattr(owner, attr_name):
                return getattr(owner, attr_name)

        # Last resort: the plotter itself, falling back to the default.
        return getattr(self, attr_name, default)

    
    def generate_download_link(self, content, filename, filetype='text/csv'):
        """Build HTML that triggers a browser download of ``content``.

        A DataFrame is serialised to CSV first (without the index for
        'text/csv', with the index for any other filetype); text is encoded to
        bytes. The payload is embedded as a base64 data URI in a hidden anchor
        that an inline script clicks immediately.

        Returns:
            str: the HTML snippet (anchor plus script).
        """
        if isinstance(content, pd.DataFrame):
            # The index column is only written for non-CSV filetypes.
            content = content.to_csv(index=(filetype != 'text/csv'))
        payload = content.encode() if isinstance(content, str) else content
        b64 = base64.b64encode(payload).decode()
        return f"""
            <a id="download_link" href="data:{filetype};base64,{b64}" 
               download="{filename}"
               style="display: none;">
                Download {filename}
            </a>
            <script>
                document.getElementById('download_link').click();
            </script>
            """
    
    def on_download_plot_click(self, b):
        """Click handler: offer the current figure as a timestamped HTML download.

        When no figure has been generated yet, a hint is printed instead.
        ``b`` is the clicked button and is not used.
        """
        if self.current_fig is None:
            print("Please generate a plot first.")
            return

        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        html_name = f'protein_plot_{stamp}.html'

        with self.export_output:
            # Replace whatever link was shown before.
            self.export_output.clear_output(wait=True)
            link_markup = self.generate_download_link(
                self.current_fig.to_html(),
                html_name,
                'text/html'
            )
            display(HTML(link_markup))

    
    def display(self):
        """Show the action buttons, the export output area and the plot output."""
        button_row = widgets.HBox([
            self.plot_button,
            self.download_plot_button,
        ])

        # Header, buttons and the export/download area stacked vertically.
        export_section = VBox(
            [
                widgets.HTML("<h3><u>Display and Export</u></h3>"),
                button_row,
                self.export_output,
            ],
            layout=widgets.Layout(
                width='900px',
                margin='0px',
                padding='0px',
                overflow='hidden',
            ),
        )

        # `display` here resolves to the module-level IPython function,
        # not to this method.
        display(export_section)
        display(self.plot_output)

        
    def on_export_button_click(self, b):
        """Handle data export with automatic download, then compute per-group peptide totals.

        Step 1 offers ``self.protein_df`` as a timestamped CSV download (or
        prints a hint when it has not been generated yet).

        Step 2 filters the transformer's merged DataFrame by the current
        protein/function selection (stored on ``self.filtered_df``) and, for
        every selected group, computes unique-peptide counts, summed abundance
        and their SEMs.

        Args:
            b: the clicked button (unused).

        Returns:
            None when the transformer has no merged_df or group_data_dict,
            otherwise a tuple ``(total_peptide_results_dict, self.filtered_df)``
            covering all selected groups.
        """
        if self.protein_df is not None:
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            data_filename = f'protein_absorbance_analysis_{timestamp}.csv'

            with self.export_output:
                self.export_output.clear_output(wait=True)
                display(HTML(self.generate_download_link(
                    self.protein_df,
                    data_filename,
                    'text/csv'
                )))
        else:
            print("Please generate the analysis first.")

        # NOTE(review): everything below looks like a total-peptide processing
        # routine (compare 'process_total_peptide_data_and_filter_dataframe',
        # which __init__ borrows from data_handler) rather than export logic -
        # confirm whether it belongs in this click handler.
        if self.data_transformer.merged_df is None or self.data_transformer.group_data_dict is None:
            return None

        # Initialize dictionary to store results for all groups
        total_peptide_results_dict = {}

        # Use consistent reference to merged dataframe
        self.filtered_df = self.data_transformer.merged_df.copy()

        # Initialize masks (keep every row by default)
        protein_mask = pd.Series(True, index=self.filtered_df.index)
        function_mask = pd.Series(True, index=self.filtered_df.index)

        protein_col = 'protein_name'

        # Create protein filter mask if applicable
        if self.protein_selector:
            if 'All Proteins (No Filter)' not in self.protein_selector.value and protein_col is not None:
                selected_proteins = self.selected_proteins

                # Substring match of any selected protein against the protein column
                protein_mask = self.filtered_df[protein_col].fillna('').apply(
                    lambda x: any(protein in x for protein in selected_proteins)
                )

                # If still no matches, try case-insensitive matching
                if protein_mask.sum() == 0:
                    protein_mask = self.filtered_df[protein_col].fillna('').apply(
                        lambda x: any(protein.lower() in x.lower() for protein in selected_proteins)
                    )
            else:
                # Keep all rows if "All Proteins" is selected
                protein_mask = pd.Series(True, index=self.filtered_df.index)

        # Create function filter mask if applicable
        if self.function_selector:
            # Accept the function column under any of its known spellings
            function_col = None
            for col_name in ['function', 'Function', 'FUNCTION']:
                if col_name in self.filtered_df.columns:
                    function_col = col_name
                    break

            if function_col:
                if 'All Functional Peptides' not in self.function_selector.value:
                    selected_functions = self.function_selector.value
                    function_mask = self.filtered_df[function_col].apply(
                        lambda x: any(self.contains_function(x, func) for func in selected_functions)
                    )
                else:
                    # Keep all rows if "All Functional Peptides" is selected
                    function_mask = pd.Series(True, index=self.filtered_df.index)

        # Combine the masks according to the selected filter mode
        if self.plot_func_or_pro.value == 'Both':
            combined_mask = protein_mask & function_mask
        elif self.plot_func_or_pro.value == 'Selected Protein(s)':
            combined_mask = protein_mask
        elif self.plot_func_or_pro.value == 'Selected Function(s)':
            combined_mask = function_mask
        else:  # None
            combined_mask = pd.Series(True, index=self.filtered_df.index)
        self.filtered_df = self.filtered_df[combined_mask]

        # Process each group from the simplified group data structure
        for group_name, abundance_columns in self.data_transformer.group_data_dict.items():
            if group_name in self.group_selector.value:
                # Only the averaged ("Avg_"-prefixed) columns present in the data are used
                valid_abundance_cols = [f"Avg_{col}" for col in abundance_columns
                                        if f"Avg_{col}" in self.filtered_df.columns]

                if not valid_abundance_cols:
                    print(f"Warning: No valid abundance columns found for group {group_name}")
                    continue

                temp_df = self.filtered_df[['unique ID'] + valid_abundance_cols].copy()

                # Convert abundance columns to numeric, forcing non-numeric values to NaN
                for col in valid_abundance_cols:
                    temp_df[col] = pd.to_numeric(temp_df[col], errors='coerce')

                # Keep rows with an ID and at least one non-null / non-zero abundance
                valid_data_mask = (
                    temp_df[valid_abundance_cols].notna().any(axis=1) &
                    (temp_df[valid_abundance_cols] != 0).any(axis=1) &
                    temp_df['unique ID'].notna()
                )
                temp_df = temp_df[valid_data_mask]

                if temp_df.empty:
                    print(f"Warning: No valid data for group {group_name}")
                    # Add empty results to maintain group in output
                    total_peptide_results_dict[group_name] = {
                        'unique_peptides': 0,
                        'total_Absorbance': 0,
                        'total_sem': 0,
                        'abundance_sem': 0,
                        'count_sem': 0,
                        'replicate_data': {
                            'abundance_columns': valid_abundance_cols,
                            'replicate_counts': [0] * len(valid_abundance_cols),
                            'replicate_abundances': [0] * len(valid_abundance_cols)
                        }
                    }
                    continue

                # Calculate peptide counts for each replicate
                replicate_counts = []
                for col in valid_abundance_cols:
                    count = temp_df[temp_df[col].notna() & (temp_df[col] != 0)]['unique ID'].nunique()
                    replicate_counts.append(count)

                # SEM of the per-replicate counts (sample SD, ddof=1)
                if len(replicate_counts) > 1:
                    count_sem = np.std(replicate_counts, ddof=1) / np.sqrt(len(replicate_counts))
                else:
                    count_sem = 0

                # Calculate abundance statistics: per-peptide mean, summed over peptides
                abundances = temp_df[valid_abundance_cols].values.astype(float)
                peptide_means = np.nanmean(abundances, axis=1)
                total_abundance = np.nansum(peptide_means)

                # Per-peptide SEMs are combined in quadrature for the group total
                peptide_sems = np.nanstd(abundances, axis=1) / np.sqrt(abundances.shape[1])
                total_sem = np.sqrt(np.nansum(peptide_sems ** 2))

                # Calculate total count for group
                all_unique_peptides = temp_df[
                    (temp_df[valid_abundance_cols] > 0).any(axis=1)
                ]['unique ID'].nunique()

                # Store results for this group
                total_peptide_results_dict[group_name] = {
                    'unique_peptides': all_unique_peptides,
                    'total_Absorbance': total_abundance,
                    'total_sem': total_sem,
                    'abundance_sem': total_sem,
                    'count_sem': count_sem,
                    'replicate_data': {
                        'abundance_columns': valid_abundance_cols,
                        'replicate_counts': replicate_counts,
                        'replicate_abundances': [temp_df[col].replace(0, np.nan).sum() for col in valid_abundance_cols]
                    }
                }

        # Return only after every group has been visited. This return used to
        # sit inside the loop body, which ended processing after the first
        # iteration that reached it and silently dropped all later groups.
        return total_peptide_results_dict, self.filtered_df


    def plot_total_peptides(self):
        """Plot total peptides for each group as a bar chart with SEM error bars.

        The chart shows unique-peptide counts when ``self.abs_or_count.value``
        is 'Count' and total absorbance otherwise. If the ``abs_or_count``
        selector is missing or has no value the absorbance view is used, so a
        figure is always produced when data is available (previously this
        case raised UnboundLocalError).

        Reads ``self.total_peptide_results_dict``, ``self.plot_title``,
        ``self.x_axis_label`` and ``self.y_axis_label``; the bar colour comes
        from ``self.get_single_color()``.

        Returns:
            The plotly figure, or ``(None, None)`` when there is no data.
        """
        data = self.total_peptide_results_dict
        if not data:
            print("No data to plot")
            # NOTE(review): this path returns a 2-tuple while the success path
            # returns a single figure - kept as-is because callers are not
            # visible here; confirm what they expect.
            return None, None

        first_color = self.get_single_color()

        # Common styling configurations
        COMMON_LAYOUT = {
            'template': 'plotly_white',
            'height': 800,
            'width': 1000,
            'margin': dict(t=100, l=100, r=100),
            'showlegend': False,
            'font': {'color': 'black'},
        }

        AXIS_STYLE = {
            'showline': True,
            'linewidth': 1,
            'linecolor': 'black',
            'mirror': False,
            'gridcolor': 'lightgray',
            'showgrid': True,
            'zeroline': False,
        }

        def create_title(text):
            return {
                'text': text,
                'y': 0.95,
                'x': 0.5,
                'xanchor': 'center',
                'yanchor': 'top',
                'font': {'size': 18, 'color': 'black'}
            }

        # Prepare data
        groups = list(data.keys())
        plot_data = {
            'abundances': [data[group]['total_Absorbance'] for group in groups],
            'abundance_sems': [data[group]['abundance_sem'] for group in groups],
            'counts': [data[group]['unique_peptides'] for group in groups],
            'count_sems': [data[group]['count_sem'] for group in groups]
        }

        # Anything other than an explicit 'Count' selection (including a
        # missing or empty selector) falls back to the absorbance view.
        selector = getattr(self, 'abs_or_count', None)
        use_count = getattr(selector, 'value', None) == 'Count'

        fig = go.Figure()
        if use_count:
            # Add count bars
            fig.add_trace(go.Bar(
                x=groups,
                y=plot_data['counts'],
                name='Peptide Count',
                marker=dict(
                    color=first_color,
                    line=dict(color='black', width=1)
                ),
                error_y=dict(
                    type='data',
                    array=plot_data['count_sems'],
                    visible=True,
                    thickness=1.5,
                    width=4,
                    color='#000000'
                ),
                hovertemplate=(
                    "Group: %{x}<br>"
                    "Unique Peptides: %{y:.0f}<br>"
                    "SEM: %{error_y.array:.1f}<br>"
                    "<extra></extra>"
                )
            ))

            # Add count labels just above the error bars
            fig.add_trace(go.Scatter(
                x=groups,
                y=[c + (s * 1.2) for c, s in zip(plot_data['counts'], plot_data['count_sems'])],
                mode='text',
                text=[f"{int(c):,}" for c in plot_data['counts']],
                textposition='top center',
                textfont=dict(size=12),
                showlegend=False,
                hoverinfo='none'
            ))

            fig.update_yaxes(tickformat=",d")
        else:
            # Add abundance bars
            fig.add_trace(go.Bar(
                x=groups,
                y=plot_data['abundances'],
                name='Total Absorbance',
                marker=dict(
                    color=first_color,
                    line=dict(color='black', width=1)
                ),
                error_y=dict(
                    type='data',
                    array=plot_data['abundance_sems'],
                    visible=True,
                    thickness=1.5,
                    width=4,
                    color='#000000'
                ),
                hovertemplate=(
                    "Group: %{x}<br>"
                    "Total Abundance: %{y:.2e}<br>"
                    "SEM: %{error_y.array:.2e}<br>"
                    "<extra></extra>"
                )
            ))

            # Add abundance labels at the top of the error bars
            fig.add_trace(go.Scatter(
                x=groups,
                y=[a + s for a, s in zip(plot_data['abundances'], plot_data['abundance_sems'])],
                mode='text',
                text=[f"{a:.2e}" for a in plot_data['abundances']],
                textposition='top center',
                textfont=dict(size=14),
                showlegend=False,
                hoverinfo='none'
            ))

        # Update layout
        fig.update_layout(
            **COMMON_LAYOUT,
            title=create_title(self.plot_title),
            xaxis_title=self.x_axis_label,
            yaxis_title=self.y_axis_label,
            xaxis=AXIS_STYLE,
            yaxis=AXIS_STYLE
        )

        # Configure axes
        fig.update_xaxes(
            tickangle=45,
            title_font={"size": 18},
            tickfont={"size": 16}
        )

        # The y-axis is logarithmic for both views.
        fig.update_yaxes(
            title_font={"size": 18},
            tickfont={"size": 16},
            gridcolor="lightgray",
            showgrid=True,
            type="log",
            exponentformat="e"
        )

        return fig
   
    def generate_plot_title(self):
        """Build the plot title from the current widget selections.

        A non-empty ``title_widget`` value always wins. Otherwise the title is
        derived from the plot type, the Count/Absorbance selector and the
        orientation; when the filter mode is ``'Both'`` a summary of the
        selected proteins and functions is appended.

        Side effect: ``self.filters_combined`` is always (re)assigned, either
        to the HTML filter summary or to ``""`` when there is nothing to
        summarise.

        Returns:
            str: The title text (may contain ``<br>`` line breaks).
        """
        selected_groups = self.selected_groups
        is_scatter = self.plot_type.value == 'Corr. Scatter Plots'

        if not is_scatter:
            base_title = f'{self.abs_or_count.value} Distribution - {self.invert_plot.value}'
        elif len(selected_groups) > 2:
            base_title = 'Scatter Plot Matrix'
        elif len(selected_groups) == 2:
            base_title = f"Correlation Scatter Plot {selected_groups[0]} vs. {selected_groups[1]}"
        else:
            # Fewer than two groups: there is no pair to name, so do not index.
            base_title = 'Correlation Scatter Plot'

        # The filter selector may not exist on this instance; treat that as
        # "no filter" everywhere below instead of only in the summary section.
        filter_widget = getattr(self, 'plot_func_or_pro', None)
        filter_mode = filter_widget.value if filter_widget is not None else None

        def summarise(label, chosen, all_marker):
            """Return '<br>label: ...' listing at most three chosen entries."""
            if all_marker in chosen:
                return f"<br>{label}: All"
            if len(chosen) > 3:
                shown = ", ".join(chosen[:3]) + f"... (+{len(chosen)-3} more)"
            else:
                shown = ", ".join(chosen)
            return f"<br>{label}: {shown}"

        protein_title = ""
        function_title = ""
        if filter_mode in ('Selected Protein(s)', 'Both'):
            protein_title = summarise('Protein(s)', self.selected_proteins, 'All Proteins')
        if filter_mode in ('Selected Function(s)', 'Both'):
            function_title = summarise('Function(s)', self.selected_functions, 'All Functions')

        # Only emit the prefix when at least one filter line follows it.
        if protein_title or function_title:
            self.filters_combined = "Filtered By:" + protein_title + function_title
        else:
            self.filters_combined = ""

        # A user-supplied title overrides everything generated above.
        if self.title_widget.value != '':
            return self.title_widget.value

        if filter_mode == 'Both':
            if is_scatter:
                return f"{base_title} - ({self.filters_combined})"
            # base_title already ends with the orientation; do not repeat it.
            return f"{base_title} ({self.filters_combined})"

        return base_title
    
    def get_plot_labels(self, use_count=False, selected_groups=None, title=None, orientation=None, is_relative_metric=None):
        """Compute and store the axis labels, title and legend title for a plot.

        The Count/Absorbance selector and the ``metric_type`` widget are
        authoritative: ``use_count``, ``selected_groups`` and
        ``is_relative_metric`` are accepted for call compatibility, but the
        values passed in are not used.

        Args:
            use_count: Ignored; derived from ``self.abs_or_count.value``.
            selected_groups: Ignored.
            title: Title to use as-is. When ``None`` the title comes from
                ``self.generate_plot_title()``.
            orientation: ``'By Protein'``, ``'By Function'`` or anything else
                (treated as by-sample). Defaults to ``self.invert_plot.value``.
            is_relative_metric: Ignored; derived from
                ``self.metric_type.value``.

        Returns:
            tuple: ``(x_axis_label, y_axis_label, plot_title, legend_title)``
            as strings. The same four values are stored on ``self`` under
            those attribute names.
        """
        use_count = self.abs_or_count.value == 'Count'
        plot_type = self.plot_func_or_pro.value
        if orientation is None:
            orientation = self.invert_plot.value

        # An explicitly supplied title must be honoured; only fall back to the
        # generated title when the caller gave none.
        plot_title = self.generate_plot_title() if title is None else title

        metric_base = "Peptide Count" if use_count else "Summed Absorbance"

        # NOTE(review): a dedicated label for 'Grouped Bar Plots' under the
        # 'Both'/'No Filter' modes ("Unique Peptide Count" /
        # "log<sub>10</sub> (Summed Absorbance)") used to be computed here but
        # was always overwritten by the generic label below. The effective
        # behaviour is kept; confirm whether the dedicated label is wanted.
        if self.metric_type.value == 'Relative':
            y_axis_label = f"Relative {metric_base} (%)"
        else:
            y_axis_label = metric_base

        # The x axis shows whatever the plot is organised by; the legend shows
        # the other dimension.
        if orientation == 'By Protein':
            x_axis_label, legend_title = 'Proteins', 'Samples'
        elif orientation == 'By Function':
            x_axis_label, legend_title = 'Functions', 'Samples'
        else:  # By Sample
            x_axis_label = 'Samples'
            if plot_type in ('Selected Function(s)', 'Functional vs Non-Functional Peptides'):
                legend_title = 'Functions'
            elif plot_type == 'Selected Protein(s)':
                legend_title = 'Proteins'
            else:
                legend_title = ''

        # Non-empty widget values override the generated labels.
        if hasattr(self, 'legend_widget') and self.legend_widget.value:
            legend_title = self.legend_widget.value

        if hasattr(self, 'xlabel_widget') and self.xlabel_widget.value:
            x_axis_label = self.xlabel_widget.value

        if hasattr(self, 'ylabel_widget') and self.ylabel_widget.value:
            y_axis_label = self.ylabel_widget.value

        # Store as strings, not tuples
        self.x_axis_label = str(x_axis_label)
        self.y_axis_label = str(y_axis_label)
        self.plot_title = str(plot_title)
        self.legend_title = str(legend_title)

        return (self.x_axis_label, self.y_axis_label, self.plot_title, self.legend_title)
 
    def plot_stacked_bar_scaled(self, selected_groups, use_count=False):
        """Build a stacked bar figure of peptide counts or abundances.

        The layout depends on ``self.invert_plot.value``:

        * ``'By Sample'`` - samples on the x axis, one trace per selected
          function/protein. In the ``'No Filter'`` mode a single ``'Total'``
          bar is drawn instead, with one trace per sample.
        * any other value - functions/proteins on the x axis, one trace per
          sample.

        For absolute metrics an extra text-only scatter trace labels each bar
        with its total. Title, axis labels and legend title are read from
        ``self.plot_title``, ``self.x_axis_label``, ``self.y_axis_label`` and
        ``self.legend_title``, so those attributes must already be set.

        Args:
            selected_groups: Sample/group names to plot, in display order.
            use_count: ``True`` to plot peptide counts, ``False`` for
                abundance.

        Returns:
            plotly.graph_objects.Figure: The assembled figure.
        """
        # Set up items to display based on plot type
        plot_type = self.plot_func_or_pro.value

        # Create figure object before adding traces
        fig = go.Figure()

        # Check if we're using relative metrics
        is_relative_metric = hasattr(self, 'metric_type') and 'relative' in self.metric_type.value.lower()
        orientation = self.invert_plot.value

        if orientation == 'By Sample':
            # Pick the frame holding one row per item to stack.
            if plot_type == 'Selected Function(s)' or plot_type == 'Functional vs Non-Functional Peptides':
                scaled_df = self.function_df.copy()
            elif plot_type == 'Selected Protein(s)':
                scaled_df = self.protein_df.copy()
            elif plot_type == 'No Filter':
                scaled_df = self.sample_distribution_summary_df.copy()
            elif plot_type == 'Both':
                scaled_df = safe_concat([self.function_df, self.protein_df])

            # Per-sample totals: used to scale relative values back to
            # absolute bar heights and for the totals text trace.
            total_sums = {}
            for group in selected_groups:
                if use_count:
                    if plot_type == 'Selected Function(s)':
                        total_sum = self.function_count_totals_dict[group]
                    elif plot_type == 'Selected Protein(s)':
                        total_sum = self.protein_count_bysample_dict[group]
                    elif plot_type in ('Both', 'Functional vs Non-Functional Peptides'):
                        total_sum = self.abundance_count_by_sample_dict[group]['unique_peptides']
                else:  # abundance
                    if plot_type == 'Selected Function(s)':
                        total_sum = self.function_absorbance_totals_dict[group]
                    elif plot_type == 'Selected Protein(s)':
                        sample_key = self.value_cols[group]
                        if sample_key in self.sum_df['Sample'].values:
                            total_sum = self.sum_df.loc[self.sum_df['Sample'] == sample_key, 'Total_Sum'].values[0]
                        else:
                            total_sum = self.protein_df[sample_key].sum()
                    elif plot_type in ('Both', 'Functional vs Non-Functional Peptides'):
                        total_sum = self.abundance_count_by_sample_dict[group]['total_abundance']

                if plot_type != 'No Filter':
                    total_sums[group] = total_sum

            # Items that become one trace each.
            # NOTE(review): there is no branch for 'Both', so selected_items is
            # unbound for 'Both' + 'By Sample' and the colour lookup below
            # raises. Confirm what should be stacked in that combination.
            if plot_type == 'Selected Function(s)':
                selected_items = self.selected_functions
            elif plot_type == 'Selected Protein(s)':
                selected_items = self.selected_proteins
            elif plot_type == 'No Filter':
                selected_items = selected_groups
                # Grand total comes from the summary frame's 'Total' row.
                if use_count:
                    total_sum = scaled_df[scaled_df['Sample'] == 'Total']['Peptide_Count'].sum()
                else:
                    total_sum = scaled_df[scaled_df['Sample'] == 'Total']['Total_Abundance'].sum()
            elif plot_type == 'Functional vs Non-Functional Peptides':
                selected_items = list(self.function_df['Description'].unique())

            # Get colors
            if plot_type == 'Selected Function(s)' or plot_type == 'Both' or plot_type == 'Functional vs Non-Functional Peptides':
                colors = [self.function_color_map.get(f, '#CCCCCC') for f in selected_items]
            elif plot_type == 'Selected Protein(s)':
                colors = self.get_color_sequence(len(self.selected_proteins))
                if self.plot_minor.value:
                    colors.append('#808080')
            elif plot_type == 'No Filter':
                colors = self.get_color_sequence(len(selected_groups))

            # Add traces for each item
            for i, item in enumerate(selected_items):
                y_values = []
                hover_texts = []

                # Create display name for the item
                display_name = self.redact_string_descriptions(item)

                if plot_type != 'No Filter':
                    item_data = scaled_df[scaled_df['Description'] == item]
                    for group in selected_groups:
                        if len(item_data) > 0:
                            value_col = self.value_cols[group]
                            rel_col = self.rel_cols[group]
                            total_value_unique = total_sums[group]
                            value = item_data[value_col].iloc[0]
                            rel_value = item_data[rel_col].iloc[0] if rel_col in item_data.columns else 0

                            # Calculate y-value based on metric type
                            if is_relative_metric:
                                y_value = rel_value
                            else:
                                y_value = rel_value/100 * total_value_unique if total_value_unique > 0 else 0
                            y_values.append(y_value)

                            # Create hover text
                            hover_text = (
                                f"{'Function' if plot_type == 'Selected Function(s)' else 'Protein'}: {item}<br>"
                                f"Sample: {group}<br>"
                                f"Relative {self.metric_name}: {rel_value:.2f}%<br>"
                                f"Absolute {self.metric_name}: {value:{self.num_format}}<br>"
                            )
                            hover_texts.append(hover_text)
                        else:
                            y_values.append(0)
                            hover_texts.append(f"No data for {item} in {group}")
                else:
                    # 'No Filter': each item is a sample row of the summary frame.
                    item_data = scaled_df[scaled_df['Sample'] == item]
                    if len(item_data) > 0:
                        value_col = 'Peptide_Count' if use_count else 'Total_Abundance'
                        rel_col = 'Relative_Peptide_Count' if use_count else 'Relative_Total_Abundance'
                        rel_value = item_data[rel_col].iloc[0] if rel_col in item_data.columns else 0

                        value = int(float(item_data[value_col].iloc[0]))
                        # Keep the fractional part: the percentage is plotted
                        # and shown with two decimals.
                        rel_value = float(rel_value)

                        # Calculate y-value based on metric type
                        y_value = rel_value if is_relative_metric else value
                        y_values.append(y_value)

                        # Create hover text
                        new_name = 'Peptide Count' if use_count else 'Total Absorbance'
                        hover_text = (
                            # This trace belongs to `item`; `group` here would be
                            # the stale last value of the totals loop above.
                            f"Sample: {item}<br>"
                            f"Relative {new_name}: {rel_value:.2f}%<br>"
                            f"Absolute {new_name}: {value:{self.num_format}}<br>"
                        )
                        hover_texts.append(hover_text)

                color = colors[i] if i < len(colors) else '#CCCCCC'
                # The aggregated "minor" bucket is always drawn grey.
                if plot_type != 'No Filter' and self.plot_minor.value:
                    if item == 'Minor Functions' or item == 'Minor Proteins':
                        color = '#808080'

                # Add trace with explicit legend settings
                fig.add_trace(go.Bar(
                    name=display_name,
                    x=selected_groups if plot_type != 'No Filter' else ['Total'],
                    y=y_values,
                    marker_color=color,
                    hovertext=hover_texts,
                    hoverinfo='text',
                    showlegend=True  # Explicitly show in legend
                ))

        else:  # 'By Protein' or 'By Function'
            # Get colors based on selected color scheme
            colors = self.get_color_sequence(len(selected_groups))

            items_to_process = []
            if orientation == 'By Function' or plot_type == 'Selected Function(s)' or plot_type == 'Both':
                items_to_process = self.selected_functions
            elif orientation == 'By Protein' or plot_type == 'Selected Protein(s)':
                items_to_process = self.selected_proteins
            if plot_type == 'Functional vs Non-Functional Peptides':
                items_to_process = list(self.function_df['Description'].unique())

            # Per-item distribution records, keyed by function/protein.
            proteinfunc_totals = {}
            for profunc in items_to_process:
                if (orientation == 'By Function' and profunc in self.function_distribution_dict):
                    proteinfunc_totals[profunc] = self.function_distribution_dict[profunc]
                elif (orientation == 'By Protein' and profunc in self.protein_sample_distribution_dict):
                    proteinfunc_totals[profunc] = self.protein_sample_distribution_dict[profunc]
                if plot_type == 'Functional vs Non-Functional Peptides':
                    proteinfunc_totals[profunc] = self.function_distribution_dict[profunc]

            # Get unique proteins/functions for x-axis
            all_items = list(proteinfunc_totals.keys())
            # Create display names for all items
            display_items = [self.redact_string_descriptions(item) for item in all_items]

            # Create a trace for each group
            for i, group in enumerate(selected_groups):
                y_values = []
                hover_texts = []

                # For each protein/function, get its value for this group
                for profunc in all_items:
                    data = proteinfunc_totals[profunc]

                    # Skip if no data for this group
                    if 'values' not in data or group not in data['values']:
                        y_values.append(0)
                        hover_texts.append(f"No data for {profunc} in {group}")
                        continue

                    abs_value = data['values'][group]
                    rel_percentage = data['relative'][group] if 'relative' in data and group in data['relative'] else 0

                    # Calculate y-value based on metric type
                    if is_relative_metric:
                        y_value = rel_percentage
                    elif use_count:
                        # Scale the item's unique peptide count by this sample's share.
                        unique_peptide_total = data.get('unique_peptide_count', 0)
                        y_value = unique_peptide_total * (rel_percentage / 100) if unique_peptide_total > 0 else 0
                    else:
                        # Scale the item's total absorbance by this sample's share.
                        total_absorbance = data.get('total_absorbance', 0)
                        y_value = total_absorbance * (rel_percentage / 100) if total_absorbance > 0 else 0

                    y_values.append(y_value)

                    # Format hover text
                    abs_count_label = "Count" if use_count else "Abundance"
                    hover_text = (
                        f"{'Function' if plot_type == 'Selected Function(s)' else 'Protein'}: {profunc}<br>"
                        f"Sample: {group}<br>"
                        f"Sample's contribution: {rel_percentage:.2f}%<br>"
                        f"{abs_count_label} in sample: {abs_value:{self.num_format}}<br>"
                    )
                    hover_texts.append(hover_text)

                color = colors[i] if i < len(colors) else '#CCCCCC'

                # Add trace for this group
                fig.add_trace(go.Bar(
                    name=group,
                    x=display_items,  # Use display_items for x-axis
                    y=y_values,
                    marker_color=color,
                    hovertext=hover_texts,
                    hoverinfo='text',
                    showlegend=True
                ))

        fig.update_layout(
            barmode='stack',
            title={
                'text': self.plot_title,
                'y': 0.95,
                'x': 0.5,
                'xanchor': 'center',
                'yanchor': 'top',
                'font': {"size": 18, 'color': 'black'}
            },
            xaxis_title=self.x_axis_label,
            yaxis_title=self.y_axis_label,
            yaxis=dict(
                showline=True,
                gridcolor='lightgray',
                showgrid=True,
                showticklabels=True,
                linewidth=1,
                linecolor='black',
                mirror=False,
                zeroline=False,  # Don't show zero line
                range=[0, 100] if is_relative_metric else None  # Set range to [0,100] for relative metrics
            ),
            xaxis=dict(
                showline=True,
                linewidth=1,
                linecolor='black',
                mirror=False,
                # NOTE(review): update_xaxes(tickangle=45) below is applied
                # afterwards, so 45 is what renders in both orientations.
                tickangle=-90 if orientation == 'By Sample' else 45
            ),
            legend_title=self.legend_title,
            legend={
                'yanchor': "top",
                'y': 0.95,
                'xanchor': "left",
                'x': 1.05,
                'traceorder': 'normal',
                'font': {"size": 16, 'color': 'black'},
                'bgcolor': 'rgba(255, 255, 255, 0.9)'
            },
            showlegend=True,
            template='plotly_white',
            height=820,
            width=1200,
            margin=dict(
                t=100,
                l=100,
                r=100,
                b=100
            ),
            hoverlabel=dict(
                bgcolor="white",
                font_size=14,
                font_family="Arial"
            ))

        fig.update_xaxes(
            tickangle=45,
            title_font={"size": 18},
            tickfont={"size": 16},
            tickfont_color="black",  # Black tick labels
            title_font_color="black",  # Black axis title
        )

        # Update Y axis formatting based on metric
        if is_relative_metric:
            tick_format = ".1f"  # One decimal place for relative metrics
            showticklabels_tf = True
        else:
            if use_count:
                tick_format = ""  # Regular integers for counts
            else:
                tick_format = ".1e"  # Scientific notation for abundance
            showticklabels_tf = False
        fig.update_yaxes(
            title_font={"size": 18},
            tickfont={"size": 16},
            tickfont_color="black",  # Black tick labels
            title_font_color="black",  # Black axis title
            gridcolor="lightgray",  # Light gray grid lines
            showgrid=True,  # Show grid lines
            zeroline=False,  # Hide zero line
            exponentformat='E',
            showexponent='all',
            tickformat=tick_format,
            showticklabels=showticklabels_tf
        )

        # Totals are only meaningful for absolute metrics.
        if not is_relative_metric:
            if orientation == 'By Sample':
                if plot_type == 'No Filter':
                    total_sum = int(float(total_sum))

                if use_count:
                    text_format = [f"{int(total_sums[group])}" for group in selected_groups] if plot_type != 'No Filter' else [f"{int(total_sum)}"]
                else:  # abundance
                    text_format = [f"{total_sums[group]:.2e}" for group in selected_groups] if plot_type != 'No Filter' else [f"{total_sum:.2e}"]

                fig.add_trace(go.Scatter(
                    x=selected_groups if plot_type != 'No Filter' else ['Total'],
                    y=[total_sums[group] for group in selected_groups] if plot_type != 'No Filter' else [total_sum],
                    mode='text',
                    text=text_format,
                    textposition='top center',
                    textfont=dict(size=12, color='black'),
                    showlegend=True,
                    name=f'Show Total {self.metric_name}',
                    hoverinfo='none',
                    texttemplate='%{text}'
                ))
            else:  # By Protein / By Function
                if plot_type == 'Selected Function(s)' or orientation == 'By Function':
                    # Function-wise totals
                    items_to_show_temp = list(self.selected_functions)
                elif plot_type == 'Selected Protein(s)' or orientation == 'By Protein':
                    items_to_show_temp = list(self.selected_proteins)
                if plot_type == 'Functional vs Non-Functional Peptides':
                    items_to_show_temp = list(self.function_df['Description'].unique())

                # De-duplicate while keeping first-seen order.
                items_to_show = list(dict.fromkeys(items_to_show_temp))

                # Create display names for items to show
                display_items_to_show = [self.redact_string_descriptions(item) for item in items_to_show]

                text_format = []
                y_values = []
                for item in items_to_show:
                    if item in proteinfunc_totals:
                        if use_count:
                            # Use the unique peptide count
                            total = proteinfunc_totals[item]['unique_peptide_count']
                            text_format.append(f"{int(total)}")
                        else:  # abundance
                            total = proteinfunc_totals[item]['total_absorbance']
                            text_format.append(f"{total:.2e}")
                        y_values.append(total)
                    else:
                        text_format.append("0")
                        y_values.append(0)

                # Add the totals trace
                fig.add_trace(go.Scatter(
                    x=display_items_to_show,
                    y=y_values,
                    mode='text',
                    text=text_format,
                    textposition='top center',
                    textfont=dict(size=12, color='black'),
                    showlegend=True,
                    name=f'Show Total {self.metric_name}',
                    hoverinfo='none',
                    texttemplate='%{text}'
                ))

        return fig
    
    def create_grouped_bar_plot(self, selected_groups, use_count=False):
        """Generate interactive Plotly grouped bar plots for proteins or functions"""
        # Check if we have data
        if (self.plot_func_or_pro.value == 'Selected Function(s)' and not self.function_distribution_dict) or \
        (self.plot_func_or_pro.value != 'Selected Function(s)' and (self.protein_df is None or len(self.protein_df) == 0)):
            print("No data available for plotting.")
            return None

        try:
            # Check if we're using relative metrics
            is_relative_metric = hasattr(self, 'metric_type') and 'relative' in self.metric_type.value.lower()
            
            # Create figure
            fig = go.Figure()
            display_items = []
            # Determine orientation
            orientation = self.invert_plot.value if hasattr(self, 'invert_plot') else 'By Sample'
            
            # Set up items to display based on plot type
            plot_type = self.plot_func_or_pro.value
            if orientation == 'By Function':
                display_items = self.selected_functions
                data_dict = self.function_distribution_dict
            elif orientation == 'By Protein': # orientation == 'By Protein'
                display_items = self.selected_proteins
                data_dict = self.protein_sample_distribution_dict
 
            if orientation == 'By Sample' and plot_type == 'Selected Function(s)':
                display_items = self.selected_functions
                data_dict = self.function_distribution_dict
            elif orientation == 'By Sample' and plot_type == 'Selected Protein(s)':
                display_items = self.selected_proteins
                data_dict = self.protein_sample_distribution_dict
            
            if plot_type == 'Functional vs Non-Functional Peptides':
                display_items = ['Functional Peptides','Non-Functional Peptides']
                data_dict = self.function_distribution_dict

            # Based on orientation, determine categories and bars
            if orientation != 'By Sample':
                # Filter to only include items that have data
                display_items = [item for item in display_items if item in data_dict]
                
                if not selected_groups or not display_items:
                    print("No valid groups or items selected for plotting.")
                    return None
                categories = display_items
                bar_groups = selected_groups
                color_sequence = self.get_color_sequence(len(selected_groups))
                color_mapping = {group: color_sequence[i] for i, group in enumerate(selected_groups)}
            else:  # 'By Sample'
                categories = selected_groups
                bar_groups = display_items
                color_sequence = self.get_color_sequence(len(display_items))
                color_mapping = {item: color_sequence[i] for i, item in enumerate(display_items)}
                # Special color for Minor items
                minor_key = 'Minor Functions' if plot_type == 'Selected Function(s)' else 'Minor Proteins'
                if minor_key in bar_groups:
                    color_mapping[minor_key] = '#808080'  # Grey
            
            # Calculate bar positions
            n_bar_groups = len(bar_groups)
            bar_width = 0.8 / n_bar_groups
            
            # Create display names for categories
            display_categories = [self.redact_string_descriptions(cat) for cat in categories]
            
            # For each bar group, create a trace
            for idx, bar_group in enumerate(bar_groups):
                x_positions = [i + (idx - n_bar_groups/2 + 0.5) * bar_width for i in range(len(categories))]
                values = []
                hover_text = []
                
                # Create display name for the bar group
                display_bar_group = self.redact_string_descriptions(bar_group)
                
                for i, category in enumerate(categories):
                    if orientation != 'By Sample':
                        item = category
                        group = bar_group
                    else:
                        item = bar_group
                        group = category
                    
                    # Skip if item doesn't have data
                    if item not in data_dict:
                        values.append(0)
                        hover_text.append(f"{'Function' if orientation == 'By Function' else 'Protein'}: {item}<br>"
                                        f"Sample: {group}<br>Value: 0")
                        continue
                    
                    # Get data
                    item_data = data_dict[item]
                    # Get the appropriate values based on metric type
                        
                    if use_count:
                        abs_value = item_data['counts'].get(group, 0)
                        rel_percentage = item_data['count_relative'].get(group, 0)
                    else:
                        abs_value = item_data['absorbance'].get(group, 0)
                        rel_percentage = item_data['absorbance_relative'].get(group, 0)
                    
                    # Determine y-value based on metric type
                    if is_relative_metric:
                        y_value = rel_percentage
                        hover = (f"{'Function' if plot_type == 'Selected Function(s)' else 'Protein'}: {item}<br>"
                                f"Sample: {group}<br>"
                                f"Relative Contribution: {rel_percentage:.1f}%<br>"
                                f"{self.metric_name}: {abs_value:{self.num_format}}")
                    else:
                        y_value = abs_value
                        hover = (f"{'Function' if plot_type == 'Selected Function(s)' else 'Protein'}: {item}<br>"
                                f"Sample: {group}<br>"
                                f"{self.metric_name}: {abs_value:{self.num_format}}<br>"
                                f"Relative Contribution: {rel_percentage:.1f}%")
                    
                    values.append(y_value)
                    hover_text.append(hover)
                
                # Only add trace if we have valid values
                if any(v > 0 for v in values):
                    fig.add_trace(go.Bar(
                        name=display_bar_group,
                        x=x_positions,
                        y=values,
                        width=bar_width * 0.9,
                        marker_color=color_mapping.get(bar_group, 'gray'),
                        hovertext=hover_text,
                        hoverinfo='text'
                    ))
                        
        
            # Update layout
            fig.update_layout(
                title={
                    'text': self.plot_title,
                    'y': .975,
                    'x': 0.5,
                    'xanchor': 'center',
                    'yanchor': 'top',
                    'font': {'size': 18, 'color': 'black'}
                },
                xaxis_title=self.x_axis_label,
                yaxis_title=self.y_axis_label,
                legend_title=self.legend_title,
                legend={'yanchor': "top", 'y': 1.0, 'xanchor': "left", 'x': 1.05, 'traceorder': 'normal', 'font': {'size': 12, 'color': 'black'}},
                showlegend=True,
                template='plotly_white',
                height=750,
                width=1100,
                margin=dict(t=100, l=100, r=200),
                hoverlabel=dict(
                    bgcolor="white",
                    font_size=12,
                    font_family="Arial"
                ),
                barmode='group',
                xaxis=dict(
                    showline=True,
                    linewidth=1,
                    linecolor='black',
                    mirror=False
                ),
                yaxis=dict(
                    showline=True,
                    linewidth=1,
                    linecolor='black',
                    mirror=False
                )
            )
            
            # Update axis properties
            fig.update_xaxes(
                ticktext=display_categories,
                tickvals=list(range(len(categories))),
                tickangle=45,
                title_font={"size": 18},
                tickfont={"size": 16},
                tickfont_color="black",
                title_font_color="black",
            )
            
            # Set y-axis format based on plot type
            if use_count and not is_relative_metric:
                # Absolute count
                fig.update_yaxes(
                    type='linear',
                    tickformat=",d",  # Format with commas for thousands
                    title_font={"size": 18},
                    tickfont={"size": 16},
                    tickfont_color="black",
                    title_font_color="black",
                    gridcolor="lightgray",
                    showgrid=True,
                    zeroline=False,
                )
            elif not use_count and not is_relative_metric:
                # Absolute abundance
                fig.update_yaxes(
                    type='log',
                    exponentformat='E',
                    showexponent='all',
                    title_font={"size": 18},
                    tickfont={"size": 16},
                    tickfont_color="black",
                    title_font_color="black",
                    gridcolor="lightgray",
                    showgrid=True,
                    zeroline=False,
                )
            else:
                # Relative metrics (both count and abundance)
                fig.update_yaxes(
                    type='linear',
                    range=[0, 100],
                    title_font={"size": 18},
                    tickfont={"size": 16},
                    tickfont_color="black",
                    title_font_color="black",
                    gridcolor="lightgray",
                    showgrid=True,
                    zeroline=False,
                )
                
            # Mark generation as complete
            #self.state_manager.generate_completed()
            
            return fig
            
        except Exception as e:
            print(f"Error creating grouped bar plot: {str(e)}")
            traceback.print_exc()
            return None
           
    def create_pie_charts(self, selected_groups, use_count=False):
        """Create pie charts for protein or function data with pre-calculated counts or abundance.

        The layout follows the orientation widget (``self.invert_plot``); when
        that widget does not exist the per-sample layout is used:

        * ``'By Sample'``: one pie per sample column, sliced by the selected
          functions/proteins. For the ``'Both'`` / ``'No Filter'`` plot types a
          single pie comparing the samples is returned instead.
        * ``'By Function'``: one pie per function, sliced by ``selected_groups``.
        * anything else: one pie per protein, sliced by ``selected_groups``.

        Args:
            selected_groups: Groups/samples shown as slices in the per-item
                layouts. In the ``'By Sample'`` grid this argument is replaced
                by a list derived from ``self.group_selector`` or
                ``self.value_cols`` (see the note in the body).
            use_count: If True, read the count entries of the metric and
                distribution dictionaries; otherwise read the abundance /
                absorbance entries. Not consulted by the ``'By Sample'`` grid,
                which plots the DataFrame columns directly.

        Returns:
            A Plotly figure, or None when there is nothing to plot or an error
            occurred (the error and its traceback are printed).
        """
        def warn_if_large_grid(num_rows, num_cols):
            """Display the performance banner when the subplot grid exceeds 4 rows."""
            if num_rows > 4:
                display(HTML(f"""
                    <div style='color: #856404; background-color: #fff3cd; border: 1px solid #ffeeba; border-radius: 4px; padding: 10px; margin: 10px 0;'>
                        <strong>Warning:</strong> Creating a large subplot grid with {num_rows} rows and {num_cols} columns.<br>
                        This may affect performance. Consider limiting the number through the protein or function selector.
                    </div>
                    """))

        try:
            # Determine what is being plotted and how the pies are arranged
            plot_type = self.plot_func_or_pro.value if hasattr(self, 'plot_func_or_pro') else 'Selected Protein(s)'
            # Default to the per-sample layout so a missing widget cannot leave
            # `orientation` unbound.
            orientation = self.invert_plot.value if hasattr(self, 'invert_plot') else 'By Sample'

            if orientation == 'By Sample':
                # ---- pick the source table (or short-circuit to the overview pie) ----
                if plot_type in ('Selected Function(s)', 'Functional vs Non-Functional Peptides'):
                    if not hasattr(self, 'function_df') or self.function_df is None or self.function_df.empty:
                        print("No function data available to plot")
                        return None
                    scaled_df = self.function_df.copy()

                elif plot_type == 'Selected Protein(s)':
                    if not hasattr(self, 'protein_df') or self.protein_df is None or self.protein_df.empty:
                        print("No protein data available to plot")
                        return None
                    scaled_df = self.protein_df.copy()

                elif plot_type in ('Both', 'No Filter'):
                    # Single pie comparing the samples; every path in this branch returns.
                    try:
                        metrics_dict = self.abundance_count_by_sample_dict
                        if not metrics_dict:
                            print("No data available for plotting")
                            return None

                        if use_count:
                            value_key, relative_key = 'unique_peptides', 'relative_peptides'
                            metric_label = 'Peptide Count'
                            hover_format = ',.0f'  # Integer with thousands separators
                        else:
                            value_key, relative_key = 'total_abundance', 'relative_abundance'
                            metric_label = 'Total Abundance'
                            hover_format = '.2e'  # Scientific notation

                        sample_metrics = list(metrics_dict.values())
                        labels = list(metrics_dict.keys())
                        values = [entry[value_key] for entry in sample_metrics]
                        relative_values = [entry[relative_key] for entry in sample_metrics]

                        fig = go.Figure()
                        fig.add_trace(
                            go.Pie(
                                labels=labels,
                                values=values,
                                name='Sample Distribution',
                                marker_colors=self.get_color_sequence(len(metrics_dict)),
                                textposition='inside',
                                textinfo='percent',
                                customdata=relative_values,  # Shown as the "Relative ..." hover line
                                hovertemplate=(
                                    "Sample: %{label}<br>"
                                    f"{metric_label}: %{{value:{hover_format}}}<br>"
                                    "Percentage: %{percent}<br>"
                                    f"Relative {metric_label}: %{{customdata:.1f}}%<br>"
                                    "<extra></extra>"
                                ),
                                hole=0.0,
                                showlegend=True
                            )
                        )

                        fig.update_layout(
                            height=600,
                            width=800,
                            title={
                                'text': self.plot_title,
                                'y': 0.95,
                                'x': 0.5,
                                'xanchor': 'center',
                                'yanchor': 'top',
                                'font': {"size": 20, 'color': 'black'}
                            },
                            showlegend=True,
                            legend={
                                'title': 'Samples',
                                'yanchor': "middle",
                                'y': 0.5,
                                'xanchor': "left",
                                'x': 1.1,
                                'font': {"size": 12},
                            },
                            margin=dict(t=100, b=50, l=50, r=150),
                            paper_bgcolor='rgba(255,255,255,1)',
                            plot_bgcolor='rgba(255,255,255,1)',
                            font=dict(
                                family="Arial, sans-serif",
                                size=14,
                                color="black"
                            )
                        )

                        return fig

                    except Exception as e:
                        print(f"Error creating single pie chart: {str(e)}")
                        traceback.print_exc()
                        return None

                else:
                    # Previously surfaced as a NameError on the unbound source table.
                    print(f"Unsupported plot type for pie charts: {plot_type}")
                    return None

                # ---- resolve the samples to plot ----
                # NOTE(review): this branch discards the `selected_groups` argument.
                # It tests for a `selected_groups` attribute but then reads
                # `group_selector` - confirm which one is intended.
                if hasattr(self, 'selected_groups'):
                    selected_groups = self.group_selector.value
                else:
                    # Fallback if selected_groups is not available
                    selected_groups = list(self.value_cols.keys()) if isinstance(self.value_cols, dict) else [col.replace(self.value_prefix, '') for col in self.value_cols]

                if isinstance(self.value_cols, dict):
                    # value_cols maps group -> column; keep only the groups it knows
                    sample_names = [group for group in selected_groups if group in self.value_cols]
                    sample_columns = [self.value_cols[group] for group in sample_names]
                else:
                    # value_cols is a list of columns; every column is plotted
                    sample_columns = self.value_cols
                    sample_names = [col.replace(self.value_prefix, '') for col in sample_columns]

                # ---- restrict the table to the selected proteins/functions ----
                if plot_type == 'Selected Function(s)':
                    display_items = list(self.selected_functions)
                    if 'All Functions' in display_items:
                        display_items = [f for f in scaled_df['Description'].unique() if f != 'All Functions']
                    if hasattr(self, 'plot_minor') and self.plot_minor.value:
                        if 'Minor Functions' in scaled_df['Description'].values:
                            display_items.append('Minor Functions')
                elif plot_type == 'Selected Protein(s)':
                    if not getattr(self, 'selected_proteins', None):
                        # Previously surfaced as a NameError on the unbound selection.
                        print("No proteins selected for plotting")
                        return None
                    display_items = list(self.selected_proteins)
                else:  # 'Functional vs Non-Functional Peptides'
                    display_items = list(self.function_df['Description'].unique())

                scaled_df = scaled_df[scaled_df['Description'].isin(display_items)]

                # ---- grid layout: at most 3 columns ----
                num_samples = len(sample_columns)
                if num_samples == 0:
                    # Avoids a ZeroDivisionError in the row computation below.
                    print("No samples available for plotting")
                    return None
                num_cols = min(3, num_samples)
                num_rows = (num_samples + num_cols - 1) // num_cols  # Ceiling division
                warn_if_large_grid(num_rows, num_cols)

                # Apply redact function to sample names for subplot titles
                redacted_sample_names = [self.redact_string_descriptions(name, max_length=50) for name in sample_names]

                fig = make_subplots(
                    rows=num_rows,
                    cols=num_cols,
                    specs=[[{'type': 'pie'} for _ in range(num_cols)] for _ in range(num_rows)],
                    subplot_titles=redacted_sample_names
                )

                # ---- slice colours ----
                # NOTE(review): self.function_color_map used to be read here but was
                # always overwritten by the generated sequence below, so it is no
                # longer touched. Confirm whether functions should use that map.
                if plot_type == 'Selected Function(s)':
                    unique_items = scaled_df['Description'].unique().tolist()
                elif plot_type == 'Selected Protein(s)':
                    unique_items = scaled_df['Description'].unique().tolist()
                    # Minor Proteins gets a fixed grey, so it must not consume a palette colour
                    if 'Minor Proteins' in unique_items:
                        unique_items.remove('Minor Proteins')
                else:  # 'Functional vs Non-Functional Peptides'
                    unique_items = list(self.function_df['Description'].unique())

                item_colors = self.get_color_sequence(len(unique_items))
                color_map = dict(zip(unique_items, item_colors))
                if 'Minor Proteins' in scaled_df['Description'].values:
                    color_map['Minor Proteins'] = '#808080'  # Grey color for minor proteins

                # Hover prefix describes what a slice is. Deriving it from the
                # orientation string produced the truncated word "Sampl".
                if plot_type == 'Selected Function(s)':
                    plot_type_label = "Function"
                elif plot_type == 'Selected Protein(s)':
                    plot_type_label = "Protein"
                else:
                    plot_type_label = "Item"

                # ---- one pie per sample ----
                for i, col_name in enumerate(sample_columns):
                    row_idx = i // num_cols + 1
                    col_idx = i % num_cols + 1
                    sample_name = sample_names[i]

                    # Keep only strictly positive values for this sample
                    sample_data = scaled_df[['Description', col_name]].copy()
                    sample_data = sample_data[sample_data[col_name] > 0]

                    if sample_data.empty:
                        continue

                    # Sort by value but ensure Minor items are at the end
                    try:
                        descriptions = sample_data['Description']
                        minor_mask = (
                            descriptions.str.contains('Minor Proteins', na=False) |
                            descriptions.str.contains('Minor Functions', na=False)
                        )

                        if minor_mask.any():
                            minor_rows = sample_data[minor_mask]
                            other_rows = sample_data[~minor_mask].sort_values(by=col_name, ascending=False)
                            sample_data = safe_concat([other_rows, minor_rows], ignore_index=True)
                        else:
                            sample_data = sample_data.sort_values(by=col_name, ascending=False)

                    except Exception as e:
                        print(f"Error during sorting: {str(e)}")
                        # Fallback to basic sorting if anything goes wrong
                        sample_data = sample_data.sort_values(by=col_name, ascending=False)

                    # Items without a mapped colour fall back to light grey
                    colors = [color_map.get(item, '#CCCCCC') for item in sample_data['Description']]

                    # Best effort per sample: a failing chart is reported and skipped
                    try:
                        display_label = [self.redact_string_descriptions(l) for l in sample_data['Description']]
                        fig.add_trace(
                            go.Pie(
                                labels=display_label,
                                values=sample_data[col_name],
                                name=sample_name,
                                marker_colors=colors,
                                textposition='inside',
                                textinfo='percent',
                                hovertemplate=(
                                    f"{plot_type_label}: %{{label}}<br>"
                                    f"{self.metric_name}: %{{value:{self.num_format}}}<br>"
                                    f"Percentage: %{{percent}}<br>"
                                    f"Sample: {sample_name}<br>"
                                    f"<extra></extra>"
                                ),
                                hole=0.0,
                                # Every per-sample pie keeps its legend entries; the
                                # flag was never cleared in this branch.
                                showlegend=True
                            ),
                            row=row_idx, col=col_idx
                        )
                    except Exception as e:
                        print(f"Error adding pie chart for sample '{sample_name}': {str(e)}")

            else:  # 'By Function' or 'By Protein'
                if orientation == 'By Function':
                    if not getattr(self, 'function_distribution_dict', None):
                        print("No function distribution data available")
                        return None

                    distribution_data = self.function_distribution_dict
                    unique_items = list(distribution_data.keys())

                    # Filter functions based on selection
                    if plot_type != 'Functional vs Non-Functional Peptides':
                        selected_functions = self.selected_functions
                        if 'All Functions' not in selected_functions:
                            filtered_items = [item for item in selected_functions if item in distribution_data]

                            # Add the minor bucket once, and only when the toggle exists and is on
                            wants_minor = hasattr(self, 'plot_minor') and self.plot_minor.value
                            if (wants_minor and 'Minor Functions' in distribution_data
                                    and 'Minor Functions' not in filtered_items):
                                filtered_items.append('Minor Functions')

                            unique_items = filtered_items

                else:  # orientation == 'By Protein'
                    if not getattr(self, 'protein_sample_distribution_dict', None):
                        print("No protein distribution data available")
                        return None

                    distribution_data = self.protein_sample_distribution_dict
                    unique_items = list(distribution_data.keys())

                    # Filter to only include proteins we want to plot
                    if hasattr(self, 'selected_proteins') and self.selected_proteins:
                        unique_items = [item for item in self.selected_proteins if item in distribution_data]

                # ---- grid layout: at most 3 columns ----
                num_items = len(unique_items)
                if num_items == 0:
                    # Avoids a ZeroDivisionError in the row computation below.
                    print("No items available for plotting")
                    return None
                num_cols = min(3, num_items)
                num_rows = (num_items + num_cols - 1) // num_cols
                warn_if_large_grid(num_rows, num_cols)

                # Apply redact function to unique items for subplot titles
                redacted_items = [self.redact_string_descriptions(item, max_length=50) for item in unique_items]

                fig = make_subplots(
                    rows=num_rows, cols=num_cols,
                    specs=[[{'type': 'pie'} for _ in range(num_cols)] for _ in range(num_rows)],
                    subplot_titles=redacted_items
                )

                # One colour per sample, keyed without the 'Avg_' prefix
                sample_colors = self.get_color_sequence(len(selected_groups))
                sample_color_map = {
                    group.replace('Avg_', ''): color
                    for group, color in zip(selected_groups, sample_colors)
                }

                # Only the first pie that is actually drawn contributes legend entries
                legend_pending = True

                # ---- one pie per protein/function ----
                for i, item in enumerate(unique_items):
                    row_idx = i // num_cols + 1
                    col_idx = i % num_cols + 1

                    item_data = distribution_data[item]

                    if use_count:
                        values_dict = item_data['counts']
                        total = item_data['total_count']
                    else:
                        values_dict = item_data['absorbance']
                        total = item_data['total_absorbance']

                    values = []
                    labels = []
                    colors_list = []

                    for group in selected_groups:
                        # Remove 'Avg_' prefix if present for matching
                        group_key = group.replace('Avg_', '')
                        value = values_dict.get(group_key, 0)
                        if value > 0:
                            values.append(value)
                            labels.append(group_key)
                            colors_list.append(sample_color_map[group_key])

                    if not values:
                        continue

                    display_label = [self.redact_string_descriptions(l) for l in labels]
                    fig.add_trace(
                        go.Pie(
                            labels=display_label,
                            values=values,
                            name=item,
                            marker_colors=colors_list,
                            textposition='inside',
                            textinfo='percent',
                            hovertemplate=(
                                f"Sample: %{{label}}<br>"
                                f"{self.metric_name}: %{{value:{self.num_format}}}<br>"
                                f"Percentage: %{{percent}}<br>"
                                f"Total {self.metric_name}: {total:{self.num_format}}<br>"
                                f"<extra></extra>"
                            ),
                            hole=0.0,
                            showlegend=legend_pending
                        ),
                        row=row_idx, col=col_idx
                    )
                    legend_pending = False

            # ---- shared layout for both grid variants ----
            y_val = 0.95 if num_rows < 3 else 0.98
            # Title font shrinks as the grid grows; the title is dropped beyond 6 rows
            plot_title = self.plot_title
            if num_rows > 6:
                plot_title = ''
                text_size = 12
            elif num_rows <= 3:
                text_size = 18
            else:  # 4 to 6 rows
                text_size = 14

            fig.update_layout(
                height=500 * num_rows,
                width=min(1400, 450 * num_cols),
                title_text = plot_title,
                title={
                    'y': y_val,
                    'x': 0.5,
                    'xanchor': 'center',
                    'yanchor': 'top',
                    'font': {"size": text_size, 'color': 'black'}
                },
                showlegend=True,
                legend={
                    'title': self.legend_title,
                    'yanchor': "top",
                    'y': 0.99,
                    'xanchor': "left",
                    'x': 1.02,
                    'font': {"size": 12},
                },
                margin=dict(t=100, b=50, l=50, r=150),
                paper_bgcolor='rgba(255,255,255,1)',
                plot_bgcolor='rgba(255,255,255,1)',
                font=dict(
                    family="Arial, sans-serif",
                    size=14,
                    color="black"
                )
            )

            # Mark generation as complete
            #self.state_manager.generate_completed()
            self.download_plot_button.disabled = False

            return fig

        except Exception as e:
            print(f"Error creating pie charts: {str(e)}")
            traceback.print_exc()
            return None
    
    def on_plot_button_click(self, b):
        """Handle a click on the plot button: build and display the selected plot.

        Reads the current selections through ``retrieve_data_for_plot``, shows any
        warnings returned by ``check_invalid_user_selection``, dispatches to the
        plotting method that matches the selected plot type, and displays the
        resulting figure inside ``self.plot_output``.

        The plot button is disabled for the duration of the call and is always
        re-enabled afterwards, including when plot generation raises.

        Args:
            b: Argument supplied by the click callback (unused here).

        Returns:
            None. The generated figure is stored in ``self.current_fig``; it is
            ``None`` when generation failed.
        """
        # Disable the plot button to prevent multiple clicks
        self.plot_button.disabled = True

        if self.current_fig is not None:
            self.download_plot_button.disabled = False

        try:
            with self.plot_output:
                self.plot_output.clear_output(wait=True)

                # Create and display progress bar
                progress = widgets.FloatProgress(
                    value=0,
                    min=0,
                    max=3,
                    description='Loading:',
                    bar_style='info',
                    style={'bar_color': '#2196F3'},
                    orientation='horizontal'
                )
                display(progress)

                # Retrieve data for the plot from data handler
                selected_groups, plot_type, use_count, orientation, plot_filter = self.retrieve_data_for_plot()

                # Update progress bar to 1/3
                progress.value = 1

                # Check for invalid user selections; de-duplicate the messages
                # while keeping the order in which they were produced
                warning_messages = list(dict.fromkeys(
                    self.check_invalid_user_selection(selected_groups, plot_type, use_count, orientation, plot_filter)
                ))
                # Display any warnings before continuing
                for warning_message in warning_messages:
                    display_warning(warning_message)

                # create the plot based on the plot type and orientation
                if plot_type == 'Grouped Bar Plots':
                    if orientation == 'By Sample':
                        unfiltered_or_both = plot_filter == 'No Filter' or plot_filter == 'Both'
                        if unfiltered_or_both and self.metric_type.value == 'Absolute':
                            self.current_fig = self.plot_total_peptides()
                        elif unfiltered_or_both and self.metric_type.value == 'Relative':
                            display_warning("Invalid combination of Plot Filter 'No Filter' or 'Both', Plot Orientation 'By Sample', and Plot Metric of 'Relative'.<br>Currently this combination is not supported, A Relative Stacked Bar Plot Is being generated instead.<br>In the generation of a Grouped Bar Plot Plot Filter 'No Filter' or 'Both' can be used in combination with Plot Orientation of 'By Sample', and Plot Metric of <b>'Absolute'</b>.")
                            self.current_fig = self.plot_stacked_bar_scaled(
                                selected_groups=selected_groups,
                                use_count=use_count
                            )
                        else:
                            self.current_fig = self.create_grouped_bar_plot(
                                selected_groups=selected_groups,
                                use_count=use_count
                            )
                    else:  # any orientation other than 'By Sample'
                        self.current_fig = self.create_grouped_bar_plot(
                            selected_groups=selected_groups,
                            use_count=use_count
                        )
                    if self.current_fig is None:
                        display_warning("Error generating Grouped Bar Plot.<br>Please upload all required files first.<br>Error creating plot. Please check your data.")
                        return None

                # Create and display the appropriate plot based on selection and metric
                if plot_type == 'Stacked Bar Plots':
                    if plot_filter == 'Both':
                        display_warning("Invalid combination of Plot Filter 'Both', for Stack Bar Plot plot type'.<br>Currently this combination is not supported.<br>Plot Filter 'No Filter' or 'Selected Function(s)' or 'Selected Protein(s)' can be used for stack bar plot type.")
                        return None
                    # NOTE: 'No Filter' is deliberately allowed for stacked bar plots.
                    try:
                        self.current_fig = self.plot_stacked_bar_scaled(
                            selected_groups=selected_groups,
                            use_count=use_count
                        )
                    except Exception as e:
                        print(f"Error creating stacked bar plots: {str(e)}")
                        traceback.print_exc()
                        # Drop any figure from a previous click so a stale plot
                        # is not shown as if it were the result of this request
                        self.current_fig = None

                # create the pie chart plot
                if plot_type == 'Pie Charts':
                    try:
                        self.current_fig = self.create_pie_charts(
                            selected_groups=selected_groups,
                            use_count=use_count
                        )
                    except Exception as e:
                        print(f"Error creating pie charts: {str(e)}")
                        traceback.print_exc()
                        # Same reasoning as above: never redisplay a stale figure
                        self.current_fig = None

                # create the correlation scatter plot
                if plot_type == 'Corr. Scatter Plots':
                    if len(selected_groups) == 2:
                        # Exactly 2 groups: single scatter plot
                        self.current_fig = self.create_correlation_plot(selected_groups)
                    elif len(selected_groups) > 2:
                        # 3+ groups: scatter-plot matrix
                        self.current_fig = self.create_correlation_splom(selected_groups)
                    else:
                        # Fewer than 2 groups cannot be correlated
                        display_warning("Please select at least 2 groups for correlation analysis")
                        return None

                # Update progress bar to 2/3
                progress.value = 2

                # display the plot if it is not None else display an error message
                if self.current_fig is None:
                    display_warning("Error generating Plot.<br>Please upload all required files first.<br>Error creating plot. Please check your data.")
                    return None

                # Update progress bar to complete
                progress.value = 3
                progress.description = "Loaded"

                # Display the plot below any warnings
                display(self.current_fig)

                self.download_plot_button.disabled = False
        finally:
            # Always re-enable the plot button. The Output context manager can
            # swallow exceptions raised inside the ``with`` block, so without
            # this the button would remain disabled after any unexpected error.
            self.plot_button.disabled = False
    
    def redact_string_descriptions(self, input_str, max_length=35):
        """Shorten a protein or function description to at most ``max_length`` characters.

        Args:
            input_str: The text to shorten. Non-string values are converted with
                ``str()`` first and then shortened like any other string.
            max_length: Maximum number of characters in the returned string
                (default 35).

        Returns:
            str: ``input_str`` unchanged when it already fits; otherwise the
            leading characters followed by ``'...'`` so that the total length is
            exactly ``max_length``. When ``max_length`` is smaller than 3 there
            is no room for the ellipsis, so the text is simply cut to
            ``max_length`` characters (empty string for values <= 0).
        """
        if not isinstance(input_str, str):
            input_str = str(input_str)  # Convert non-strings to string

        # If the entire string is shorter than max_length, return as is
        if len(input_str) <= max_length:
            return input_str

        # Too small to hold the ellipsis: a plain cut keeps the length contract
        # (a negative slice bound would otherwise return an over-long string)
        if max_length < 3:
            return input_str[:max(max_length, 0)]

        # For entries that are too long, truncate and add ellipsis
        return input_str[:max_length - 3] + '...'

    def retrieve_data_for_plot(self):
        """Refresh plot inputs from the data handler and return the plot configuration.

        Copies widgets and derived data from ``get_data_attribute`` onto ``self``,
        triggering the peptide, function and protein processing steps in between
        so that each group of attributes is read after its processing call. Also
        sets the metric-dependent attributes (``metric_name``, ``value_prefix``,
        ``rel_prefix``, ``num_format``) and the per-group column-name maps
        ``value_cols`` / ``rel_cols``.

        Returns:
            tuple: ``(selected_groups, plot_type, use_count, orientation, plot_filter)``
            where ``selected_groups`` is the group selector's value, ``use_count``
            is True when the metric widget's value is ``'Count'``, and the other
            three items are the current values of the plot type, orientation
            and filter widgets.
        """
        # Get plot configuration
        self.plot_func_or_pro = self.get_data_attribute('plot_func_or_pro')
        self.plot_type = self.get_data_attribute('plot_type')
        self.plot_minor = self.get_data_attribute('plot_minor')
        self.invert_plot = self.get_data_attribute('invert_plot')
        self.abs_or_count = self.get_data_attribute('abs_or_count')
        self.group_selector = self.get_data_attribute('group_selector')

        self.xlabel_widget = self.get_data_attribute('xlabel_widget')
        self.ylabel_widget = self.get_data_attribute('ylabel_widget')
        self.legend_widget = self.get_data_attribute('legend_widget')
        self.title_widget = self.get_data_attribute('title_widget')
        self.metric_type = self.get_data_attribute('metric_type')
        self.function_color_map = self.get_data_attribute('function_color_map')
        self.selected_groups = selected_groups = self.group_selector.value
        self.process_total_peptide_data_and_filter_dataframe()
        self.total_peptide_results_dict = self.get_data_attribute('total_peptide_results_dict')
        self.filtered_df = self.get_data_attribute('filtered_df')
        self.log_transform = self.get_data_attribute('log_transform')
        self.correlation_type = self.get_data_attribute('correlation_type')
        self.abundance_count_by_sample_dict = self.calculate_group_metrics()

        # function data
        self.create_function_df()
        self.function_selector = self.get_data_attribute('function_selector')

        # Summary categories that are not individual functions
        summary_categories = ('Minor Functions', 'Functional Peptides', 'Non-Functional Peptides')
        # Compare the filter widget's *value*: comparing the widget object itself
        # with a string is never equal, which made the 'No Filter' branch
        # unreachable and the 'Functional vs Non-Functional Peptides' exception
        # ineffective.
        filter_choice = self.plot_func_or_pro.value
        all_selected_functions = self.get_data_attribute('selected_functions')
        only_all_functional = self.function_selector.value == ('All Functional Peptides',)
        strip_summary_categories = (
            filter_choice == 'No Filter'
            or (only_all_functional and filter_choice != 'Functional vs Non-Functional Peptides')
        )
        if strip_summary_categories:
            self.selected_functions = [
                func for func in all_selected_functions if func not in summary_categories
            ]
        else:
            self.selected_functions = all_selected_functions
        self.function_df = self.get_data_attribute('function_df')
        self.reorganize_by_function()
        self.function_absorbance_totals_dict = self.get_data_attribute('function_absorbance_totals_dict')
        self.function_count_totals_dict = self.get_data_attribute('function_count_totals_dict')
        self.function_group_metrics_dict = self.get_data_attribute('function_group_metrics_dict')
        self.function_distribution_dict = self.get_data_attribute('function_distribution_dict')

        # protein data
        self.process_protein_data()
        self.protein_selector = self.get_data_attribute('protein_selector')
        self.selected_proteins = self.get_data_attribute('selected_proteins')
        self.all_proteins = self.get_data_attribute('all_proteins')
        self.protein_df = self.get_data_attribute('protein_df')
        self.protein_count_bysample_dict = self.get_data_attribute('protein_count_bysample_dict')
        self.protein_sample_distribution_dict = self.get_data_attribute('protein_sample_distribution_dict')
        self.sum_df = self.get_data_attribute('sum_df')
        self.sample_distribution_summary_df = self.get_data_attribute('sample_distribution_summary_df')

        # Metric-dependent labels, column prefixes and number formats.
        # (The widgets were assigned above, so no hasattr guard is needed.)
        use_count = self.abs_or_count.value == 'Count'
        if use_count:
            self.metric_name = "Unique Peptide Count"
            self.value_prefix = "Count_"
            self.rel_prefix = "Rel_Count_"
            self.num_format = ",.0f"  # Integer format for counts
        else:  # abundance
            self.metric_name = "Summed Absorbance"
            self.value_prefix = "Avg_"
            self.rel_prefix = "Rel_Avg_"
            self.num_format = ",.2e"  # Scientific notation for abundance

        # Create mapping from sample name to column names
        self.value_cols = {var: f'{self.value_prefix}{var}' for var in selected_groups}
        self.rel_cols = {var: f'{self.rel_prefix}{var}' for var in selected_groups}

        # Current widget selections returned to the caller
        plot_type = self.plot_type.value
        orientation = self.invert_plot.value
        plot_filter = self.plot_func_or_pro.value

        return selected_groups, plot_type, use_count, orientation, plot_filter

    def create_correlation_splom(self, selected_groups):
        """Build a scatter-plot matrix (SPLOM) with pairwise correlations in the legend.

        Uses the ``Avg_<group>`` columns of the pre-filtered dataframe
        (``self.filtered_df``, falling back to ``self.data_transformer.merged_df``).
        Only rows whose value is positive in every plotted column are kept;
        values are optionally log10-transformed. One invisible legend trace per
        group pair carries the correlation coefficient text.

        Args:
            selected_groups: Group names to plot; at least 3 are required and
                each needs an ``Avg_<group>`` column in the dataframe.

        Returns:
            plotly.graph_objects.Figure, or None when the required data is
            missing, fewer than 3 usable groups remain, or no rows survive the
            positive-value filter (a warning is displayed in the latter cases).
        """
        if self.data_transformer.merged_df is None or self.data_transformer.group_data_dict is None:
            return None

        if not selected_groups or len(selected_groups) < 3:
            display_warning("At least 3 groups are required for SPLOM visualization")
            return None

        # Get the already filtered dataframe; fall back to the merged data when
        # the attribute is absent or has not been populated
        source_df = getattr(self, 'filtered_df', None)
        if source_df is None:
            source_df = self.data_transformer.merged_df
        df = source_df.copy()

        # Keep only the groups whose value column exists
        dimensions = []
        for group in selected_groups:
            col_name = f"Avg_{group}"
            if col_name not in df.columns:
                display_warning(f"Column {col_name} not found in the dataframe")
                continue
            dimensions.append(group)

        if len(dimensions) < 3:
            display_warning("Not enough valid columns found for SPLOM visualization")
            return None

        # Try to get color scheme from self, default to Carolina Blue if multiple schemes selected
        try:
            color_result = self.get_single_color()
            # Ensure colors is always a list, even if a single string is returned
            if isinstance(color_result, list) and len(color_result) > 0:
                colors = color_result
            elif isinstance(color_result, str):
                colors = [color_result]
            else:
                colors = ['#4B9CD3']  # Carolina Blue
                display_warning("Please choose a single color scheme otherwise Carolina Blue will be used by default.")
        except Exception as e:
            print(f"Error getting colors: {e}")
            colors = ['#4B9CD3']  # Carolina Blue
            display_warning("Please choose a single color scheme otherwise Carolina Blue will be used by default.")

        # Get log transform and correlation type settings from plotter
        use_log = self.log_transform.value if hasattr(self, 'log_transform') else False
        correlation_type = self.correlation_type.value if hasattr(self, 'correlation_type') else 'Pearson'

        # Hover number format depends only on the transform, so decide it up
        # front rather than inside the correlation loop (where it could be
        # left unassigned)
        tickformater = '.2f' if use_log else '.2e'

        # Ensure positive values for all dimensions. ``.copy()`` makes the
        # result independent so that columns can be added to it below.
        value_columns = [f"Avg_{group}" for group in dimensions]
        filtered_df = df[(df[value_columns] > 0).all(axis=1)].copy()

        if len(filtered_df) == 0:
            display_warning("No valid data points found for the selected groups")
            return None

        # Process data for each dimension
        splom_dimensions = []
        for group, col_name in zip(dimensions, value_columns):
            # Apply log transformation based on setting
            if use_log:
                values = np.log10(filtered_df[col_name])
                label = f"Log<sub>10</sub> ({group})"
            else:
                values = filtered_df[col_name]
                label = group
            splom_dimensions.append(dict(values=values, label=label))

        # Correlation function and legend symbol for the selected method; an
        # unrecognised method is reported as "n/a" instead of raising
        correlation_methods = {
            'Pearson': (pearsonr, "<i>r</i>"),
            'Spearman': (spearmanr, "ρ"),
        }
        corr_func, corr_symbol = correlation_methods.get(correlation_type, (None, correlation_type))
        # A correlation needs at least two observations
        can_correlate = corr_func is not None and len(filtered_df) > 1

        # Calculate all pairwise correlations (upper triangle only) and expose
        # them as legend entries through invisible traces
        correlation_traces = []
        for (i, group1), (j, group2) in combinations(enumerate(dimensions), 2):
            if can_correlate:
                corr, _ = corr_func(splom_dimensions[i]['values'], splom_dimensions[j]['values'])
                corr_text = f"{corr:.3f}"
            else:
                corr_text = "n/a"

            legend_text = f"{group1}-{group2}: {corr_symbol} = {corr_text}"
            correlation_traces.append(
                go.Scatter(
                    x=[None],
                    y=[None],
                    mode='markers',
                    marker=dict(color=colors[0]),
                    name=legend_text,
                    showlegend=True,
                    legendgroup=f"corr_{i}_{j}"
                )
            )

        # Prepare custom data for hover text
        id_column = 'unique ID' if 'unique ID' in filtered_df.columns else 'Peptide ID'
        function_column = 'function' if 'function' in filtered_df.columns else 'Function'
        protein_column = 'protein_name' if 'protein_name' in filtered_df.columns else 'Protein'

        # Ensure all required columns exist or provide defaults
        for column in (id_column, function_column, protein_column):
            if column not in filtered_df.columns:
                filtered_df[column] = "Unknown"

        # One [id, function, protein] row per data point, built column-wise so
        # the missing-value fill happens once per column, not once per row
        customdata = [
            list(row)
            for row in zip(
                filtered_df[id_column],
                filtered_df[function_column].fillna('N/A'),
                filtered_df[protein_column].fillna('N/A'),
            )
        ]

        # Create the SPLOM trace with custom hover template
        splom_trace = go.Splom(
            dimensions=splom_dimensions,
            marker=dict(
                color=colors[0],
                size=8,
                line=dict(width=1, color='white')
            ),
            diagonal=dict(visible=False),
            hovertemplate="<b>Peptide ID:</b> %{customdata[0]}<br>" +
                        "<b>Function:</b> %{customdata[1]}<br>" +
                        "<b>Protein:</b> %{customdata[2]}<br>" +
                        "<b>%{xaxis.title.text}:</b> %{x:" + tickformater + "}<br>" +
                        "<b>%{yaxis.title.text}:</b> %{y:" + tickformater + "}<br>" +
                        "<extra></extra>",
            customdata=customdata,
            showlegend=False,
            showupperhalf=False,
        )

        # Create figure with the main trace plus the correlation legend traces
        fig = go.Figure(data=[splom_trace] + correlation_traces)

        # Use existing plot labels if available
        plot_title = self.plot_title if hasattr(self, 'plot_title') else "Correlation Matrix"

        # Update layout for title, size, etc.
        fig.update_layout(
            title=dict(
                text=plot_title,
                font=dict(size=14 if 'Filtered By' in plot_title else 18, color='black'),
                x=0.5,
                xanchor='center'
            ),
            width=250 * len(dimensions),
            height=250 * len(dimensions),
            template='plotly_white',
        )

        # Calculate axis ranges with 5% padding. The padding is applied outward
        # from each bound so that negative values (e.g. log10 of values below 1)
        # widen the range instead of clipping data points.
        axis_ranges = []
        for dimension in splom_dimensions:
            values = dimension['values']
            min_val = values.min()
            max_val = values.max()
            range_low = min_val - 0.05 * abs(min_val)
            range_high = max_val + 0.05 * abs(max_val)
            if range_low == range_high:
                # Degenerate range (all values are exactly zero): give the axis
                # a visible extent
                range_low -= 0.5
                range_high += 0.5
            axis_ranges.append([range_low, range_high])

        # For SPLOM, we need to update each axis individually in the layout
        for i in range(1, len(dimensions) + 1):
            fig.update_layout({
                f'xaxis{i}': dict(
                    tickfont=dict(color='black', size=14),
                    title_font=dict(color='black', size=16),
                    zeroline=False,
                    range=axis_ranges[i-1]
                ),
                f'yaxis{i}': dict(
                    tickfont=dict(color='black', size=14),
                    title_font=dict(color='black', size=16),
                    zeroline=False,
                    range=axis_ranges[i-1]
                )
            })

        # Fall back to a descriptive legend title when none has been set
        legend_title = getattr(self, 'legend_title', '')
        if legend_title == '':
            legend_title = f'{correlation_type} Correlation Values'
        fig.update_layout(
            yaxis=dict(visible=False),
            showlegend=True,
            legend=dict(
                title=dict(text=legend_title, font=dict(size=16, color="black")),
                y=1,
                x=0.8,
                font=dict(size=14, color="black")
            )
        )

        return fig
    
    def create_correlation_plot(self, selected_groups):
        """Build a two-group correlation scatter plot with a linear trendline.

        Uses the ``Avg_<group>`` columns of the pre-filtered dataframe
        (``self.filtered_df``, falling back to ``self.data_transformer.merged_df``).
        Only rows positive in both columns are plotted; values are optionally
        log10-transformed. The correlation coefficient is shown in the legend
        entry of the scatter trace.

        Args:
            selected_groups: Exactly two group names; the first goes on the
                x axis and the second on the y axis.

        Returns:
            plotly.graph_objects.Figure, or None when the required data is
            missing, the number of groups is not 2, a value column is absent,
            or no rows survive the positive-value filter.
        """
        if self.data_transformer.merged_df is None or self.data_transformer.group_data_dict is None:
            return None

        if len(selected_groups) != 2:
            display_warning("Exactly 2 groups are required for the scatter plot visualization")
            return None

        # Get the already filtered dataframe; fall back to the merged data when
        # the attribute is absent or has not been populated
        source_df = getattr(self, 'filtered_df', None)
        if source_df is None:
            source_df = self.data_transformer.merged_df
        df = source_df.copy()

        # Use the 2 selected groups
        group1 = selected_groups[0]
        group2 = selected_groups[1]

        # Column names for the two groups
        col1 = f"Avg_{group1}"
        col2 = f"Avg_{group2}"

        if col1 not in df.columns or col2 not in df.columns:
            display_warning(f"Columns {col1} or {col2} not found in the dataframe")
            return None

        # Use Carolina Blue as default color
        carolina_blue = '#4B9CD3'

        # Try to get color scheme from plotter
        try:
            color_result = self.get_single_color()
            # Ensure colors is always a list, even if a single string is returned
            if isinstance(color_result, list) and len(color_result) > 0:
                colors = color_result
            elif isinstance(color_result, str):
                colors = [color_result]
            else:
                colors = [carolina_blue]
                display_warning("Please choose a single color scheme otherwise Carolina Blue will be used by default.")
        except Exception as e:
            print(f"Error getting colors: {e}")
            colors = [carolina_blue]
            display_warning("Please choose a single color scheme otherwise Carolina Blue will be used by default.")

        # Get log transform and correlation type settings from plotter
        use_log = self.log_transform.value if hasattr(self, 'log_transform') else False
        correlation_type = self.correlation_type.value if hasattr(self, 'correlation_type') else 'Pearson'

        # Ensure positive values
        filtered_df = df[(df[col1] > 0) & (df[col2] > 0)].copy()

        if len(filtered_df) == 0:
            display_warning(f"No valid data points found for {group1} vs {group2}")
            return None

        # Apply log transformation if requested
        if use_log:
            x_values = np.log10(filtered_df[col1])
            y_values = np.log10(filtered_df[col2])
            tickformater = '.2f'  # 2 decimal places for log values
            xaxislabel = f'Log<sub>10</sub> ({group1})'
            yaxislabel = f'Log<sub>10</sub> ({group2})'
        else:
            x_values = filtered_df[col1]
            y_values = filtered_df[col2]
            tickformater = '.1e'  # 1 decimal place with exponential notation
            xaxislabel = f'{group1}'
            yaxislabel = f'{group2}'

        # Calculate correlation based on selected method
        correlation_text = 'n/a'
        if len(filtered_df) > 1:
            if correlation_type == 'Pearson':
                corr, _ = pearsonr(x_values, y_values)
                correlation_text = f'<i>r</i> = {corr:.3f}'
            elif correlation_type == 'Spearman':
                corr, _ = spearmanr(x_values, y_values)
                correlation_text = f'ρ = {corr:.3f}'

        # Hover data. The hover template below always reads customdata[0..2],
        # so exactly three columns are supplied in a fixed order, with a
        # placeholder when a column is missing from the dataframe.
        def hover_column(candidates, placeholder, fill_missing):
            for candidate in candidates:
                if candidate in filtered_df.columns:
                    column = filtered_df[candidate]
                    return column.fillna('N/A') if fill_missing else column
            return np.full(len(filtered_df), placeholder, dtype=object)

        customdata = np.column_stack([
            hover_column(('unique ID', 'Peptide ID'), 'Unknown', fill_missing=False),
            hover_column(('function', 'Function'), 'N/A', fill_missing=True),
            hover_column(('protein_name', 'Protein'), 'N/A', fill_missing=True),
        ])

        # Create a single-panel figure and add the scatter trace
        fig = go.Figure()
        fig.add_trace(
            go.Scatter(
                x=x_values,
                y=y_values,
                mode='markers',
                name=f'Correlation: {correlation_text}',
                marker=dict(color=colors[0]),
                hovertemplate="<b>Peptide ID:</b> %{customdata[0]}<br>" +
                            "<b>Function:</b> %{customdata[1]}<br>" +
                            "<b>Protein:</b> %{customdata[2]}<br>" +
                            "<b>%{xaxis.title.text}:</b> %{x:" + tickformater + "}<br>" +
                            "<b>%{yaxis.title.text}:</b> %{y:" + tickformater + "}<br>" +
                            "<extra></extra>",
                customdata=customdata
            )
        )

        # Add trendline if we have enough points
        if len(filtered_df) > 1:
            z = np.polyfit(x_values, y_values, 1)
            x_range = np.linspace(x_values.min(), x_values.max(), 100)
            fig.add_trace(
                go.Scatter(
                    x=x_range,
                    y=np.poly1d(z)(x_range),
                    mode='lines',
                    line=dict(color=colors[0], dash='dash'),
                    name='Trendline',
                    showlegend=True,
                    hovertemplate='<extra></extra>'
                )
            )

        # Use existing plot labels if available
        plot_title = self.plot_title if hasattr(self, 'plot_title') else "Correlation Matrix"
        # Fall back to a descriptive legend title when none has been set
        legend_title = getattr(self, 'legend_title', '')
        if legend_title == '':
            legend_title = f'{correlation_type} Correlation Values'

        # Update layout
        fig.update_layout(
            title=dict(
                text=plot_title,
                font=dict(size=12 if 'Filtered By' in plot_title else 18, color='black'),
                x=0.5,
                xanchor='center'
            ),
            xaxis_title=xaxislabel,
            yaxis_title=yaxislabel,
            xaxis=dict(
                    title_font={"size": 16},
                    tickfont={"size": 14},
                    tickfont_color="black",  # Black tick labels
                    title_font_color="black",  # Black axis title
                    tickformat=tickformater,
            ),
            yaxis=dict(
                    title_font={"size": 16},
                    tickfont={"size": 14},
                    tickfont_color="black",  # Black tick labels
                    title_font_color="black",  # Black axis title
                    tickformat=tickformater,
            ),
            height=500,
            width=600,
            template='plotly_white',
            showlegend=True,
            legend=dict(
                yanchor="top",
                title=dict(text=legend_title, font=dict(size=16, color="black")),
                y=0.99,
                xanchor="right",
                x=0.99,
                bgcolor="rgba(255, 255, 255, 0.8)",
                font=dict(size=14)
            ),
            margin=dict(t=100, b=80, l=80, r=50)
        )

        return fig

    def check_invalid_user_selection(self, selected_groups, plot_type, use_count, orientation, plot_filter):
        """Validate the current plot selections and collect user-facing warnings.

        Args:
            selected_groups: Sample groups chosen for plotting; a falsy/empty
                value produces a warning.
            plot_type: Not used by this check; kept so existing callers keep working.
            use_count: Not used by this check; kept so existing callers keep working.
            orientation: Plot orientation label (compared against 'By Protein'
                and 'By Function').
            plot_filter: Plot filter label (compared against 'Selected Protein(s)',
                'Selected Function(s)' and 'Both').

        Returns:
            list[str]: Warning messages (they may contain ``<br>`` markup) in the
            order the checks run. Each problem is reported at most once. The list
            is empty when no problem was found.

        Side effects:
            Calls ``self.get_plot_labels()`` on every invocation.
        """
        all_proteins_label = 'All Proteins (No Filter)'
        all_functions_label = 'All Functional Peptides'

        # Initialize warnings list
        warnings = []

        # Check if any groups are selected
        if not selected_groups:
            warnings.append("No sample groups selected.<br>Please select at least one sample group to generate a plot.")

        # Current widget selections.
        # NOTE(review): the "nothing selected" checks below read self.selected_proteins /
        # self.selected_functions, while the "mixed selection" checks read the widget
        # values; presumably both are kept in sync elsewhere - confirm.
        selected_proteins = self.protein_selector.value
        selected_functions = self.function_selector.value

        # Check for protein selection when needed
        if plot_filter in ['Selected Protein(s)', 'Both']:
            stored_proteins = getattr(self, 'selected_proteins', None)
            if not stored_proteins or (len(stored_proteins) == 1 and all_proteins_label in stored_proteins):
                warnings.append(f"No proteins selected.<br>Please select at least one specific protein for '{plot_filter}' filter type.")

            # 'All Proteins' selected together with individual proteins
            elif all_proteins_label in selected_proteins and len(selected_proteins) > 1:
                warnings.append("'All Proteins (No Filter)' is selected along with individual proteins.<br>All proteins will be used for analysis.")

        # Check for function selection when needed
        if plot_filter in ['Selected Function(s)', 'Both']:
            stored_functions = getattr(self, 'selected_functions', None)
            if not stored_functions or (len(stored_functions) == 1 and all_functions_label in stored_functions):
                warnings.append(f"No functions selected.<br>Please select at least one specific function for '{plot_filter}' filter type.")

            # 'All Functional Peptides' combined with individual functions:
            # report the problem once instead of once per extra function.
            elif all_functions_label in selected_functions:
                if any(func != all_functions_label for func in selected_functions):
                    warnings.append("Invalid selection: 'All Functional Peptides' cannot be combined with other Individual functions criteria.")

        # Check if merged data is available
        if self.data_transformer.merged_df is None:
            warnings.append("No data available.<br>Please upload the merged data file first.")

        self.get_plot_labels()

        # Check for invalid combinations of plot filter and plot orientation
        if plot_filter == 'Selected Function(s)' and orientation == 'By Protein':
            warnings.append("Invalid combination of Plot Filter and Plot Orientation.<br>Current selection: 'Selected Function(s)' for Plot Filter and 'By Protein' for Plot Orientation.")

        if plot_filter == 'Selected Protein(s)' and orientation == 'By Function':
            warnings.append("Invalid combination of Plot Filter and Plot Orientation.<br>Current selection: 'Selected Protein(s)' for Plot Filter and 'By Function' for Plot Orientation.")

        return warnings

In [7]:
# Entry-point cell: wire the application objects together and render their UIs.
# The statements are kept in this order because each display call emits its
# widgets when it runs, so reordering would reorder the rendered panels.

# Data-transformation object that is handed to both the handler and the plotter below
data_transformer = DataTransformation()
# Initialize the data-loading interface
data_transformer.setup_data_loading_ui()

# Create the data handler on top of the transformer and show its UI
data_handler = DataHandler(data_transformer)
data_handler.display_handler()

# Create the Plotter from the transformer and handler, then show its UI
plotter = Plotter(data_transformer, data_handler)
plotter.display()

VBox(children=(Output(layout=Layout(max_width='1000px', width='100%')), HTML(value='<u>Upload Data File:</u>')…

VBox(children=(Output(layout=Layout(max_width='1000px', width='100%')), GridBox(children=(VBox(children=(HTML(…

Output(layout=Layout(max_width='1000px', width='100%'))

GridspecLayout(children=(VBox(children=(HTML(value='<h3><u>Visualization Settings:</u></h3>'), HBox(children=(…

VBox(children=(HTML(value='<h3><u>Display and Export</u></h3>'), HBox(children=(Button(button_style='success',…

Output()