In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import json, io, base64, re, os
import plotly.graph_objects as go
from IPython.display import display, HTML, clear_output
import plotly.express as px
import ipywidgets as widgets

# Initialize settings
import _settings as settings

# Global variables from settings.
# NOTE(review): spec_translate_list is not referenced by the cells visible here;
# presumably a later cell consumes it — confirm before removing.
spec_translate_list = settings.SPEC_TRANSLATE_LIST


In [2]:
class DataTransformation:
    def __init__(self):
        self.merged_df = None
        self.output_area = None
        self.merged_uploader = None

    def create_download_link(self, file_path, label):
        """Return an HTML widget that lets the user download `file_path`.

        The file is embedded in the link as a base64 ``data:`` URI and `label`
        is used as the link text. When the file does not exist, a red
        "not found" notice is returned instead of a link.
        """
        if not os.path.exists(file_path):
            # Nothing to embed: tell the user which file is missing
            return widgets.HTML(f"""
                <span style="color: red; margin-left: 20px; font-size: 14px;">
                    File "{file_path}" not found!
                </span>
            """)

        # Embed the raw bytes so the link works without a file server
        with open(file_path, 'rb') as handle:
            encoded = base64.b64encode(handle.read()).decode('utf-8')

        return widgets.HTML(f"""
                <a download="{os.path.basename(file_path)}" 
                   href="data:application/octet-stream;base64,{encoded}" 
                   style="color: #0366d6; text-decoration: none; margin-left: 20px; font-size: 14px;">
                    {label}
                </a>
            """)

    def setup_data_loading_ui(self):
        """Build the upload panel, render it, and start listening for uploads."""
        # Single-file picker limited to the extensions offered to the user
        uploader = widgets.FileUpload(
            description='Upload Data File',
            accept='.csv,.txt,.tsv,.xlsx',
            multiple=False,
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='300px')
        )
        self.merged_uploader = uploader

        # Status messages produced while loading are rendered here
        self.output_area = widgets.Output()

        # Picker and example-file link sit side by side
        example_link = self.create_download_link("example_merged_dataframe.csv", "Example")
        upload_row = widgets.HBox(
            [uploader, example_link],
            layout=widgets.Layout(align_items='center')
        )

        # Heading, upload row and status area stacked in one column
        column_layout = widgets.Layout(width='400px', margin='0 20px 0 0')
        upload_column = widgets.VBox(
            [
                widgets.HTML("<h3><u>Upload Data File:</u></h3>"),
                upload_row,
                self.output_area
            ],
            layout=column_layout
        )

        # Outer grid that hosts the column
        grid_layout = widgets.Layout(
            grid_template_columns='auto auto',
            grid_gap='20px',
            width='900px'
        )
        display(widgets.GridBox([upload_column], layout=grid_layout))

        # React whenever the picker's value changes
        uploader.observe(self._on_merged_upload_change, names='value')

    def _validate_and_clean_data(self, df):
        """
        Validate and clean the uploaded data, dropping rows with blank values in key columns.
        Returns tuple of (cleaned_df, warnings, errors)
        """
        warnings = []
        errors = []

        # Check required columns exist
        required_columns = [
            'Master Protein Accessions', 
            'Positions in Proteins',
            'unique ID'
        ]
        
        # Check that at least one Avg_ column exists
        avg_columns = [col for col in df.columns if col.startswith('Avg_')]
        if not avg_columns:
            errors.append("No columns starting with 'Avg_' found in the data")
            return None, warnings, errors
            
        # Add Avg_ columns to required columns
        required_columns.extend(avg_columns)
        
        missing = set(required_columns) - set(df.columns)
        if missing:
            errors.append(f"Missing required columns: {', '.join(missing)}")
            return None, warnings, errors

        cleaned_df = df.copy()

        # Handle blank values by dropping rows and issuing warnings
        for column in required_columns:
            blank_count = cleaned_df[column].isna().sum()
            if blank_count > 0:
                warnings.append(f"Dropping {blank_count} rows with blank values in {column} column")
                cleaned_df = cleaned_df.dropna(subset=[column])

        # Check for invalid characters in non-blank rows
        if len(cleaned_df) > 0:
            # Check Positions in Proteins
            invalid_pos = cleaned_df['Positions in Proteins'].apply(
                lambda x: ',' in str(x) or ':' in str(x)
            )
            if invalid_pos.any():
                errors.append(
                    "Found invalid characters (',' or ':') in Positions in Proteins column. "
                    "Please update the file and upload again."
                )

            # Check Master Protein Accessions
            invalid_acc = cleaned_df['Master Protein Accessions'].apply(
                lambda x: ',' in str(x) or ':' in str(x)
            )
            if invalid_acc.any():
                errors.append(
                    "Found invalid characters (',' or ':') in Master Protein Accessions column. "
                    "Please update the file and upload again."
                )

        return cleaned_df, warnings, errors

    def _load_merged_data(self, file_data):
        """
        Parse an uploaded file, validate it, and report the outcome as HTML.

        `file_data` must expose ``content`` (the raw bytes) and ``name`` (the
        file name, whose extension selects the parser).

        Returns a tuple ``(dataframe, status)``: ``(cleaned_df, 'yes')`` when
        the file was read and validated, ``(None, 'no')`` in every other case.
        """
        failed = (None, 'no')
        try:
            raw_bytes = bytes(file_data.content)
            suffix = file_data.name.split('.')[-1].lower()
            buffer = io.BytesIO(raw_bytes)

            # Pick the parser from the file extension
            try:
                if suffix == 'xlsx':
                    df = pd.read_excel(buffer)
                elif suffix == 'csv':
                    df = pd.read_csv(buffer)
                elif suffix in ('txt', 'tsv'):
                    df = pd.read_csv(buffer, delimiter='\t')
                else:
                    display(HTML('<b style="color:red;">Error: Unsupported file format</b>'))
                    return failed
            except Exception as read_error:
                display(HTML(f'<b style="color:red;">Error reading file: {str(read_error)}</b>'))
                return failed

            cleaned_df, warnings, errors = self._validate_and_clean_data(df)

            # Warnings (dropped rows) are shown even when errors follow
            if warnings:
                display(HTML("<br>".join(
                    f'<b style="color:orange;">Warning: {message}</b>'
                    for message in warnings
                )))

            # Any validation error rejects the file
            if errors:
                display(HTML("<br>".join(
                    f'<b style="color:red;">Error: {message}</b>'
                    for message in errors
                )))
                return failed

            # Cleaning may have removed every row
            if cleaned_df is None or len(cleaned_df) == 0:
                display(HTML('<b style="color:red;">Error: No valid data rows remaining after cleaning</b>'))
                return failed

            display(HTML(
                f'<b style="color:green;">Processed data contains {len(cleaned_df)} rows '
                f'after removing blank values.</b><br>'
            ))
            return cleaned_df, 'yes'

        except Exception as unexpected:
            display(HTML(f'<b style="color:red;">Error processing file: {str(unexpected)}</b>'))
            return failed

    def _on_merged_upload_change(self, change):
        """Handle merged data file upload"""
        if change['type'] == 'change' and change['name'] == 'value':
            with self.output_area:
                self.output_area.clear_output()
                if change['new'] and len(change['new']) > 0:
                    file_data = change['new'][0]
                    df, status = self._load_merged_data(file_data)
                    if status == 'yes' and df is not None:
                        self.merged_df = df  # Only set merged_df if validation passed
                        display(HTML(
                            f'<b style="color:green;">Data imported successfully with '
                            f'{df.shape[0]} rows and {df.shape[1]} columns.</b>'
                        ))

In [3]:
def generate_download_link(content, filename, filetype='text/csv'):
    """Return the HTML for a button-styled link that downloads `content`.

    Text content is encoded to bytes first. The payload is embedded in the
    link itself as a base64 ``data:`` URI with MIME type `filetype`, and
    `filename` is used both as the download name and in the link text.
    """
    payload = content.encode() if isinstance(content, str) else content
    encoded = base64.b64encode(payload).decode()
    return f"""
        <a download="{filename}" href="data:{filetype};base64,{encoded}" 
           class="download-link" 
           style="background-color: #4CAF50;
                  border: none;
                  color: white;
                  padding: 10px 20px;
                  text-align: center;
                  text-decoration: none;
                  display: inline-block;
                  font-size: 14px;
                  margin: 4px 2px;
                  cursor: pointer;
                  border-radius: 4px;">
            Download {filename}
        </a>
    """

In [4]:
class BioactivePlotter:
    def __init__(self, data_transformer):
        """Store the data source, create the output areas, then build the widgets.

        `data_transformer` is the object whose ``merged_df`` supplies the data
        and whose ``merged_uploader`` is observed for new uploads.
        """
        self.data_transformer = data_transformer

        # Figures from the most recent plot request (none yet)
        self.current_fig_abs = None
        self.current_fig_rel = None

        # Separate output areas for plots, export results and notices
        self.plot_output = widgets.Output()
        self.export_output = widgets.Output()
        self.info_output = widgets.Output()

        # Preferred ordering of functions in the bar plots
        # ("Minor Functions (<1%)" is deliberately not part of it)
        self.function_list = [
            'ACE-inhibitory',
            'Ameliorates insulin resistance',
            'Antianxiety',
            'Anticancer',
            'Antimicrobial',
            'Antioxidant',
            'Antithrombotic',
            'Cholesterol regulation',
            'Cytotoxic',
            'DPP-IV Inhibitory',
            'Immunomodulatory',
            'Increase calcium uptake',
            'Increase cellular growth',
            'Opioid',
            'Osteoanabolic',
            'Prolyl endopeptidase-inhibitory',
            'Antithrombitic',
            'Increase mucin secretion',
            'Satiety',
            'Cytomodulatory',
        ]

        # Build the controls last: they need the output areas created above
        self.setup_widgets()
        
    def setup_widgets(self):
        """Create every control, assemble ``self.widget_box`` and wire the callbacks.

        Besides subscribing to future uploads, the group selector is
        synchronised once with data that is already loaded, so a plotter
        created after the upload does not start with an empty group list.
        """
        # Action buttons; export/download stay disabled until a plot exists
        self.plot_button = widgets.Button(
            description='Generate Plot',
            button_style='success',
            icon='chart-bar',
            layout=widgets.Layout(width='200px')
        )

        self.export_button = widgets.Button(
            description='Export Data',
            button_style='info',
            icon='save',
            layout=widgets.Layout(width='200px'),
            disabled=True
        )

        self.download_plot_button = widgets.Button(
            description='Download Plot',
            button_style='info',
            icon='download',
            layout=widgets.Layout(width='200px'),
            disabled=True
        )

        # Group selection; options are filled in by _update_group_options
        self.group_select = widgets.SelectMultiple(
            description='Groups:',
            options=[],
            layout=widgets.Layout(width='300px', height='100px'),
            style={'description_width': 'initial'}
        )

        # Which families of plots to render
        self.plot_type = widgets.RadioButtons(
            options=['All Plots', 'Bar Plots Only', 'Pie Charts Only'],
            value='All Plots',
            description='Plot Type:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='300px')
        )

        # Which bar plot to show when only bar plots are requested
        self.bar_plot_type = widgets.RadioButtons(
            options=['Absolute Abundance', 'Relative Abundance'],
            value='Absolute Abundance',
            description='Bar Plot Type:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='300px')
        )

        # Colour scheme used by _get_color_sequence
        self.color_scheme = widgets.Dropdown(
            options=['Viridis', 'Plasma', 'HSV', 'Rainbow', 'Spectral', 'RdYlBu'],
            value='HSV',
            description='Color Scheme:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='400px')
        )

        # Axis label customisation
        self.xlabel_widget = widgets.Text(
            value='Sample Type',
            description='X Label:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='400px')
        )

        self.ylabel_widget = widgets.Text(
            value='Scaled Absolute Abundance',
            description='Y Label:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='400px')
        )

        # Group the controls into titled sections
        controls_box = widgets.VBox([
            widgets.HTML("<h4>Plot Controls:</h4>"),
            self.group_select,
            self.plot_type,
            self.bar_plot_type
        ])

        appearance_box = widgets.VBox([
            widgets.HTML("<h4>Appearance Settings:</h4>"),
            self.xlabel_widget,
            self.ylabel_widget,
            self.color_scheme
        ])

        button_box = widgets.VBox([
            widgets.HTML("<h4>Actions:</h4>"),
            widgets.HBox([self.plot_button, self.export_button, self.download_plot_button])
        ])

        # Main layout: controls on top, then notices, plots and export output
        self.widget_box = widgets.VBox([
            widgets.HTML("<h3><u>Bioactive Peptide Plot Controls:</u></h3>"),
            controls_box,
            appearance_box,
            button_box,
            self.info_output,
            self.plot_output,
            self.export_output
        ])

        # Button callbacks
        self.plot_button.on_click(self._on_plot_button_click)
        self.export_button.on_click(self._on_export_button_click)
        self.download_plot_button.on_click(self._on_download_plot_click)

        # Refresh the group list whenever a new file is uploaded
        self.data_transformer.merged_uploader.observe(self._update_group_options, names='value')

        # The observer only fires on future uploads. Sync once now so data
        # loaded before this plotter was created is reflected immediately
        # (no-op while merged_df is still None).
        self._update_group_options(None)

    def _update_group_options(self, change):
        """Update group selection options when data changes"""
        if self.data_transformer.merged_df is not None:
            # Get all Avg_ columns
            avg_columns = [col.replace('Avg_', '') for col in self.data_transformer.merged_df.columns 
                         if col.startswith('Avg_')]
            
            # Update group selection options
            self.group_select.options = avg_columns
            # Select all groups by default
            self.group_select.value = avg_columns
            
    def _process_bioactive_data(self):
        """Process bioactive peptide data for visualization"""
        if self.data_transformer.merged_df is None:
            return None
            
        df = self.data_transformer.merged_df
        if 'function' not in df.columns:
            return None
            
        unique_function_absorbance = {}
        avg_columns = [col for col in df.columns if col.startswith('Avg_')]
        
        for column in avg_columns:
            grouping_variable = column.replace('Avg_', '')
            
            # Filter and process data
            temp_df = df[['unique ID', 'function', column]].copy()
            temp_df = temp_df[
                (temp_df[column] != 0) & 
                temp_df[column].notna() &
                temp_df['function'].notna()
            ]
            
            if temp_df.empty:
                continue
            
            # Process functions
            temp_df.loc[:, 'function'] = temp_df['function'].fillna('').str.split(';')
            exploded_df = temp_df.explode('function')
            exploded_df.loc[:, 'function'] = exploded_df['function'].str.strip()
            exploded_df = exploded_df[exploded_df['function'] != '']
            
            if not exploded_df.empty:
                function_grouped = exploded_df.groupby('function')[column].sum()
                unique_function_absorbance[grouping_variable] = function_grouped.to_dict()
        
        return unique_function_absorbance
        
    def process_function_percentages(self, data_dict, threshold=1):
        """Process data to combine functions below threshold into 'Minor Functions'"""
        total = sum(data_dict.values())
        processed_data = {}
        minor_functions_sum = 0
        
        # Calculate percentages and filter
        for func, value in data_dict.items():
            percentage = (value / total) * 100
            if percentage >= threshold:
                processed_data[func] = value
            else:
                minor_functions_sum += value
                
        # Add minor functions if any exist
        if minor_functions_sum > 0:
            processed_data[f'Minor Functions (<{threshold}%)'] = minor_functions_sum
            
        return processed_data
        
    def create_pie_charts(self, unique_function_absorbance):
        """Create pie charts for counts and abundances"""
        if not unique_function_absorbance:
            return None
            
        # Process the data to get counts
        results = self._process_export_data()
        if results is None:
            return None
            
        combined_df, combined_count_df, combined_absorbance_df = results
            
        pie_figs = []
        for group in unique_function_absorbance.keys():
            # Get data for this group
            absorbance_data = unique_function_absorbance[group]
            
            # Process abundance data with threshold
            processed_abundance = self.process_function_percentages(absorbance_data, threshold=1)
            
            # Get correct count data from combined_count_df
            count_data = {}
            for func in combined_count_df.index:
                if func != 'Counts of peptides':
                    count = combined_count_df.loc[func, group]
                    if count > 0:
                        count_data[func] = count
                        
            # Process count data with threshold
            processed_counts = self.process_function_percentages(count_data, threshold=1)
            
            # Create count pie chart first
            count_fig = go.Figure()
            count_labels = list(processed_counts.keys())
            count_values = list(processed_counts.values())
            count_colors = self._get_color_sequence(len(count_labels))
            
            count_fig.add_trace(go.Pie(
                labels=count_labels,
                values=count_values,
                marker=dict(colors=count_colors),
                textinfo='percent+label',
                hovertemplate="Function: %{label}<br>" +
                            "Count: %{value}<br>" +
                            "Percentage: %{percent}<br>" +
                            "<extra></extra>",
                #title=f"Count Distribution<br>{group}"
            ))
            
            # Create abundance pie chart
            abundance_fig = go.Figure()
            labels = list(processed_abundance.keys())
            values = list(processed_abundance.values())
            colors = self._get_color_sequence(len(labels))
            
            abundance_fig.add_trace(go.Pie(
                labels=labels,
                values=values,
                marker=dict(colors=colors),
                textinfo='percent+label',
                hovertemplate="Function: %{label}<br>" +
                            "Abundance: %{value:.2e}<br>" +
                            "Percentage: %{percent}<br>" +
                            "<extra></extra>",
                #title=f"Abundance Distribution<br>{group}<br><br>"
            ))
            
            pie_figs.extend([count_fig, abundance_fig])
            
        return pie_figs
        
    def _get_color_sequence(self, n_colors):
        """Get color sequence based on selected scheme"""
        if self.color_scheme.value.lower() in ['rainbow', 'hsv']:
            return [f'hsl({h},70%,60%)' for h in np.linspace(0, 330, n_colors)]
        
        try:
            color_sequence = getattr(px.colors.sequential, self.color_scheme.value, None)
            if color_sequence is None:
                color_sequence = getattr(px.colors.diverging, self.color_scheme.value, None)
            
            if color_sequence:
                if n_colors >= len(color_sequence):
                    indices = np.linspace(0, len(color_sequence)-1, n_colors)
                    return [color_sequence[int(i)] for i in indices]
                else:
                    return color_sequence[:n_colors]
        except:
            pass
            
        return [f'hsl({h},70%,60%)' for h in np.linspace(0, 330, n_colors)]
    
    def plot_stacked_bioactive_peptides(self, unique_function_absorbance):
        """Generate interactive Plotly stacked bar plots for bioactive peptides"""
        if not unique_function_absorbance:
            return None, None
            
        # Get all unique functions present in the data
        all_functions = set()
        for group_data in unique_function_absorbance.values():
            all_functions.update(group_data.keys())
            
        # Filter functions to only include those in our predefined list that are also in the data
        functions = [f for f in self.function_list if f in all_functions]
        
        # Add any functions that are in the data but not in our predefined list
        additional_functions = sorted([f for f in all_functions if f not in self.function_list])
        functions.extend(additional_functions)
        
        # Prepare data
        groups = list(unique_function_absorbance.keys())
        
        # Get colors for all functions
        colors = self._get_color_sequence(len(functions))
        
        # Calculate abundances
        plot_data = {func: [] for func in functions}
        total_abundances = []
        
        for group in groups:
            total = 0
            for func in functions:
                abundance = unique_function_absorbance[group].get(func, 0)
                plot_data[func].append(abundance)
                total += abundance
            total_abundances.append(total)

        # Create absolute abundance plot
        fig1 = go.Figure()
        for idx, func in enumerate(functions):
            hover_text = [
                f"Function: {func}<br>" +
                f"Sample: {group}<br>" +
                f"Absolute Abundance: {abundance:.2e}"
                for group, abundance in zip(groups, plot_data[func])
            ]
            
            fig1.add_trace(go.Bar(
                name=func,
                x=groups,
                y=plot_data[func],
                marker_color=colors[idx],
                hovertext=hover_text,
                hoverinfo='text'
            ))

        # Create relative abundance plot
        fig2 = go.Figure()
        for idx, func in enumerate(functions):
            relative_values = [
                100 * abundance / total if total > 0 else 0
                for abundance, total in zip(plot_data[func], total_abundances)
            ]
            
            hover_text = [
                f"Function: {func}<br>" +
                f"Sample: {group}<br>" +
                f"Relative Abundance: {value:.1f}%"
                for group, value in zip(groups, relative_values)
            ]
            
            fig2.add_trace(go.Bar(
                name=func,
                x=groups,
                y=relative_values,
                marker_color=colors[idx],
                hovertext=hover_text,
                hoverinfo='text'
            ))

        # Update layout for both plots
        for fig, title, yaxis_title in [
            (fig1, 'Distribution of Bioactive Peptides by Function', self.ylabel_widget.value),
            (fig2, 'Relative Distribution of Bioactive Peptides by Function', 'Relative Abundance (%)')
        ]:
            fig.update_layout(
                barmode='stack',
                title={
                    'text': title,
                    'y': 0.95,
                    'x': 0.5,
                    'xanchor': 'center',
                    'yanchor': 'top'
                },
                xaxis_title=self.xlabel_widget.value,
                yaxis_title=yaxis_title,
                legend_title="Bioactive Function",
                legend={'yanchor': "top", 'y': 1, 'xanchor': "left", 'x': 1.05},
                showlegend=True,
                template='plotly_white',
                height=600,
                width=1000,
                margin=dict(t=100, l=100, r=200),
                hoverlabel=dict(
                    bgcolor="white",
                    font_size=12,
                    font_family="Arial"
                )
            )
            
            fig.update_xaxes(
                tickangle=45,
                title_font={"size": 14},
                tickfont={"size": 12}
            )
            
            fig.update_yaxes(
                title_font={"size": 14},
                tickfont={"size": 12}
            )
            
            fig1.update_yaxes(
                title_font={"size": 14},
                tickfont={"size": 12},
                exponentformat='E',
                showexponent='all'
            )            

        return fig1, fig2


    def _on_plot_button_click(self, b):
        """Handle plot button click"""
        if not self.group_select.value:
            with self.info_output:
                self.info_output.clear_output(wait=True)
                display(HTML("<b style='color:red'>Please select at least one group.</b>"))
            return
            
        with self.plot_output:
            self.plot_output.clear_output(wait=True)
            
            # Process and plot data
            unique_function_absorbance = self._process_bioactive_data()
            if unique_function_absorbance:
                # Filter data for selected groups
                selected_groups = list(self.group_select.value)
                filtered_absorbance = {k: v for k, v in unique_function_absorbance.items() 
                                    if k in selected_groups}
                
                plot_type = self.plot_type.value
                
                if plot_type in ['All Plots']:
                    # Create bar plots
                    self.current_fig_abs, self.current_fig_rel = self.plot_stacked_bioactive_peptides(
                        filtered_absorbance
                    )
                    self.current_fig_abs.show()
                    self.current_fig_rel.show()
                            
                if plot_type in ['Bar Plots Only']:
                    # Create bar plots
                    self.current_fig_abs, self.current_fig_rel = self.plot_stacked_bioactive_peptides(
                        filtered_absorbance
                    )
                    if self.current_fig_abs is not None:
                        # Show only selected bar plot type
                        if self.bar_plot_type.value == 'Absolute Abundance':
                            self.current_fig_abs.show()
                        else:
                            self.current_fig_rel.show()    
                            
                if plot_type in ['All Plots', 'Pie Charts Only']:
                    pie_charts = self.create_pie_charts(filtered_absorbance)
                    if pie_charts:
                        for i in range(0, len(pie_charts), 2):
                            if i + 1 < len(pie_charts):
                                fig = go.Figure()
                                
                                # Add the first pie chart (Count) with more space
                                for trace in pie_charts[i].data:
                                    trace.domain = {'row': 0, 'column': 0, 'x': [0, 0.45]}  # Reduced width
                                    fig.add_trace(trace)
                                
                                # Add the second pie chart (Abundance) with more space
                                for trace in pie_charts[i+1].data:
                                    trace.domain = {'row': 0, 'column': 1, 'x': [0.55, 1]}  # Increased gap
                                    fig.add_trace(trace)
                                
                                group_name = selected_groups[i//2]
                                fig.update_layout(
                                    height=700,
                                    width=1600,  # Increased overall width
                                    margin=dict(t=150, b=50, l=100, r=100),  # Increased side margins
                                    annotations=[
                                        dict(
                                            text=f"Count Distribution<br>{group_name}",
                                            x=0.225,  # Adjusted x position
                                            y=1.1,
                                            font=dict(size=20),
                                            showarrow=False,
                                            xanchor='center',
                                            yanchor='bottom'
                                        ),
                                        dict(
                                            text=f"Abundance Distribution<br>{group_name}",
                                            x=0.775,  # Adjusted x position
                                            y=1.1,
                                            font=dict(size=20),
                                            showarrow=False,
                                            xanchor='center',
                                            yanchor='bottom'
                                        )
                                    ],
                                    showlegend=False
                                )
                                
                                # Update font sizes and text positioning
                                fig.update_traces(
                                    textfont_size=14,
                                    textposition='outside',
                                    pull=0.02  # Slightly separate slices for better label spacing
                                )
                                
                                fig.show()
                
                # Enable the export and download buttons
                self.export_button.disabled = False
                self.download_plot_button.disabled = False
            else:
                print("No bioactive data available for plotting.")
                self.export_button.disabled = True
                self.download_plot_button.disabled = True
                
    def _process_export_data(self):
        """Process data for export into Excel format"""
        unique_function_absorbance = self._process_bioactive_data()
        if not unique_function_absorbance:
            return None
            
        # Get all groups and functions
        groups = list(unique_function_absorbance.keys())
        all_functions = set()
        for group_data in unique_function_absorbance.values():
            all_functions.update(group_data.keys())
            
        # Calculate function counts
        df = self.data_transformer.merged_df
        summed_function_count = {}
        unique_function_counts = {}
        unique_function_count_averages = {}
        summed_function_abundance = {}
        
        for group in groups:
            abundance_column = f'Avg_{group}'
            if abundance_column not in df.columns:
                continue
                
            # Filter and process data
            temp_df = df[['unique ID', 'function', abundance_column]].copy()
            temp_df = temp_df[
                (temp_df[abundance_column] != 0) & 
                temp_df[abundance_column].notna() &
                temp_df['function'].notna()
            ]
            
            # Drop duplicates and calculate counts
            filtered_df = temp_df.drop_duplicates(subset='unique ID')
            unique_peptide_count = filtered_df['unique ID'].nunique()
            total_sum = filtered_df[abundance_column].sum()
            
            # Store the totals
            summed_function_abundance[group] = total_sum
            summed_function_count[group] = unique_peptide_count
            
            # Process functions
            filtered_df.loc[:, 'function'] = filtered_df['function'].fillna('').str.split(';')
            exploded_df = filtered_df.explode('function')
            exploded_df.loc[:, 'function'] = exploded_df['function'].str.strip()
            exploded_df = exploded_df[exploded_df['function'] != '']
            
            if not exploded_df.empty:
                # Count functions
                function_counts = exploded_df['function'].value_counts().to_dict()
                unique_function_counts[group] = function_counts
                
                # Calculate averages (using 1 since we're using averaged columns)
                function_averages = {func: count for func, count in function_counts.items()}
                unique_function_count_averages[group] = function_averages
        
        # Create DataFrames for export
        peptide_count_df = pd.DataFrame.from_dict(
            summed_function_count,
            orient='index',
            columns=['Counts of peptides']
        )
        
        function_count_df = pd.DataFrame.from_dict(
            unique_function_counts,
            orient='index'
        ).fillna(0).astype(int)
        
        combined_count_df = pd.concat([peptide_count_df, function_count_df], axis=1).T
        
        # Create abundance DataFrames
        peptide_absorbance_df = pd.DataFrame.from_dict(
            summed_function_abundance,
            orient='index',
            columns=['Summed Abundance']
        )
        
        function_absorbance_df = pd.DataFrame.from_dict(
            unique_function_absorbance,
            orient='index'
        ).fillna(0)
        
        combined_absorbance_df = pd.concat(
            [peptide_absorbance_df, function_absorbance_df],
            axis=1
        ).T
        
        # Create combined DataFrame with formatted values
        combined_df = pd.DataFrame(
            index=combined_absorbance_df.index,
            columns=combined_absorbance_df.columns
        )
        
        for col in combined_absorbance_df.columns:
            for idx in combined_absorbance_df.index:
                abundance = combined_absorbance_df.loc[idx, col]
                count = (combined_count_df.loc['Counts of peptides', col]
                        if idx == 'Summed Abundance'
                        else combined_count_df.loc[idx, col])
                combined_df.loc[idx, col] = "-" if (abundance == 0 and count == 0) else f"{abundance:.2e} ({round(count)})"
        
        combined_df.rename(index={'Summed Abundance': 'Total'}, inplace=True)
        
        return combined_df, combined_count_df, combined_absorbance_df

    
    def _on_download_plot_click(self, b):
        """Start browser downloads of the selected plots as standalone HTML files.

        What is downloaded depends on `self.plot_type.value`:

        * 'Bar Plots Only' - the bar figure matching `self.bar_plot_type.value`
          (`current_fig_abs` or `current_fig_rel`), if it exists.
        * 'Pie Charts Only' - one file per group holding the count and
          abundance pies side by side.
        * 'All Plots' - whichever bar figures exist, followed by the per-group
          pie files; the clicks are staggered so they do not fire at once.

        Everything is rendered into `self.export_output`, which is cleared
        first. Each download is a hidden data-URI link that a small script
        clicks.

        Args:
            b: The button that fired the callback (unused).
        """
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

        with self.export_output:
            self.export_output.clear_output(wait=True)
            mode = self.plot_type.value

            if mode == 'Bar Plots Only':
                # Download only the selected bar plot type
                if self.bar_plot_type.value == 'Absolute Abundance' and self.current_fig_abs is not None:
                    display(HTML(self._plot_download_snippet(
                        f"download_link_{timestamp}",
                        self.current_fig_abs,
                        f"bioactive_absolute_abundance_{timestamp}.html",
                        message="Starting download of absolute abundance plot...",
                    )))
                elif self.bar_plot_type.value == 'Relative Abundance' and self.current_fig_rel is not None:
                    display(HTML(self._plot_download_snippet(
                        f"download_link_{timestamp}",
                        self.current_fig_rel,
                        f"bioactive_relative_abundance_{timestamp}.html",
                        message="Starting download of relative abundance plot...",
                    )))
                return

            if mode not in ('Pie Charts Only', 'All Plots'):
                return

            # Both remaining modes need the pie charts rebuilt for the selection.
            unique_function_absorbance = self._process_bioactive_data()
            if not unique_function_absorbance:
                return

            selected_groups = list(self.group_select.value)
            filtered_absorbance = {k: v for k, v in unique_function_absorbance.items()
                                   if k in selected_groups}
            pie_charts = self.create_pie_charts(filtered_absorbance)

            if mode == 'All Plots':
                display(HTML('<div style="color: green; padding: 10px;">Starting downloads...</div>'))
                # Bar plots first, 500 ms apart. A figure that has not been
                # created yet is skipped instead of raising AttributeError.
                delay_ms = 0
                bar_figures = (
                    ('abs', 'absolute', self.current_fig_abs),
                    ('rel', 'relative', self.current_fig_rel),
                )
                for short_name, long_name, bar_fig in bar_figures:
                    if bar_fig is None:
                        continue
                    display(HTML(self._plot_download_snippet(
                        f"download_link_{short_name}_{timestamp}",
                        bar_fig,
                        f"bioactive_{long_name}_abundance_{timestamp}.html",
                        delay_ms=delay_ms,
                    )))
                    delay_ms += 500

            if not pie_charts:
                return

            # Geometry of the combined figure differs slightly between modes.
            if mode == 'All Plots':
                layout = dict(left_x=[0, 0.45], right_x=[0.55, 1], width=1600,
                              side_margin=100, title_x=(0.225, 0.775), pull=0.02)
            else:
                layout = dict(left_x=[0, 0.48], right_x=[0.52, 1], width=1400,
                              side_margin=50, title_x=(0.24, 0.76), pull=None)

            # Label each chart pair from the dict the charts were built from, not
            # from the raw widget selection (which may contain groups without data).
            # NOTE(review): assumes create_pie_charts returns a (count, abundance)
            # pair per group in the iteration order of its argument - confirm.
            group_names = list(filtered_absorbance)

            for i in range(0, len(pie_charts) - 1, 2):
                pair_index = i // 2
                if pair_index >= len(group_names):
                    break
                group_name = group_names[pair_index]

                fig = self._side_by_side_pie_figure(
                    pie_charts[i], pie_charts[i + 1], group_name, **layout
                )
                filename = f"bioactive_pie_charts_{group_name}_{timestamp}.html"

                if mode == 'All Plots':
                    # Start after the bar plots and stagger the pie downloads.
                    snippet = self._plot_download_snippet(
                        f"download_link_pie_{timestamp}_{i}",
                        fig,
                        filename,
                        delay_ms=1000 + (i * 500),
                    )
                else:
                    snippet = self._plot_download_snippet(
                        f"download_link_{timestamp}_{i}",
                        fig,
                        filename,
                        message=f"Starting download of pie charts for {group_name}...",
                    )
                display(HTML(snippet))

    def _plot_download_snippet(self, element_id, fig, filename, message=None, delay_ms=0):
        """Return HTML holding a hidden data-URI link to `fig` and a script that clicks it.

        Args:
            element_id: DOM id for the hidden link; must be unique in the output.
            fig: Figure exposing `to_html()`; its HTML is base64-embedded in the link.
            filename: Name suggested to the browser for the downloaded file.
            message: Optional green status line shown with the link.
            delay_ms: Milliseconds to wait before the link is clicked.
        """
        import html  # stdlib; local so the file's import cell does not need to change

        payload = base64.b64encode(fig.to_html().encode()).decode()
        notice = ''
        if message:
            notice = f'<div style="color: green; padding: 10px;">{html.escape(message)}</div>'
        # Group names end up in `filename`, so escape it for the attribute context.
        safe_filename = html.escape(filename, quote=True)
        return f'''
            <div id="download_container_{element_id}">
                <a id="{element_id}"
                   href="data:text/html;charset=utf-8;base64,{payload}"
                   download="{safe_filename}"
                   style="display: none;"></a>
                {notice}
                <script>
                    setTimeout(() => {{
                        document.getElementById('{element_id}').click();
                    }}, {int(delay_ms)});
                </script>
            </div>
        '''

    def _side_by_side_pie_figure(self, count_fig, abundance_fig, group_name,
                                 left_x, right_x, width, side_margin, title_x, pull=None):
        """Combine a count pie and an abundance pie into one titled figure.

        Args:
            count_fig: Figure whose traces go on the left.
            abundance_fig: Figure whose traces go on the right.
            group_name: Group label appended to both titles.
            left_x, right_x: Horizontal domain ``[start, end]`` of each side.
            width: Figure width in pixels (height is fixed at 700).
            side_margin: Left and right margin in pixels.
            title_x: ``(left, right)`` x positions of the two titles.
            pull: Optional slice separation applied to every trace.
        """
        fig = go.Figure()

        # Add count pie chart
        for trace in count_fig.data:
            trace.domain = {'row': 0, 'column': 0, 'x': list(left_x)}
            fig.add_trace(trace)

        # Add abundance pie chart
        for trace in abundance_fig.data:
            trace.domain = {'row': 0, 'column': 1, 'x': list(right_x)}
            fig.add_trace(trace)

        titles = (
            f"Count Distribution<br>{group_name}",
            f"Abundance Distribution<br>{group_name}",
        )
        fig.update_layout(
            height=700,
            width=width,
            margin=dict(t=150, b=50, l=side_margin, r=side_margin),
            annotations=[
                dict(
                    text=title,
                    x=x_pos,
                    y=1.1,
                    font=dict(size=20),
                    showarrow=False,
                    xanchor='center',
                    yanchor='bottom'
                )
                for title, x_pos in zip(titles, title_x)
            ],
            showlegend=False
        )

        # Update font sizes and text positioning
        trace_style = dict(textfont_size=14, textposition='outside')
        if pull is not None:
            trace_style['pull'] = pull
        fig.update_traces(**trace_style)

        return fig
    def _on_export_button_click(self, b):
        """Export the processed tables to an Excel workbook and start its download.

        The three frames returned by `_process_export_data()` are written to
        the sheets 'combined', 'count' and 'absorbance' of an in-memory
        workbook, which is then offered through a hidden data-URI link that a
        script clicks. Status and error messages are rendered into
        `self.export_output`.

        Args:
            b: The button that fired the callback (unused).
        """
        import html  # stdlib; local so the file's import cell does not need to change

        try:
            # Process the data
            results = self._process_export_data()
            if results is None:
                with self.export_output:
                    self.export_output.clear_output(wait=True)
                    display(HTML('<div style="color: red; padding: 10px;">No bioactive data to export.</div>'))
                return

            combined_df, combined_count_df, combined_absorbance_df = results

            # Create Excel file in memory
            output = io.BytesIO()
            with pd.ExcelWriter(output, engine='openpyxl') as writer:
                combined_df.to_excel(writer, sheet_name='combined', index=True)
                combined_count_df.to_excel(writer, sheet_name='count', index=True)
                combined_absorbance_df.to_excel(writer, sheet_name='absorbance', index=True)

            # Read the bytes only after the writer has been closed (and saved)
            excel_data = output.getvalue()

            # Generate filename
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            filename = f"Processed_mbpdb_results_{timestamp}.xlsx"

            # Create download link
            with self.export_output:
                self.export_output.clear_output(wait=True)
                display(HTML(f'''
                    <div id="export_container_{timestamp}">
                        <a id="export_link_{timestamp}" 
                           href="data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{base64.b64encode(excel_data).decode()}" 
                           download="{filename}"
                           style="display: none;"></a>
                        <div style="color: green; padding: 10px;">Starting download of {filename}...</div>
                    </div>
                    <script>
                        document.getElementById('export_link_{timestamp}').click();
                    </script>
                '''))

        except Exception as e:
            # Escape the message: exception text such as "<class 'float'>" would
            # otherwise be parsed as markup and vanish from the rendered error.
            with self.export_output:
                self.export_output.clear_output(wait=True)
                display(HTML(f'<div style="color: red; padding: 10px;">Error exporting data: {html.escape(str(e))}</div>'))
    def display(self):
        """Show this plotter's top-level widget container in the notebook output."""
        # `display` below is the module-level IPython function, not this method.
        container = self.widget_box
        display(container)

In [5]:
# Initialize the interface
# The DataTransformation instance owns the uploaded table (`merged_df`);
# setup_data_loading_ui() initializes and displays the data loading UI.
data_transformer = DataTransformation()
data_transformer.setup_data_loading_ui()

# Create bioactive plotter
# The plotter is handed the transformer because it reads `merged_df` from it
# when exporting; display() then shows the plotter's widget box.
bioactive_plotter = BioactivePlotter(data_transformer)
bioactive_plotter.display()

GridBox(children=(VBox(children=(HTML(value='<h3><u>Upload Data File:</u></h3>'), HBox(children=(FileUpload(va…

VBox(children=(HTML(value='<h3><u>Bioactive Peptide Plot Controls:</u></h3>'), VBox(children=(HTML(value='<h4>…