# HySprint Sample Attribution Management Dashboard

This dashboard allows you to manage and override attributions for HySprint samples in NOMAD. It provides the following key features:

1. **Authentication**: Connect to a NOMAD Oasis instance using your credentials
2. **Data Retrieval**: Fetch HySprint sample data from NOMAD
3. **Attribution Management**: Override sample attributions locally
4. **Visualization**: View statistics and charts about sample attributions and uploads

The dashboard uses a tab-based interface for easy navigation between different functions.

## Import Required Libraries

In [5]:
# Import necessary libraries
import ipywidgets as widgets
from ipywidgets import HBox, VBox, Button, Text, Password, Label, Dropdown
from IPython.display import display, clear_output, FileLink
import time # For simulation delay
import os   # To read environment variables
import json # To handle JSON responses and local files
import pandas as pd # For data manipulation
import numpy as np  # For numerical operations
import matplotlib.pyplot as plt # For creating charts
import seaborn as sns # For enhanced visualizations
import plotly.express as px # For interactive plots
import plotly.graph_objects as go # For additional plot types
from pathlib import Path # For file path handling
from datetime import datetime, timedelta # For date operations
import re # For regular expressions
import requests # For API calls

# Configure matplotlib for better display in notebook
# (`%matplotlib inline` is an IPython magic, so this cell only runs under IPython/Jupyter)
%matplotlib inline
plt.style.use('seaborn-v0_8-whitegrid')

# Import the NOMAD authentication module from the external notebook
# NOTE(review): later cells reference general_settings_box, api_client, current_token,
# current_user_info, oasis_dropdown and oasis_options without defining them; they are
# presumably created by this notebook run - confirm against nomad_auth.ipynb.
%run 'nomad_auth.ipynb'

## Define Utility Functions

In [6]:
# Define utility functions for data handling and visualization
# These functions are specific to this dashboard and not part of the auth module

# --- Data Retrieval Functions ---

def get_hysprint_data(client, max_entries=500, simulated_delay=1.0):
    """
    Retrieve HySprint sample data from NOMAD (placeholder returning mock data)
    
    Parameters:
    -----------
    client: NomadClient
        Authenticated NOMAD API client. Not used by this placeholder; it is
        kept in the signature for the real API call that will replace it.
    max_entries: int
        Maximum number of entries to retrieve. The mock generator caps the
        result at 100 rows; zero or negative values yield an empty DataFrame.
    simulated_delay: float
        Seconds to sleep to imitate network latency. Pass 0 to skip the wait.
        
    Returns:
    --------
    pandas.DataFrame
        DataFrame with the columns upload_id, sample_name, lab_id,
        upload_date, main_author, cell_area and efficiency, or None if an
        unexpected error occurred.
    """
    try:
        # Placeholder for actual API call
        # In a real implementation, you would use the client.get_entries() method
        print(f"Retrieving up to {max_entries} HySprint samples...")
        if simulated_delay and simulated_delay > 0:
            time.sleep(simulated_delay)  # Simulate network delay
        
        # Cap at 100 for demonstration and never go below zero, so a
        # non-positive request produces an empty table instead of an error
        n_samples = max(0, min(int(max_entries), 100))
        
        # Sample authors
        authors = ['John Smith', 'Maria Garcia', 'Ahmed Hassan', 'Sophia Chen', 'David Mueller', 'Ana Patel']
        
        # Generate random dates within the last year
        date_start = datetime.now() - timedelta(days=365)
        # .tolist() converts to plain Python ints, which timedelta requires
        day_offsets = np.random.randint(0, 365, size=n_samples).tolist()
        date_strings = [
            (date_start + timedelta(days=offset)).strftime('%Y-%m-%d')
            for offset in day_offsets
        ]
        
        # Create DataFrame
        df = pd.DataFrame({
            'upload_id': [f"upload_{100000 + i}" for i in range(n_samples)],
            'sample_name': [f"Sample_{i+1}" for i in range(n_samples)],
            'lab_id': [f"LAB{2023000 + i}" for i in range(n_samples)],
            'upload_date': date_strings,
            'main_author': np.random.choice(authors, size=n_samples),
            'cell_area': np.random.uniform(0.5, 2.0, size=n_samples).round(3),
            'efficiency': np.random.uniform(5, 25, size=n_samples).round(2)
        })
        
        print(f"Retrieved {len(df)} samples")
        return df
        
    except Exception as e:
        # Top-level boundary for the UI: report the problem and signal failure with None
        print(f"Error retrieving HySprint data: {str(e)}")
        return None

# --- Attribution Management Functions ---

def load_attributions(filename='nomad_samples_with_authors.csv'):
    """
    Load attribution overrides from file
    
    Parameters:
    -----------
    filename: str
        Path to the attribution CSV file. It must provide the columns
        'upload_id' and 'main_author'; 'override_date' is optional.
        
    Returns:
    --------
    dict
        Dictionary mapping upload_id (str) to a dict with the keys
        'main_author' and 'override_date'. Rows with a blank upload_id or
        a blank main_author are skipped. An empty dict is returned when the
        file is missing, lacks the required columns, or cannot be read.
    """
    attributions = {}
    try:
        # Check if file exists
        if not os.path.exists(filename):
            print(f"Attribution file {filename} not found. Starting with empty attributions.")
            return attributions
            
        # Read every cell as text: numeric-looking upload IDs must stay
        # strings to match the sample table, and blank cells must become ''
        # rather than NaN (a NaN author cannot be shown in a Text widget).
        df = pd.read_csv(filename, dtype=str, keep_default_na=False).fillna('')
        
        if 'upload_id' in df.columns and 'main_author' in df.columns:
            today = datetime.now().strftime('%Y-%m-%d')
            if 'override_date' in df.columns:
                override_dates = df['override_date']
            else:
                override_dates = [''] * len(df)
                
            for upload_id, author, override_date in zip(df['upload_id'], df['main_author'], override_dates):
                # A row without an ID or without an author is not an override
                if not upload_id.strip() or not author.strip():
                    continue
                attributions[upload_id] = {
                    'main_author': author,
                    'override_date': override_date.strip() or today
                }
                
        print(f"Loaded {len(attributions)} attribution overrides")
        return attributions
        
    except Exception as e:
        # Best effort: the dashboard should still start with whatever was loaded
        print(f"Error loading attributions: {str(e)}")
        return attributions

def save_attributions(attributions, filename='nomad_samples_with_authors.csv'):
    """
    Save attribution overrides to file
    
    Parameters:
    -----------
    attributions: dict
        Dictionary mapping upload_id to a dict that may hold the keys
        'main_author' and 'override_date'
    filename: str
        Path to save the attribution file
        
    Returns:
    --------
    bool
        True if successful, False otherwise
    """
    # Fixed column list: even an empty override set must produce a CSV with
    # a header row, otherwise the file cannot be parsed when it is loaded again.
    columns = ['upload_id', 'main_author', 'override_date']
    try:
        today = datetime.now().strftime('%Y-%m-%d')
        records = [
            {
                'upload_id': upload_id,
                'main_author': attr_info.get('main_author', ''),
                'override_date': attr_info.get('override_date', today)
            }
            for upload_id, attr_info in attributions.items()
        ]
        
        # Save to CSV
        pd.DataFrame(records, columns=columns).to_csv(filename, index=False)
        print(f"Saved {len(attributions)} attribution overrides to {filename}")
        return True
        
    except Exception as e:
        # Report the failure to the caller instead of raising into the UI
        print(f"Error saving attributions: {str(e)}")
        return False

# --- Visualization Functions ---

def create_sample_table(df, attributions):
    """
    Build a simple editable table for sample attribution management
    
    Parameters:
    -----------
    df: pandas.DataFrame
        Sample data; the columns upload_id, sample_name and main_author are read
    attributions: dict
        Attribution overrides keyed by upload_id. The Save buttons of the
        table modify this dictionary in place.
        
    Returns:
    --------
    ipywidgets.Widget
        VBox holding a header row, one row per sample (first 20 only) and,
        when rows were left out, a note saying so
    """
    highlight_color = '#d4f7d4'  # Light green
    
    def fixed_label(text, width):
        # Label cell with a fixed column width
        return widgets.Label(text, layout=widgets.Layout(width=width))
    
    def build_click_handler(target_id, text_box):
        # Factory so each button is bound to its own upload ID and input field
        def on_click(_button):
            entered = text_box.value.strip()
            if not entered:
                # An empty input removes the override
                attributions.pop(target_id, None)
                text_box.style.background = ''  # Reset background
                return
            attributions[target_id] = {
                'main_author': entered,
                'override_date': datetime.now().strftime('%Y-%m-%d')
            }
            text_box.style.background = highlight_color
        return on_click
    
    # Header row
    header_specs = [
        ('Upload ID', '150px'),
        ('Sample Name', '150px'),
        ('Current Author', '150px'),
        ('Override Author', '200px'),
        ('Actions', '100px'),
    ]
    table_rows = [
        widgets.HBox(
            [fixed_label(title, width) for title, width in header_specs],
            layout=widgets.Layout(margin='5px 0', font_weight='bold')
        )
    ]
    
    # One row per sample (limit to first 20 for performance)
    for _, sample in df.head(20).iterrows():
        sample_id = sample['upload_id']
        original_author = sample['main_author']
        stored_override = attributions.get(sample_id, {}).get('main_author', '')
        
        author_box = widgets.Text(
            value=stored_override,
            placeholder='Enter override author',
            layout=widgets.Layout(width='200px')
        )
        # Highlight overridden values
        if stored_override:
            author_box.style.background = highlight_color
        
        row_save_button = widgets.Button(
            description='Save',
            button_style='primary',
            layout=widgets.Layout(width='80px')
        )
        row_save_button.on_click(build_click_handler(sample_id, author_box))
        
        table_rows.append(widgets.HBox(
            [
                fixed_label(sample_id, '150px'),
                fixed_label(sample['sample_name'], '150px'),
                fixed_label(original_author, '150px'),
                author_box,
                row_save_button,
            ],
            layout=widgets.Layout(margin='2px 0')
        ))
    
    # Tell the user when the table was truncated
    total = len(df)
    if total > 20:
        table_rows.append(widgets.HTML(f"<em>Note: Showing first 20 of {total} samples.</em>"))
    
    table = widgets.VBox()
    table.children = table_rows
    return table

def _apply_author_overrides(df, attributions):
    """Return a copy of df whose 'main_author' column reflects the overrides."""
    result = df.copy()
    # Only non-empty override authors replace the original value
    override_map = {
        upload_id: attr_info['main_author']
        for upload_id, attr_info in attributions.items()
        if attr_info.get('main_author')
    }
    if override_map:
        # map() yields NaN for upload IDs without an override; every row that
        # shares an overridden upload_id is updated, not just the first one
        overrides = result['upload_id'].map(override_map)
        result['main_author'] = result['main_author'].where(overrides.isna(), overrides)
    return result


def _author_bar_chart(counts, title, color):
    """Draw one samples-per-author bar chart into the active output context."""
    plt.figure(figsize=(8, 5))
    plt.bar(counts.index, counts.values, color=color)
    plt.xticks(rotation=45, ha='right')
    plt.title(title)
    plt.xlabel('Author')
    plt.ylabel('Number of Samples')
    plt.tight_layout()
    plt.show()


def create_author_charts(df, attributions):
    """
    Create charts for author attribution analysis
    
    Parameters:
    -----------
    df: pandas.DataFrame
        DataFrame containing sample data; the columns upload_id, main_author
        and upload_date are read. The DataFrame itself is not modified.
    attributions: dict
        Dictionary of attribution overrides keyed by upload_id
        
    Returns:
    --------
    ipywidgets.Widget
        VBox with three Output widgets: the original author distribution,
        the distribution after overrides, and monthly uploads per author
    """
    df_with_overrides = _apply_author_overrides(df, attributions)
    
    # Original author distribution
    orig_output = widgets.Output()
    with orig_output:
        _author_bar_chart(
            df['main_author'].value_counts(),
            'Original Author Distribution',
            'skyblue'
        )
    
    # Updated author distribution (with overrides)
    updated_output = widgets.Output()
    with updated_output:
        _author_bar_chart(
            df_with_overrides['main_author'].value_counts(),
            'Author Distribution with Overrides',
            'lightgreen'
        )
    
    # Time series of uploads by author
    ts_output = widgets.Output()
    with ts_output:
        # Convert dates to datetime (on the copy, so the caller's data is untouched)
        df_with_overrides['upload_date'] = pd.to_datetime(df_with_overrides['upload_date'])
        
        # Count samples per calendar month and author
        upload_month = df_with_overrides['upload_date'].dt.to_period('M')
        df_grouped = df_with_overrides.groupby([upload_month, 'main_author']).size().unstack(fill_value=0)
        
        # Plot one line per author
        month_labels = df_grouped.index.astype(str)
        plt.figure(figsize=(10, 6))
        for col in df_grouped.columns:
            plt.plot(month_labels, df_grouped[col], marker='o', label=col)
        plt.legend(title='Author')
        plt.title('Sample Uploads Over Time by Author')
        plt.xlabel('Month')
        plt.ylabel('Number of Samples')
        plt.xticks(rotation=45, ha='right')
        plt.grid(True, linestyle='--', alpha=0.7)
        plt.tight_layout()
        plt.show()
    
    # Combine charts
    charts_container = widgets.VBox()
    charts_container.children = [orig_output, updated_output, ts_output]
    return charts_container

## Authentication Tab

In [7]:
# Create the authentication tab using the imported nomad_auth module
def create_auth_tab():
    """
    Build the authentication tab
    
    Returns:
    --------
    tuple
        (auth_ui, auth_state). auth_state is a dict of zero-argument
        callables ('is_authenticated', 'token', 'user_info', 'client',
        'oasis', 'oasis_url') that look up the current values each time
        they are called, so other tabs always see the latest state.
    """
    def _is_authenticated():
        return api_client is not None
    
    def _token():
        return current_token
    
    def _user_info():
        return current_user_info
    
    def _client():
        return api_client
    
    def _oasis():
        # Empty selection is reported as None
        return oasis_dropdown.value or None
    
    def _oasis_url():
        selected = oasis_dropdown.value
        if not selected:
            return None
        return oasis_options.get(selected, None)
    
    # Wrapper for authentication state that other tabs can use
    auth_state = {
        'is_authenticated': _is_authenticated,
        'token': _token,
        'user_info': _user_info,
        'client': _client,
        'oasis': _oasis,
        'oasis_url': _oasis_url
    }
    
    # Heading and hint above the settings box that comes from the nomad_auth module
    heading = widgets.HTML("<h3>NOMAD Authentication</h3>")
    hint = widgets.HTML("<p>Please authenticate with NOMAD to access data.</p>")
    auth_ui = widgets.VBox([heading, hint, general_settings_box])
    
    return auth_ui, auth_state

## Data Retrieval Tab (uses get_hysprint_data)

In [8]:
# Define the data retrieval UI
def create_data_tab(auth_state):
    """
    Build the data retrieval tab
    
    Parameters:
    -----------
    auth_state: dict
        Callables describing the authentication state; 'is_authenticated'
        and 'client' are used here
        
    Returns:
    --------
    tuple
        (data_ui, data_state). data_state is a dict with the keys 'df'
        (None until a fetch succeeds) and 'attributions' (loaded from the
        attribution file when the tab is created).
    """
    # Shared data store handed to the other tabs
    data_state = {
        'df': None,
        'attributions': load_attributions()
    }
    
    # Where progress and result messages are shown
    status_output = widgets.Output()
    
    # Slider selecting how many entries to request
    max_entries = widgets.IntSlider(
        description='Max entries:',
        value=500,
        min=10,
        max=5000,
        step=10,
        orientation='horizontal',
        continuous_update=False,
        readout=True,
        readout_format='d',
        disabled=False,
        layout=widgets.Layout(width='50%')
    )
    
    # Button that triggers the fetch
    fetch_button = widgets.Button(
        description='Fetch HySprint Data',
        icon='database',
        button_style='info',
        tooltip='Click to fetch HySprint sample data from NOMAD',
        disabled=False
    )
    
    def on_fetch_button_click(b):
        with status_output:
            clear_output()
            
            # Fetching requires an authenticated client
            if not auth_state['is_authenticated']():
                print("❌ Please authenticate first")
                return
            
            print("Fetching HySprint sample data from NOMAD...")
            try:
                fetched = get_hysprint_data(auth_state['client'](), max_entries=max_entries.value)
                
                if fetched is not None and not fetched.empty:
                    data_state['df'] = fetched
                    print(f"✓ Retrieved {len(fetched)} HySprint samples")
                    
                    # Display first 5 rows
                    print("\nPreview of retrieved data:")
                    display(fetched.head())
                else:
                    print("❌ No data retrieved")
                    
            except Exception as e:
                print(f"❌ Error retrieving data: {str(e)}")
    
    fetch_button.on_click(on_fetch_button_click)
    
    # Assemble the tab layout
    controls = widgets.HBox([max_entries, fetch_button])
    data_ui = widgets.VBox([
        widgets.HTML("<h2>HySprint Data Retrieval</h2>"),
        controls,
        status_output
    ])
    
    return data_ui, data_state

## Local Attribution Management

In [9]:
# Define the attribution management UI
def create_attribution_tab(data_state):
    """
    Build the attribution management tab
    
    Parameters:
    -----------
    data_state: dict
        Shared state with the keys 'df' and 'attributions'. Resetting
        replaces data_state['attributions'] with the saved overrides.
        
    Returns:
    --------
    ipywidgets.Widget
        VBox with the action buttons, a status area and the sample table
    """
    # Output areas for messages and for the table itself
    status_output = widgets.Output()
    table_container = widgets.Output()
    
    def update_table():
        # Rebuild the table from the current data and overrides
        with table_container:
            clear_output()
            current_df = data_state['df']
            if current_df is None or current_df.empty:
                display(widgets.HTML("<p>No data available. Please fetch data first.</p>"))
                return
            display(create_sample_table(current_df, data_state['attributions']))
    
    def on_save_button_click(b):
        with status_output:
            clear_output()
            overrides = data_state['attributions']
            # Note: an empty override set is never written to disk
            if not overrides:
                print("ℹ️ No attributions to save")
                return
            if save_attributions(overrides):
                print(f"✓ Saved {len(overrides)} attribution overrides")
            else:
                print("❌ Failed to save attributions")
    
    def on_reset_button_click(b):
        with status_output:
            clear_output()
            # Discard unsaved edits by reloading the file
            data_state['attributions'] = load_attributions()
            print("✓ Attributions reset to saved state")
            update_table()
    
    def on_refresh_click(b):
        update_table()
    
    # Action buttons
    refresh_button = widgets.Button(
        description='Refresh Table',
        button_style='info',
        icon='sync'
    )
    save_button = widgets.Button(
        description='Save Attributions',
        icon='save',
        button_style='success',
        tooltip='Save attribution changes to local file',
        disabled=False
    )
    reset_button = widgets.Button(
        description='Reset Changes',
        icon='refresh',
        button_style='danger',
        tooltip='Reset all attribution changes',
        disabled=False
    )
    
    # Connect event handlers
    refresh_button.on_click(on_refresh_click)
    save_button.on_click(on_save_button_click)
    reset_button.on_click(on_reset_button_click)
    
    # Assemble the tab layout
    button_bar = widgets.HBox([refresh_button, save_button, reset_button])
    attribution_ui = widgets.VBox([
        widgets.HTML("<h2>Sample Attribution Management</h2>"),
        button_bar,
        status_output,
        table_container
    ])
    
    # Show the initial table (or the "no data" hint)
    update_table()
    
    return attribution_ui

## Visualization Tab (create_author_charts)

In [10]:
# Define the visualization tab
def create_visualization_tab(data_state):
    """
    Build the visualization tab
    
    Parameters:
    -----------
    data_state: dict
        Shared state with the keys 'df' and 'attributions'
        
    Returns:
    --------
    ipywidgets.Widget
        VBox with a refresh button, a status area and the chart area
    """
    # Output areas for the charts and for status messages
    viz_container = widgets.Output()
    status_output = widgets.Output()
    
    def update_visualizations():
        # Redraw the charts from the current data and overrides
        with viz_container:
            clear_output()
            current_df = data_state['df']
            if current_df is None or current_df.empty:
                display(widgets.HTML("<p>No data available. Please fetch data first.</p>"))
                return
            display(create_author_charts(current_df, data_state['attributions']))
    
    def on_refresh_click(b):
        with status_output:
            clear_output()
            print("Refreshing visualizations...")
            update_visualizations()
            print("✓ Visualizations updated")
    
    refresh_button = widgets.Button(
        description='Refresh Visualizations',
        icon='sync',
        button_style='info'
    )
    refresh_button.on_click(on_refresh_click)
    
    # Assemble the tab layout
    title = widgets.HTML("<h2>HySprint Sample Visualizations</h2>")
    viz_ui = widgets.VBox([title, refresh_button, status_output, viz_container])
    
    # Draw the initial charts (or the "no data" hint)
    update_visualizations()
    
    return viz_ui

## Data Overview Tab

In [11]:
# Define the data overview tab
def create_overview_tab(data_state):
    """
    Build the data overview tab
    
    Parameters:
    -----------
    data_state: dict
        Shared state with the keys 'df' and 'attributions'
        
    Returns:
    --------
    ipywidgets.Widget
        VBox with a refresh button and an overview area showing summary
        statistics and a bar chart of the five authors with the most samples
    """
    # Overview container
    overview_container = widgets.Output()
    
    # Refresh button
    refresh_button = widgets.Button(
        description='Refresh Overview',
        button_style='info',
        icon='sync'
    )
    
    # Update overview function
    def update_overview():
        with overview_container:
            clear_output()
            if data_state['df'] is not None and not data_state['df'].empty:
                df = data_state['df']
                attributions = data_state['attributions']
                
                # Calculate statistics
                total_samples = len(df)
                override_count = len(attributions)
                unique_authors = df['main_author'].nunique()
                date_range = f"{df['upload_date'].min()} to {df['upload_date'].max()}"
                
                # Create HTML overview
                html_content = f"""
                <div style="padding: 20px; background-color: #f8f9fa; border-radius: 10px;">
                    <h3>📊 HySprint Sample Data Overview</h3>
                    <table style="width: 100%; border-collapse: collapse;">
                        <tr>
                            <td style="padding: 10px; border-bottom: 1px solid #ddd;"><strong>Total Samples:</strong></td>
                            <td style="padding: 10px; border-bottom: 1px solid #ddd;">{total_samples}</td>
                        </tr>
                        <tr>
                            <td style="padding: 10px; border-bottom: 1px solid #ddd;"><strong>Attribution Overrides:</strong></td>
                            <td style="padding: 10px; border-bottom: 1px solid #ddd;">{override_count}</td>
                        </tr>
                        <tr>
                            <td style="padding: 10px; border-bottom: 1px solid #ddd;"><strong>Unique Authors:</strong></td>
                            <td style="padding: 10px; border-bottom: 1px solid #ddd;">{unique_authors}</td>
                        </tr>
                        <tr>
                            <td style="padding: 10px; border-bottom: 1px solid #ddd;"><strong>Date Range:</strong></td>
                            <td style="padding: 10px; border-bottom: 1px solid #ddd;">{date_range}</td>
                        </tr>
                    </table>
                </div>
                """
                
                # Show the summary table first, then the chart below it
                display(widgets.HTML(html_content))
                
                # Top authors chart
                author_counts = df['main_author'].value_counts().head(5)
                plt.figure(figsize=(8, 5))
                plt.bar(author_counts.index, author_counts.values, color='lightblue')
                plt.xticks(rotation=45, ha='right')
                plt.xlabel('Author')
                plt.ylabel('Number of Samples')
                plt.title('Top 5 Authors by Sample Count')
                plt.tight_layout()
                # pyplot.show() takes no figure argument; it renders the
                # open figure, so nothing is passed positionally
                plt.show()
                
            else:
                display(widgets.HTML("<p>No data available. Please fetch data first.</p>"))
    
    # Refresh button click handler
    def on_refresh_click(b):
        update_overview()
    
    refresh_button.on_click(on_refresh_click)
    
    # Combine widgets into a form
    overview_ui = widgets.VBox([
        widgets.HTML("<h2>Data Overview</h2>"),
        refresh_button,
        overview_container
    ])
    
    # Initialize overview
    update_overview()
    
    return overview_ui

## Error Handling and Status Messages

In [12]:
# General error handler for operations
def handle_error(operation, exception, output_widget):
    """
    Handle exceptions and display error messages
    
    Parameters:
    -----------
    operation: str
        Name of the operation that failed
    exception: Exception
        The exception that was raised
    output_widget: widgets.Output
        Output widget to display the error message
    """
    import traceback
    
    with output_widget:
        clear_output()
        error_message = f"❌ Error during {operation}: {str(exception)}"
        print(error_message)
        
        # Print the traceback of the exception that was passed in.
        # print_exc() would report whatever exception is currently being
        # handled, which is nothing when this runs outside an except block.
        traceback.print_exception(type(exception), exception, exception.__traceback__)

# Status message display function
def show_status(message, output_widget, success=True):
    """
    Show a single status line in an output widget
    
    Parameters:
    -----------
    message: str
        Text of the status message
    output_widget: widgets.Output
        Output widget that receives the message; its previous content is cleared
    success: bool
        True prefixes the message with a check mark, False with a cross
    """
    if success:
        prefix = "✓"
    else:
        prefix = "❌"
    
    with output_widget:
        clear_output()
        print(f"{prefix} {message}")

## Search and Filtering

In [13]:
# Define the search and filtering UI
def create_search_tab(data_state):
    """
    Build the search and filtering tab
    
    Parameters:
    -----------
    data_state: dict
        Shared state; data_state['df'] is read and must provide the columns
        upload_id, sample_name, lab_id, upload_date and main_author
        
    Returns:
    --------
    tuple
        (search_ui, update_author_options). Call update_author_options()
        after new data has been fetched to refresh the author dropdown and
        the default date range.
    """
    # Search input
    search_input = widgets.Text(
        value='',
        placeholder='Enter search term (sample name, lab ID, etc.)',
        description='Search:',
        disabled=False,
        layout=widgets.Layout(width='60%')
    )
    
    # Author filter
    author_filter = widgets.Dropdown(
        options=['All Authors'],
        value='All Authors',
        description='Author:',
        disabled=False,
        layout=widgets.Layout(width='40%')
    )
    
    # Date range filter using separate date pickers instead of DateRangeSlider
    start_date_picker = widgets.DatePicker(
        description='Start Date:',
        disabled=False,
        layout=widgets.Layout(width='40%')
    )
    
    end_date_picker = widgets.DatePicker(
        description='End Date:',
        disabled=False,
        layout=widgets.Layout(width='40%')
    )
    
    # Apply filter button
    apply_button = widgets.Button(
        description='Apply Filters',
        button_style='primary',
        icon='filter'
    )
    
    # Reset filters button
    reset_button = widgets.Button(
        description='Reset Filters',
        button_style='warning',
        icon='refresh'
    )
    
    # Results container
    results_container = widgets.Output()
    
    # Status output
    status_output = widgets.Output()
    
    def has_data():
        # True when a non-empty DataFrame has been fetched
        df = data_state['df']
        return df is not None and not df.empty
    
    def reset_date_pickers():
        # Set both pickers to the first and last upload date in the data.
        # The end date is inclusive in apply_filters, so no extra day is added.
        df = data_state['df']
        if 'upload_date' not in df.columns:
            return
        dates = pd.to_datetime(df['upload_date']).dropna()
        if dates.empty:
            return
        start_date_picker.value = dates.min().date()
        end_date_picker.value = dates.max().date()
    
    # Update author dropdown options
    def update_author_options():
        if not has_data():
            return
        authors = ['All Authors'] + sorted(data_state['df']['main_author'].unique().tolist())
        author_filter.options = authors
        reset_date_pickers()
    
    # Apply filters function
    def apply_filters():
        with results_container:
            clear_output()
            
            if not has_data():
                display(widgets.HTML("<p>No data available. Please fetch data first.</p>"))
                return
            
            # Start with the full dataset
            filtered_df = data_state['df'].copy()
            
            # Apply search filter as a plain substring match; regex=False keeps
            # characters such as '(' or '.' from being read as a pattern
            search_term = search_input.value.strip().lower()
            if search_term:
                mask = pd.Series(False, index=filtered_df.index)
                for column in ('sample_name', 'lab_id', 'upload_id'):
                    mask |= filtered_df[column].str.lower().str.contains(
                        search_term, na=False, regex=False
                    )
                filtered_df = filtered_df[mask]
            
            # Apply author filter
            if author_filter.value != 'All Authors':
                filtered_df = filtered_df[filtered_df['main_author'] == author_filter.value]
            
            # Apply date filter; each picker works on its own, so a single
            # bound is honoured as well
            start_value = start_date_picker.value
            end_value = end_date_picker.value
            if start_value is not None or end_value is not None:
                upload_dates = pd.to_datetime(filtered_df['upload_date'])
                date_mask = pd.Series(True, index=filtered_df.index)
                if start_value is not None:
                    date_mask &= upload_dates >= pd.Timestamp(start_value)
                if end_value is not None:
                    # Strictly before the following midnight: includes the
                    # whole end date without spilling into the next day
                    date_mask &= upload_dates < pd.Timestamp(end_value) + pd.Timedelta(days=1)
                filtered_df = filtered_df[date_mask]
            
            # Display results
            if filtered_df.empty:
                display(widgets.HTML("<p>No samples match the current filters.</p>"))
            else:
                display(widgets.HTML(f"<p>Showing {len(filtered_df)} samples</p>"))
                display(filtered_df[['upload_id', 'sample_name', 'lab_id', 'upload_date', 'main_author']])
    
    # Apply button click handler
    def on_apply_click(b):
        with status_output:
            clear_output()
            try:
                print("Applying filters...")
                apply_filters()
                print("✓ Filters applied")
            except Exception as e:
                print(f"❌ Error applying filters: {str(e)}")
    
    # Reset button click handler
    def on_reset_click(b):
        with status_output:
            clear_output()
            try:
                search_input.value = ''
                author_filter.value = 'All Authors'
                if has_data():
                    reset_date_pickers()
                apply_filters()
                print("✓ Filters reset")
            except Exception as e:
                print(f"❌ Error resetting filters: {str(e)}")
    
    # Connect event handlers
    apply_button.on_click(on_apply_click)
    reset_button.on_click(on_reset_click)
    
    # Initialize author options
    update_author_options()
    
    # Combine widgets into a form
    search_ui = widgets.VBox([
        widgets.HTML("<h2>Search and Filter Samples</h2>"),
        widgets.HBox([search_input, author_filter]),
        widgets.HBox([start_date_picker, end_date_picker]),  # Two separate date pickers in a horizontal box
        widgets.HBox([apply_button, reset_button]),
        status_output,
        results_container
    ])
    
    return search_ui, update_author_options

## Data Export

In [14]:
# Helper for the data export tab
def apply_overrides_for_export(df, attributions):
    """
    Return a copy of ``df`` with local attribution overrides applied.

    Parameters:
    -----------
    df: pandas.DataFrame
        Sample table; must contain the columns 'upload_id' and 'main_author'.
    attributions: dict or None
        Mapping of upload_id -> dict. When the inner dict has a 'main_author'
        key, that value replaces the author of the matching rows.

    Returns:
    --------
    pandas.DataFrame
        A new dataframe (``df`` itself is never modified). Every row whose
        'upload_id' has an override is updated and flagged with
        ``attribution_overridden = True``. The flag column is only added when
        at least one override matches a row.
    """
    result = df.copy()
    if not attributions:
        return result

    for upload_id, attr_info in attributions.items():
        # Boolean mask so that ALL rows sharing this upload_id are updated,
        # not just the first one.
        matches = result['upload_id'] == upload_id
        if not matches.any():
            continue

        if 'attribution_overridden' not in result.columns:
            result['attribution_overridden'] = False

        # An override entry without 'main_author' keeps the existing author
        # but is still flagged as overridden.
        if 'main_author' in attr_info:
            result.loc[matches, 'main_author'] = attr_info['main_author']
        result.loc[matches, 'attribution_overridden'] = True

    return result


# Define the data export tab
def create_export_tab(data_state):
    """
    Build the "Export Sample Data" tab.

    Parameters:
    -----------
    data_state: dict
        Shared state; this tab reads ``data_state['df']`` (the sample
        dataframe, or None) and ``data_state['attributions']`` (the local
        attribution overrides keyed by upload_id).

    Returns:
    --------
    widgets.VBox
        The tab's widget tree. Clicking "Export Data" writes
        ``<filename>_<YYYYmmdd_HHMMSS>.csv`` or ``.xlsx`` (the Excel format
        uses the openpyxl engine) and shows a download link in the status
        area.
    """
    # Export format selection
    format_select = widgets.Dropdown(
        options=['CSV', 'Excel'],
        value='CSV',
        description='Format:',
        disabled=False,
        layout=widgets.Layout(width='30%')
    )

    # Include options
    include_options = widgets.Checkbox(
        value=True,
        description='Include attribution overrides',
        disabled=False
    )

    # Export button
    export_button = widgets.Button(
        description='Export Data',
        button_style='success',
        icon='download'
    )

    # File name input
    filename_input = widgets.Text(
        value='hysprint_samples',
        description='Filename:',
        disabled=False,
        layout=widgets.Layout(width='50%')
    )

    # Status output
    status_output = widgets.Output()

    # Export function
    def export_data():
        """Write the current data to disk and report the outcome in the status area."""
        with status_output:
            clear_output()

            source_df = data_state['df']
            if source_df is None or source_df.empty:
                print("❌ No data available to export")
                return

            # Fall back to the default name for an empty or whitespace-only entry.
            base_filename = filename_input.value.strip() or "hysprint_samples"
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"{base_filename}_{timestamp}"

            try:
                # Building the export frame happens inside the try so that a
                # malformed dataframe is reported like any other export error.
                if include_options.value:
                    export_df = apply_overrides_for_export(source_df, data_state['attributions'])
                else:
                    export_df = source_df.copy()

                if format_select.value == 'CSV':
                    full_filename = f"{filename}.csv"
                    export_df.to_csv(full_filename, index=False)
                else:  # Excel
                    full_filename = f"{filename}.xlsx"
                    export_df.to_excel(full_filename, index=False, engine='openpyxl')

                print(f"✓ Data exported successfully to {full_filename}")

                # Create a download link
                if os.path.exists(full_filename):
                    display(FileLink(full_filename, result_html_prefix="Click here to download: "))

            except Exception as e:
                # UI boundary: show the failure in the tab instead of raising.
                print(f"❌ Error exporting data: {str(e)}")

    # Export button click handler
    def on_export_click(b):
        export_data()

    export_button.on_click(on_export_click)

    # Combine widgets into a form
    export_ui = widgets.VBox([
        widgets.HTML("<h2>Export Sample Data</h2>"),
        widgets.HBox([filename_input, format_select]),
        include_options,
        export_button,
        status_output
    ])

    return export_ui

## Main Dashboard

In [15]:
# Create main dashboard
def create_dashboard():
    """
    Assemble the complete dashboard widget.

    Builds every tab, places them in a ``widgets.Tab`` with fixed titles,
    registers a handler that refreshes the attribution table, the charts and
    the search tab's author options whenever the matching tab is selected,
    and returns a ``widgets.VBox`` holding a header banner above the tabs.
    """
    # Build the tabs. The creation order is kept exactly as it was, since the
    # factories share state (auth_state -> data_state) and may print on load.
    auth_tab, auth_state = create_auth_tab()
    data_tab, data_state = create_data_tab(auth_state)
    attribution_tab = create_attribution_tab(data_state)
    viz_tab = create_visualization_tab(data_state)
    search_tab, update_author_options = create_search_tab(data_state)
    export_tab = create_export_tab(data_state)
    overview_tab = create_overview_tab(data_state)

    # Display order of the tabs together with their titles.
    titled_tabs = [
        ('Authentication', auth_tab),
        ('Data Retrieval', data_tab),
        ('Attribution Management', attribution_tab),
        ('Visualizations', viz_tab),
        ('Overview', overview_tab),
        ('Search & Filter', search_tab),
        ('Export Data', export_tab),
    ]
    tab = widgets.Tab([child for _, child in titled_tabs])
    for position, (title, _) in enumerate(titled_tabs):
        tab.set_title(position, title)

    def on_tab_change(change):
        """Refresh the content of the newly selected tab where needed."""
        selected = change['new']

        if selected == 5:  # Search tab
            update_author_options()
            return
        if selected not in (2, 3):
            return

        # Index 2 is the attribution tab, index 3 the visualization tab; both
        # render into the last child of their container.
        container = attribution_tab if selected == 2 else viz_tab
        output_area = container.children[-1]
        output_area.clear_output()
        with output_area:
            df = data_state['df']
            if df is None or df.empty:
                display(widgets.HTML("<p>No data available. Please fetch data first.</p>"))
            elif selected == 2:
                display(create_sample_table(df, data_state['attributions']))
            else:
                display(create_author_charts(df, data_state['attributions']))

    tab.observe(on_tab_change, names='selected_index')

    # Header banner shown above the tab strip.
    banner = widgets.HTML("""
        <div style="background-color: #4CAF50; color: white; padding: 10px; text-align: center; border-radius: 5px;">
            <h1>HySprint Sample Attribution Dashboard</h1>
            <p>A tool for managing NOMAD HySprint sample attributions</p>
        </div>
        """)

    return widgets.VBox([banner, tab])

In [16]:
# Initialize and display the dashboard
# The widget is kept in the module-level name `dashboard` so later cells can
# still reference it after it has been rendered.
dashboard = create_dashboard()
display(dashboard)

Loaded 71 attribution overrides


VBox(children=(HTML(value='\n        <div style="background-color: #4CAF50; color: white; padding: 10px; text-…