# NOMAD Samples Dashboard

This dashboard provides a modern interface for managing and analyzing HySprint samples in NOMAD. Key features:

1. **Authentication**: Secure connection to NOMAD Oasis instances
2. **Sample Management**: View and manage HySprint sample data
3. **Author Attribution**: Track and override sample attributions
4. **Analytics**: Visualize sample statistics and trends

## Setup and Dependencies

In [1]:
# Import required libraries
import ipywidgets as widgets
from ipywidgets import HBox, VBox, Button, Text, Password, Label, Dropdown
from IPython.display import display, clear_output, FileLink
import os
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path
from datetime import datetime, timedelta
import asyncio
from IPython.lib.backgroundjobs import BackgroundJobManager
# Background job manager instance. No cell visible in this notebook refers to
# `jobs` afterwards; NOTE(review): confirm whether it is still needed.
jobs = BackgroundJobManager()

# Configure plotting: render matplotlib figures inline in the cell output and
# apply the 'seaborn-v0_8-whitegrid' style sheet to figures created later.
%matplotlib inline
plt.style.use('seaborn-v0_8-whitegrid')

# Execute the companion notebooks so the names they define are available to the
# cells below. The later cells use `general_settings_box` (from
# nomad_auth.ipynb) and `create_data_tab` (from nomad_data_retrieval.ipynb);
# the other auth globals read in create_auth_tab (api_client, current_token,
# current_user_info, oasis_dropdown, oasis_options) presumably come from
# nomad_auth.ipynb as well -- verify against that notebook.
%run 'nomad_auth.ipynb'
%run 'nomad_data_retrieval.ipynb'
from nomad_data import load_attributions, save_attributions

## Authentication Tab

In [2]:
def create_auth_tab():
    """Build the authentication tab and a lazy view of the login state.

    The widgets and session variables read here (``general_settings_box``,
    ``api_client``, ``current_token``, ``current_user_info``,
    ``oasis_dropdown`` and ``oasis_options``) are notebook-level globals that
    are not defined in this cell; ``general_settings_box`` comes from
    nomad_auth.ipynb.

    Returns:
        A ``(auth_ui, auth_state)`` tuple. ``auth_ui`` is a ``VBox`` holding a
        heading, a short prompt and the settings box. ``auth_state`` maps
        names to zero-argument callables, so every lookup reads the current
        value of the underlying global instead of a snapshot taken here.
    """
    heading = widgets.HTML("<h3>NOMAD Authentication</h3>")
    prompt = widgets.HTML("<p>Please authenticate with NOMAD to access data.</p>")
    auth_ui = widgets.VBox([heading, prompt, general_settings_box])

    def is_authenticated():
        return api_client is not None

    def token():
        return current_token

    def user_info():
        return current_user_info

    def client():
        return api_client

    def selected_oasis():
        # An empty/falsy dropdown selection is reported as None.
        choice = oasis_dropdown.value
        return choice if choice else None

    def selected_oasis_url():
        # Only look the URL up when something is actually selected.
        choice = oasis_dropdown.value
        if not choice:
            return None
        return oasis_options.get(choice, None)

    auth_state = {
        'is_authenticated': is_authenticated,
        'token': token,
        'user_info': user_info,
        'client': client,
        'oasis': selected_oasis,
        'oasis_url': selected_oasis_url,
    }

    return auth_ui, auth_state

## Data Retrieval Tab

In [3]:
def initialize_data_tab(auth_state):
    """Build the data retrieval tab by delegating to ``create_data_tab``.

    ``create_data_tab`` is not defined in this notebook; it is the modular
    component provided by nomad_data_retrieval.ipynb.

    Args:
        auth_state: The authentication state mapping, passed through as-is.

    Returns:
        Whatever ``create_data_tab`` returns, unchanged. ``create_dashboard``
        unpacks it as a ``(data_tab, data_state)`` pair.
    """
    tab_and_state = create_data_tab(auth_state)
    return tab_and_state

## Attribution Management Tab

In [4]:
def create_attribution_tab(data_state):
    """Assemble the attribution management tab.

    Args:
        data_state: Mutable mapping shared with the other tabs. This function
            reads ``data_state['df']`` and ``data_state['attributions']`` and
            replaces ``data_state['attributions']`` when "Reset Changes" is
            clicked.

    Returns:
        A ``VBox`` with a heading, the Refresh/Save/Reset button row, a status
        output area and the sample table output area.
    """
    status_area = widgets.Output()
    table_area = widgets.Output()

    def render_table():
        """Redraw the sample table, or a hint when there is no data yet."""
        with table_area:
            clear_output()
            frame = data_state['df']
            if frame is None or frame.empty:
                display(widgets.HTML("<p>No data available. Please fetch data first.</p>"))
                return
            display(create_sample_table(frame, data_state['attributions']))

    def handle_save(_button):
        """Persist the in-memory overrides through ``save_attributions``."""
        with status_area:
            clear_output()
            overrides = data_state['attributions']
            # NOTE(review): an empty mapping is treated as "nothing to save",
            # so removing the last remaining override cannot be persisted from
            # here. Kept as-is; confirm whether that is intended.
            if not overrides:
                print("ℹ️ No attributions to save")
            elif save_attributions(overrides):
                print(f"✓ Saved {len(overrides)} attribution overrides")
            else:
                print("❌ Failed to save attributions")

    def handle_reset(_button):
        """Discard unsaved edits by reloading the stored overrides."""
        with status_area:
            clear_output()
            data_state['attributions'] = load_attributions()
            print("✓ Attributions reset to saved state")
            render_table()

    def handle_refresh(_button):
        render_table()

    refresh_button = widgets.Button(
        description='Refresh Table',
        button_style='info',
        icon='sync'
    )
    save_button = widgets.Button(
        description='Save Attributions',
        disabled=False,
        button_style='success',
        tooltip='Save attribution changes to local file',
        icon='save'
    )
    reset_button = widgets.Button(
        description='Reset Changes',
        disabled=False,
        button_style='danger',
        tooltip='Reset all attribution changes',
        icon='refresh'
    )

    # Wire each button to its handler.
    for button, handler in (
        (refresh_button, handle_refresh),
        (save_button, handle_save),
        (reset_button, handle_reset),
    ):
        button.on_click(handler)

    button_row = widgets.HBox([refresh_button, save_button, reset_button])
    attribution_ui = widgets.VBox([
        widgets.HTML("<h2>Sample Attribution Management</h2>"),
        button_row,
        status_area,
        table_area
    ])

    # Draw the table once so the tab is populated on first display.
    render_table()

    return attribution_ui

## Visualization Tab

In [5]:
def create_visualization_tab(data_state):
    """Create the visualization tab.

    Args:
        data_state: Mapping shared with the other tabs. Only
            ``data_state['df']`` is read here, and it is never modified.

    Returns:
        A ``VBox`` with a heading, a refresh button, a status output area and
        the output area that holds the plots.
    """
    viz_container = widgets.Output()
    status_output = widgets.Output()

    refresh_button = widgets.Button(
        description='Refresh Visualizations',
        button_style='info',
        icon='sync'
    )

    def update_visualizations():
        """Redraw every plot from the current contents of ``data_state['df']``."""
        with viz_container:
            clear_output()
            df = data_state['df']
            if df is None or df.empty:
                display(widgets.HTML("<p>No data available. Please fetch data first.</p>"))
                return
            # Each helper skips its plot when the columns it needs are absent,
            # so one missing column no longer aborts the plots that follow.
            _plot_author_distribution(df)
            _plot_samples_over_time(df)
            _plot_efficiency(df)

    def on_refresh_click(b):
        with status_output:
            clear_output()
            print("Refreshing visualizations...")
            update_visualizations()
            print("✓ Visualizations updated")

    refresh_button.on_click(on_refresh_click)

    viz_ui = widgets.VBox([
        widgets.HTML("<h2>Sample Visualizations</h2>"),
        refresh_button,
        status_output,
        viz_container
    ])

    # Draw once so the tab is populated on first display.
    update_visualizations()

    return viz_ui


def _plot_author_distribution(df, max_authors=15):
    """Draw a bar chart of sample counts per author (top ``max_authors``)."""
    # Prefer readable author names; fall back to the main_author column.
    column = 'author_name' if 'author_name' in df.columns else 'main_author'
    if column not in df.columns:
        return

    author_counts = df[column].value_counts()
    plt.figure(figsize=(10, 6))
    if len(author_counts) > max_authors:
        author_counts = author_counts.head(max_authors)
        plt.title(f'Top {max_authors} Authors by Sample Count')
    else:
        plt.title('Sample Distribution by Author')

    sns.barplot(x=author_counts.values, y=author_counts.index)
    plt.xlabel('Number of Samples')
    plt.tight_layout()
    plt.show()


def _plot_samples_over_time(df):
    """Draw a line chart of the number of samples per calendar day."""
    if 'upload_date' not in df.columns:
        return

    # Parse into a local Series: assigning the result back to df would rewrite
    # a column of the DataFrame that is shared with the other tabs.
    # normalize() drops the time of day so samples are counted per day rather
    # than per distinct timestamp.
    upload_days = pd.to_datetime(df['upload_date']).dt.normalize()
    samples_by_date = upload_days.groupby(upload_days).size()
    if samples_by_date.empty:
        return

    plt.figure(figsize=(12, 6))
    samples_by_date.plot(kind='line', marker='o')
    plt.title('Samples Over Time')
    plt.xlabel('Date')
    plt.ylabel('Number of Samples')
    plt.grid(True)
    plt.tight_layout()
    plt.show()


def _plot_efficiency(df, max_boxplot_authors=10):
    """Draw the efficiency histogram and, for few authors, a per-author boxplot."""
    if 'efficiency' not in df.columns:
        return

    plt.figure(figsize=(8, 6))
    sns.histplot(data=df, x='efficiency', bins=20)
    plt.title('Distribution of Sample Efficiencies')
    plt.xlabel('Efficiency (%)')
    plt.ylabel('Count')
    plt.tight_layout()
    plt.show()

    # The boxplot becomes unreadable with many categories, so it is only drawn
    # when there are at most ``max_boxplot_authors`` distinct authors.
    if 'author_name' in df.columns and len(df['author_name'].unique()) <= max_boxplot_authors:
        plt.figure(figsize=(12, 7))
        sns.boxplot(data=df, x='author_name', y='efficiency')
        plt.title('Sample Efficiency by Author')
        plt.xlabel('Author')
        plt.ylabel('Efficiency (%)')
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.show()

## Main Dashboard

In [None]:
def create_sample_table(df, attributions, max_rows=20):
    """Create an interactive table for sample attribution management.

    Args:
        df: DataFrame with one row per upload. It must have an ``upload_id``
            column; ``author_name`` is shown as the current author when
            present, otherwise ``main_author``; ``upload_name`` is optional.
        attributions: Dict keyed by upload id whose values are dicts with
            ``'main_author'`` and ``'override_date'``. The per-row "Save"
            buttons mutate this dict in place; nothing is written to disk here.
        max_rows: Maximum number of rows to render. Defaults to 20, the limit
            that was previously hard-coded.

    Returns:
        A ``VBox`` containing a reminder banner, a header row, a shared status
        line, one row per displayed sample and, when the data is truncated, a
        note saying how many samples are shown.
    """
    from html import escape  # function-scope import: only this block needs it

    def label_text(value, default='Unknown'):
        """Return ``value`` as text for a Label, or ``default`` when missing.

        ``widgets.Label`` only accepts strings, so None/NaN cells and
        non-string ids have to be converted before they are displayed.
        """
        if isinstance(value, str):
            return value
        try:
            missing = value is None or bool(pd.isna(value))
        except (TypeError, ValueError):
            # Not a scalar pandas can classify; show its string form instead.
            missing = False
        return default if missing else str(value)

    # Persistent notification at the top of the table.
    save_reminder = widgets.HTML(
        """<div style="background-color: #fff3cd; color: #856404; padding: 8px; 
        border-left: 5px solid #ffeeba; margin-bottom: 10px; border-radius: 3px;">
        <b>⚠️ Important:</b> Click individual row "Save" buttons to save changes temporarily. 
        Click the <b>"Save Attributions"</b> button at the top to make all changes permanent.
        </div>"""
    )

    # Header row. The titles are rendered through HTML <b> tags because
    # ``font_weight`` is not a Layout trait and therefore had no effect.
    header_columns = (
        ('Upload ID', '250px'),
        ('Upload Name', '200px'),
        ('Current Author', '200px'),
        ('Override Author', '200px'),
        ('Actions', '100px'),
    )
    header = widgets.HBox(
        [
            widgets.HTML(f"<b>{title}</b>", layout=widgets.Layout(width=width))
            for title, width in header_columns
        ],
        layout=widgets.Layout(margin='5px 0'),
    )

    # Status line shared by all per-row actions.
    row_status = widgets.HTML("", layout=widgets.Layout(margin='5px 0'))

    rows = [save_reminder, header, row_status]

    def make_save_handler(btn_upload_id, btn_input, btn_save):
        """Return the click handler bound to one row's id, input and button."""
        pending_reset = None  # most recently scheduled reset task for this row

        async def reset_after_delay(message):
            await asyncio.sleep(1.5)
            btn_save.description = 'Save'
            btn_save.button_style = 'primary'
            btn_save.icon = ''
            await asyncio.sleep(1.5)
            # Only clear the status line if it still shows this row's message;
            # another row may have written a newer one in the meantime.
            if row_status.value == message:
                row_status.value = ""

        def save_handler(b):
            nonlocal pending_reset

            # Drop every whitespace and non-printable character.
            # NOTE(review): this also removes spaces inside the value
            # ("Jane Doe" -> "JaneDoe"); kept as-is, confirm it is intended.
            raw_value = btn_input.value
            override_value = "".join(
                c for c in raw_value if c.isprintable() and not c.isspace()
            )

            # Show the cleaned value when cleaning changed it.
            if raw_value != override_value and override_value:
                btn_input.value = override_value

            # Immediate visual feedback on the button.
            btn_save.description = 'Saved!'
            btn_save.button_style = 'success'
            btn_save.icon = 'check'

            # The id ends up inside an HTML widget, so escape it.
            safe_id = escape(str(btn_upload_id))
            if override_value:
                attributions[btn_upload_id] = {
                    'main_author': override_value,
                    'override_date': datetime.now().strftime('%Y-%m-%d')
                }
                btn_input.style.background = '#d4f7d4'  # light green
                message = f"<span style='color: green'>✓ Attribution saved for upload {safe_id}</span>"
            else:
                # An empty value removes any existing override.
                attributions.pop(btn_upload_id, None)
                btn_input.style.background = ''
                message = f"<span style='color: blue'>ℹ Attribution removed for upload {safe_id}</span>"
            row_status.value = message

            try:
                loop = asyncio.get_running_loop()
            except RuntimeError:
                # No event loop is running: run the reset to completion here,
                # which blocks for the delay exactly as the old fallback did.
                asyncio.run(reset_after_delay(message))
                return

            # A second click restarts the timer instead of letting the older
            # task revert the button early. Holding the task in pending_reset
            # also keeps a strong reference, so it cannot be garbage-collected
            # before it finishes.
            if pending_reset is not None and not pending_reset.done():
                pending_reset.cancel()
            pending_reset = loop.create_task(reset_after_delay(message))

        return save_handler

    # Column shown as "Current Author". Chosen once, without touching
    # 'main_author' when 'author_name' is available.
    author_column = 'author_name' if 'author_name' in df.columns else 'main_author'

    # Only the first max_rows samples are rendered, for performance.
    for _, row in df.head(max_rows).iterrows():
        upload_id = row['upload_id']
        existing_override = (attributions.get(upload_id) or {}).get('main_author') or ''

        override_input = widgets.Text(
            value=existing_override,
            placeholder='Enter override author',
            layout=widgets.Layout(width='200px')
        )
        # Highlight rows that already carry an override.
        if existing_override:
            override_input.style.background = '#d4f7d4'

        save_button = widgets.Button(
            description='Save',
            button_style='primary',
            layout=widgets.Layout(width='80px')
        )
        save_button.on_click(make_save_handler(upload_id, override_input, save_button))

        rows.append(widgets.HBox([
            widgets.Label(label_text(upload_id), layout=widgets.Layout(width='250px')),
            widgets.Label(label_text(row.get('upload_name')), layout=widgets.Layout(width='200px')),
            widgets.Label(label_text(row.get(author_column)), layout=widgets.Layout(width='200px')),
            override_input,
            save_button
        ], layout=widgets.Layout(margin='2px 0')))

    # Tell the user when not every sample is displayed.
    if len(df) > max_rows:
        rows.append(widgets.HTML(
            f"<em>Note: Showing first {max_rows} of {len(df)} samples.</em>"
        ))

    table_container = widgets.VBox()
    table_container.children = rows
    return table_container

def create_dashboard():
    """Assemble the four tabs under a banner and return the dashboard widget.

    The tabs are built in order (authentication, data retrieval, attribution
    management, visualizations): the data tab receives the authentication
    state, and the last two tabs receive the data state returned by the data
    tab.

    Returns:
        A ``VBox`` holding the HTML banner followed by the ``Tab`` widget. The
        caller is responsible for displaying it.
    """
    auth_tab, auth_state = create_auth_tab()
    data_tab, data_state = initialize_data_tab(auth_state)

    # (title, widget) pairs, listed in display order.
    pages = (
        ('Authentication', auth_tab),
        ('Data Retrieval', data_tab),
        ('Attribution Management', create_attribution_tab(data_state)),
        ('Visualizations', create_visualization_tab(data_state)),
    )

    tab = widgets.Tab([page for _, page in pages])
    for position, (title, _) in enumerate(pages):
        tab.set_title(position, title)

    banner = widgets.HTML("""
        <div style="background-color: #4CAF50; color: white; padding: 10px; text-align: center; border-radius: 5px;">
            <h1>NOMAD Samples Dashboard</h1>
            <p>Manage and analyze HySprint samples in NOMAD</p>
        </div>
        """)

    return widgets.VBox([banner, tab])

# Build the dashboard once and render it as this cell's output. The widget
# stays bound to the notebook-level name `dashboard` after the cell has run.
dashboard = create_dashboard()
display(dashboard)

Loaded 62 attribution overrides


VBox(children=(HTML(value='\n        <div style="background-color: #4CAF50; color: white; padding: 10px; text-…