# NOMAD Samples Dashboard

This dashboard provides a modern interface for managing and analyzing HySprint samples in NOMAD. Key features:

1. **Authentication**: Secure connection to NOMAD Oasis instances
2. **Sample Management**: View and manage HySprint sample data
3. **Author Attribution**: Track and override sample attributions
4. **Analytics**: Visualize sample statistics and trends

## Setup and Dependencies

In [1]:
# Import required libraries
import ipywidgets as widgets
from ipywidgets import HBox, VBox, Button, Text, Password, Label, Dropdown
from IPython.display import display, clear_output, FileLink
import os
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path
from datetime import datetime, timedelta
import asyncio
from IPython.lib.backgroundjobs import BackgroundJobManager
jobs = BackgroundJobManager()

# Configure plotting
%matplotlib inline
plt.style.use('seaborn-v0_8-whitegrid')

# Import NOMAD modules
%run 'nomad_auth.ipynb'
%run 'nomad_data_retrieval.ipynb'
%run 'nomad_attribution.ipynb'
from nomad_data import load_attributions, save_attributions

## Authentication Tab

In [2]:
def create_auth_tab():
    """Build the authentication tab and a lazily evaluated view of the login state.

    The names read here (``general_settings_box``, ``api_client``,
    ``current_token``, ``current_user_info``, ``oasis_dropdown`` and
    ``oasis_options``) are not defined in this cell; they must already exist
    in the notebook namespace (the settings box is attributed to
    nomad_auth.ipynb).

    Returns:
        tuple: ``(auth_ui, auth_state)`` where ``auth_ui`` is the VBox to put
        in the tab and ``auth_state`` maps a field name to a zero-argument
        callable. Each callable looks up the current value when it is called,
        so consumers always see the latest login state.
    """
    def _is_authenticated():
        return api_client is not None

    def _token():
        return current_token

    def _user_info():
        return current_user_info

    def _client():
        return api_client

    def _oasis():
        # An empty/unset selection is reported as None.
        return oasis_dropdown.value or None

    def _oasis_url():
        selected = oasis_dropdown.value
        if not selected:
            return None
        return oasis_options.get(selected)

    heading = widgets.HTML("<h3>NOMAD Authentication</h3>")
    hint = widgets.HTML("<p>Please authenticate with NOMAD to access data.</p>")
    auth_ui = widgets.VBox([heading, hint, general_settings_box])

    auth_state = {
        'is_authenticated': _is_authenticated,
        'token': _token,
        'user_info': _user_info,
        'client': _client,
        'oasis': _oasis,
        'oasis_url': _oasis_url,
    }

    return auth_ui, auth_state

## Data Retrieval Tab

In [3]:
def initialize_data_tab(auth_state):
    """Build the data retrieval tab for the given authentication state.

    This is a thin adapter around ``create_data_tab``, which is expected to be
    in the notebook namespace (attributed to nomad_data_retrieval.ipynb). The
    result is passed through untouched; ``create_dashboard`` unpacks it as
    ``(data_tab, data_state)``.
    """
    data_tab_result = create_data_tab(auth_state)
    return data_tab_result

## Attribution Management Tab

In [4]:
def initialize_attribution_tab(data_state):
    """Build the attribution management tab for the given data state.

    This is a thin adapter around ``create_attribution_tab``, which is expected
    to be in the notebook namespace (attributed to nomad_attribution.ipynb).
    Its return value is handed back unchanged and used as the tab's widget.
    """
    attribution_tab = create_attribution_tab(data_state)
    return attribution_tab

## Visualization Tab

In [5]:
def _author_distribution_figure(df):
    """Return a bar chart with the number of rows per ``effective_author``."""
    author_counts = df['effective_author'].value_counts()

    fig = go.Figure(data=[
        go.Bar(
            x=author_counts.index,
            y=author_counts.values,
            text=author_counts.values,
            textposition='auto',
        )
    ])
    fig.update_layout(
        title='Sample Distribution by Author',
        xaxis_title='Author',
        yaxis_title='Number of Samples',
        height=700,
        xaxis={'tickangle': 45},
        margin=dict(b=100)
    )
    return fig


def _time_series_figure(df, freq, stacked):
    """Return a bar chart of row counts per time bucket and ``effective_author``.

    ``freq`` is the pandas offset alias used to bucket ``upload_date``;
    ``stacked`` selects stacked bars (with a total label above each stack)
    instead of grouped bars.
    """
    df_grouped = (
        df.groupby([pd.Grouper(key='upload_date', freq=freq), 'effective_author'])
        .size()
        .unstack(fill_value=0)
    )

    fig = go.Figure()
    for author in df_grouped.columns:
        fig.add_trace(go.Bar(
            name=author,
            x=df_grouped.index,
            y=df_grouped[author],
            text=df_grouped[author],
            textposition='inside' if stacked else 'auto'
        ))

    if stacked:
        # A text-only scatter trace puts the per-period total above each stack.
        totals = df_grouped.sum(axis=1)
        fig.add_trace(go.Scatter(
            x=df_grouped.index,
            y=totals,
            mode='text',
            text=totals,
            textposition='top center',
            showlegend=False,
            textfont=dict(size=12)
        ))
        fig.update_layout(barmode='stack')
        title = 'Samples Over Time (Stacked by Author)'
    else:
        fig.update_layout(barmode='group')
        title = 'Samples Over Time (Grouped by Author)'

    fig.update_layout(
        title=title,
        xaxis_title='Date',
        yaxis_title='Number of Samples',
        height=700,
        showlegend=True,
        legend_title_text='Author'
    )
    return fig


def create_visualization_tab(data_state):
    """Create the visualization tab with interactive visualizations.

    Args:
        data_state: Mapping whose ``'df'`` entry is either ``None`` or a pandas
            DataFrame. The frame must have an ``upload_date`` column and an
            ``author_name`` (preferred) or ``main_author`` column; an optional
            ``override_author`` column takes precedence wherever it is set.

    Returns:
        The VBox holding the controls, a message area, the author
        distribution chart and the time series chart.
    """
    # Message area for "nothing to plot" notices. It is part of the returned
    # layout so that these notices are actually visible to the user.
    viz_container = widgets.Output()

    # Date range selector - initially None to show all data
    start_date = widgets.DatePicker(
        description='Start Date:',
        value=None,
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='300px')
    )

    end_date = widgets.DatePicker(
        description='End Date:',
        value=None,
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='300px')
    )

    # Time series options
    time_grouping = widgets.RadioButtons(
        options=['Daily', 'Weekly', 'Monthly', 'Yearly'],
        value='Monthly',
        description='Group by:',
        style={'description_width': 'initial'}
    )

    plot_type = widgets.RadioButtons(
        options=['Stacked Bars', 'Grouped Bars'],
        value='Stacked Bars',
        description='Plot type:',
        style={'description_width': 'initial'}
    )

    # Show visualization button
    show_viz_button = widgets.Button(
        description='Show Visualizations',
        button_style='primary',
        icon='chart-bar',
        layout=widgets.Layout(width='200px')
    )

    # Create containers for each plot type
    author_plot_container = widgets.Output(
        layout=widgets.Layout(
            width='100%',
            min_height='700px'
        )
    )

    # The width leaves room for the 250px options box (plus its margins)
    # that sits next to the time series chart.
    time_series_container = widgets.Output(
        layout=widgets.Layout(
            width='calc(100% - 290px)',
            min_height='700px'
        )
    )

    # Radio button label -> pandas offset alias used for bucketing.
    freq_by_grouping = {'Daily': 'D', 'Weekly': 'W', 'Monthly': 'ME', 'Yearly': 'YE'}

    def show_message(html):
        """Replace both charts with a single HTML notice."""
        # Clear immediately (no wait=True): nothing new is written into the
        # plot containers here, so a deferred clear would leave stale charts.
        with author_plot_container:
            clear_output()
        with time_series_container:
            clear_output()
        with viz_container:
            clear_output(wait=True)
            display(widgets.HTML(html))

    def update_visualizations(*args):
        """Redraw both charts; ``*args`` absorbs the observer/click payload."""
        source_df = data_state.get('df')
        if source_df is None or source_df.empty:
            show_message("<p>No data available. Please fetch data first.</p>")
            return

        df = source_df.copy()

        # Convert dates
        df['upload_date'] = pd.to_datetime(df['upload_date'])

        # Apply date filter only if both dates are set
        if start_date.value is not None and end_date.value is not None:
            range_start = pd.Timestamp(start_date.value)
            # The picker yields a date (midnight once converted), so use an
            # exclusive bound one day later to keep the whole end day.
            range_end = pd.Timestamp(end_date.value) + pd.Timedelta(days=1)
            mask = (df['upload_date'] >= range_start) & (df['upload_date'] < range_end)
            # Copy so the column assignment below does not write to a slice.
            df = df[mask].copy()

        if df.empty:
            show_message("<p>No samples found in the selected date range.</p>")
            return

        # Data is available: drop any notice left over from an earlier call.
        with viz_container:
            clear_output()

        # Use attribution overrides if available
        author_col = 'author_name' if 'author_name' in df.columns else 'main_author'
        if 'override_author' in df.columns:
            df['effective_author'] = df['override_author'].fillna(df[author_col])
        else:
            df['effective_author'] = df[author_col]

        # Author distribution plot
        with author_plot_container:
            clear_output(wait=True)
            _author_distribution_figure(df).show()

        # Time series plot
        with time_series_container:
            clear_output(wait=True)
            freq = freq_by_grouping.get(time_grouping.value, 'YE')
            stacked = plot_type.value == 'Stacked Bars'
            _time_series_figure(df, freq, stacked).show()

    # Set up event handlers
    start_date.observe(update_visualizations, 'value')
    end_date.observe(update_visualizations, 'value')
    time_grouping.observe(update_visualizations, 'value')
    plot_type.observe(update_visualizations, 'value')
    show_viz_button.on_click(update_visualizations)

    # Time series controls box
    time_series_controls = widgets.VBox([
        widgets.HTML("<h4>Time Series Options:</h4>"),
        time_grouping,
        plot_type
    ], layout=widgets.Layout(
        margin='10px 20px',
        padding='15px',
        border='1px solid #ddd',
        border_radius='5px',
        width='250px',
        align_items='flex-start'
    ))

    # Organize global controls with more spacing
    global_controls = widgets.VBox([
        widgets.HTML("<h3>Global Controls</h3>"),
        show_viz_button,
        widgets.HBox([start_date, end_date], layout=widgets.Layout(margin='10px 0'))
    ], layout=widgets.Layout(
        margin='0 0 30px 0',
        padding='15px',
        border='1px solid #ddd',
        border_radius='5px'
    ))

    # Create a divider
    divider = widgets.HTML("<hr style='border: none; border-top: 1px solid #ddd; margin: 20px 0;'>")

    # Create layout for time series with controls and proper spacing
    time_series_layout = widgets.HBox([
        time_series_container,
        time_series_controls
    ], layout=widgets.Layout(
        margin='20px 0',
        width='100%',
        align_items='flex-start'
    ))

    # Combine widgets into form with improved spacing
    viz_ui = widgets.VBox([
        widgets.HTML("<h2>Sample Visualizations</h2>"),
        global_controls,
        viz_container,
        author_plot_container,
        divider,
        time_series_layout
    ], layout=widgets.Layout(
        margin='20px',
        width='100%'
    ))

    # Initialize if data is already present
    if data_state.get('df') is not None and not data_state['df'].empty:
        update_visualizations()

    return viz_ui

## Cache Management Tab

In [6]:
def format_timestamp(ts_str):
    """Format an ISO 8601 timestamp string as ``YYYY-MM-DD HH:MM:SS``.

    Args:
        ts_str: Timestamp to format, normally an ISO 8601 string.

    Returns:
        ``'N/A'`` when ``ts_str`` is falsy, the formatted string when it can
        be parsed, and ``ts_str`` itself (unchanged) when it cannot, so the
        caller still has something to display.
    """
    if not ts_str:
        return 'N/A'
    try:
        iso_text = ts_str
        # datetime.fromisoformat only accepts a trailing 'Z' from Python 3.11
        # on; spell it as an explicit UTC offset so older versions parse it too.
        if isinstance(iso_text, str) and iso_text.endswith('Z'):
            iso_text = iso_text[:-1] + '+00:00'
        return datetime.fromisoformat(iso_text).strftime('%Y-%m-%d %H:%M:%S')
    except (TypeError, ValueError):
        # Not a string, or not ISO formatted: fall back to the raw value.
        return ts_str

def create_cache_tab():
    """Create the cache management tab.

    The tab lists the configured expiry time of every cache type, offers one
    clear button per cache type plus "clear all" and "refresh" buttons, and
    prints a statistics table that is refreshed after every action.

    Returns:
        The VBox containing the cache management UI.
    """
    from nomad_data import get_cache_stats, clear_cache, CACHE_CONFIG

    # Create widgets for cache management
    stats_output = widgets.Output()
    status_output = widgets.Output()

    # Create buttons for each cache type
    clear_buttons = {
        cache_type: widgets.Button(
            description=f'Clear {cache_type}',
            button_style='danger',
            layout={'width': '150px'}
        )
        for cache_type in CACHE_CONFIG
    }

    # Clear all button
    clear_all_button = widgets.Button(
        description='Clear All Cache',
        button_style='danger',
        icon='trash',
        layout={'width': '150px'}
    )

    # Refresh button
    refresh_button = widgets.Button(
        description='Refresh Stats',
        button_style='info',
        icon='sync',
        layout={'width': '150px'}
    )

    def update_stats():
        """Update the cache statistics display"""
        with stats_output:
            clear_output()
            stats = get_cache_stats()

            # Create a formatted table of stats. The rule is sized from the
            # header so it always spans the full table width.
            header = (
                f"{'Cache Type':<15} {'Items':<8} {'Size (KB)':<12} "
                f"{'Oldest':<25} {'Newest':<25}"
            )
            rule = "-" * len(header)
            print("Cache Statistics:")
            print(rule)
            print(header)
            print(rule)

            for cache_type, cache_stats in stats.items():
                # One string per row with the same column widths as the
                # header, so every value lines up under its heading.
                print(
                    f"{cache_type:<15} {cache_stats['count']:<8} "
                    f"{cache_stats['size_kb']:<12.2f} "
                    f"{format_timestamp(cache_stats['oldest']):<25} "
                    f"{format_timestamp(cache_stats['newest']):<25}"
                )

    def on_clear_click(cache_type):
        # Factory: binds cache_type per button instead of relying on the
        # loop variable at click time.
        def handler(b):
            with status_output:
                clear_output()
                print(f"Clearing {cache_type} cache...")
                clear_cache(cache_type)
                print(f"✓ {cache_type} cache cleared")
                update_stats()
        return handler

    def on_clear_all_click(b):
        with status_output:
            clear_output()
            print("Clearing all cache...")
            clear_cache()
            print("✓ All cache cleared")
            update_stats()

    def on_refresh_click(b):
        with status_output:
            clear_output()
            print("Refreshing cache statistics...")
            update_stats()
            print("✓ Statistics updated")

    # Set up button handlers
    for cache_type, button in clear_buttons.items():
        button.on_click(on_clear_click(cache_type))
    clear_all_button.on_click(on_clear_all_click)
    refresh_button.on_click(on_refresh_click)

    # Create the HTML for cache expiration times
    expiry_items = "".join(
        f"<li><b>{k}:</b> {v.get('expire_hours')} hours</li>"
        for k, v in CACHE_CONFIG.items()
    )
    cache_expiry_html = f"<ul>{expiry_items}</ul>"

    # Create the cache management UI
    cache_ui = widgets.VBox([
        widgets.HTML("<h2>Cache Management</h2>"),
        widgets.HTML(f"<p>Cache expiration times:{cache_expiry_html}</p>"),
        widgets.HBox([refresh_button, clear_all_button]),
        widgets.HBox(list(clear_buttons.values())),
        status_output,
        stats_output
    ])

    # Initialize stats display
    update_stats()

    return cache_ui

## Main Dashboard

In [None]:
def create_dashboard():
    """Assemble the complete dashboard: banner, tab styling and the five tabs.

    Returns:
        The VBox holding the header banner, the CSS that lets tab headers grow
        with their titles, and the tab widget itself. Nothing is displayed
        here; the caller decides when to render it.
    """
    # The builders run in this order because each later tab consumes the
    # state produced by an earlier one (auth_state -> data_state).
    auth_tab, auth_state = create_auth_tab()
    data_tab, data_state = initialize_data_tab(auth_state)
    tab_pages = [
        ("Authentication", auth_tab),
        ("Data Retrieval", data_tab),
        ("Attribution Management", initialize_attribution_tab(data_state)),
        ("Visualizations", create_visualization_tab(data_state)),
        ("Cache Management", create_cache_tab()),
    ]

    tab_widget = widgets.Tab([page for _, page in tab_pages])
    for position, (title, _) in enumerate(tab_pages):
        tab_widget.set_title(position, title)

    # CSS injected through an HTML widget so the tab headers size themselves
    # to their titles. The selectors cover several class prefixes (lm-, p-,
    # jp-); if the styling has no effect, inspect the tab elements in the
    # browser and adjust the selectors to the classes actually in use.
    tab_css = """
    <style>
    /* Try targeting common classes directly */
    .lm-TabBar-tab, .p-TabBar-tab, .jp-TabBar-tab {
        min-width: 100px !important;   /* Keep a minimum width */
        width: auto !important;        /* Allow expansion based on content */
        max-width: none !important;    /* Ensure no max-width is limiting it */
        flex: 0 0 auto !important;     /* Equivalent to flex-grow:0, flex-shrink:0, flex-basis:auto */
        padding-left: 15px !important; /* Add horizontal padding */
        padding-right: 15px !important;/* Add horizontal padding */
        white-space: nowrap !important; /* Prevent text wrapping */
        overflow: visible !important;  /* Ensure content isn't clipped */
        border: 1px solid #ccc !important; /* Add border for visibility if needed */
        margin-right: 2px !important; /* Add spacing between tabs */
    }

    /* Target the label inside specifically */
    .lm-TabBar-tab-label, .p-TabBar-tab-label, .jp-TabBar-tab-label {
        white-space: nowrap !important; /* Prevent text wrapping */
        overflow: visible !important;   /* Ensure label text isn't clipped */
        display: inline-block !important; /* Helps with sizing */
    }
    </style>
    """
    tab_styling = widgets.HTML(tab_css)

    # Green banner shown above the tabs.
    header_html = """
        <div style="background-color: #4CAF50; color: white; padding: 10px; text-align: center; border-radius: 5px;">
            <h1>NOMAD Samples Dashboard</h1>
            <p>Manage and analyze HySprint samples in NOMAD</p>
        </div>
        """
    banner = widgets.HTML(header_html)

    # Stack banner, styling and tabs; the styling widget has no visible
    # content of its own.
    dashboard = widgets.VBox([banner, tab_styling, tab_widget])

    return dashboard


# --- Run the Dashboard ---
# Build the dashboard once, keep it bound to the module-level name
# `dashboard`, and render it in this cell's output area.
dashboard = create_dashboard()
display(dashboard)


Loaded 23 attribution overrides


VBox(children=(HTML(value='\n        <div style="background-color: #4CAF50; color: white; padding: 10px; text-…