# NOMAD Samples Dashboard

This dashboard provides a modern interface for managing and analyzing HySprint samples in NOMAD. Key features:

1. **Authentication**: Secure connection to NOMAD Oasis instances
2. **Sample Management**: View and manage HySprint sample data
3. **Author Attribution**: Track and override sample attributions
4. **Analytics**: Visualize sample statistics and trends

## Setup and Dependencies

In [None]:
# Import required libraries
import ipywidgets as widgets
from ipywidgets import HBox, VBox, Button, Text, Password, Label, Dropdown
from IPython.display import display, clear_output, FileLink
import os
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path
from datetime import datetime, timedelta

# Configure plotting
%matplotlib inline
plt.style.use('seaborn-v0_8-whitegrid')

# Import NOMAD authentication and data modules
%run 'nomad_auth.ipynb'
from nomad_data import get_hysprint_data, load_attributions, save_attributions

## Authentication Tab

In [None]:
def create_auth_tab():
    """Build the authentication tab and a set of live state accessors.

    The names ``general_settings_box``, ``api_client``, ``current_token``,
    ``current_user_info``, ``oasis_dropdown`` and ``oasis_options`` are not
    defined in this cell; they are looked up in the notebook namespace
    (``general_settings_box`` is noted as coming from nomad_auth.ipynb).

    Returns:
        tuple: ``(auth_ui, auth_state)``. ``auth_ui`` is a VBox with a title,
        a hint and the settings box. ``auth_state`` maps string keys to
        zero-argument callables so that callers always read the current
        value rather than a snapshot taken when the tab was built.
    """
    def _is_authenticated():
        return api_client is not None

    def _token():
        return current_token

    def _user_info():
        return current_user_info

    def _client():
        return api_client

    def _selected_oasis():
        # Any falsy selection is reported as None
        return oasis_dropdown.value or None

    def _selected_oasis_url():
        selection = oasis_dropdown.value
        if not selection:
            return None
        return oasis_options.get(selection, None)

    # Accessors are evaluated lazily, at call time
    auth_state = {
        'is_authenticated': _is_authenticated,
        'token': _token,
        'user_info': _user_info,
        'client': _client,
        'oasis': _selected_oasis,
        'oasis_url': _selected_oasis_url
    }

    heading = widgets.HTML("<h3>NOMAD Authentication</h3>")
    hint = widgets.HTML("<p>Please authenticate with NOMAD to access data.</p>")
    auth_ui = widgets.VBox([
        heading,
        hint,
        general_settings_box  # From nomad_auth.ipynb
    ])

    return auth_ui, auth_state

## Utility Functions

In [None]:
def fetch_user_details(client, user_id):
    """Look up a display name for a user through the API client.

    The name is chosen in this order: "first_name last_name" (stripped),
    then ``username``, then ``email``, then the literal ``"ID: <user_id>"``.
    Fields that are missing, ``None`` or empty are skipped.

    Args:
        client: Object exposing ``make_request(method, path)``; the response
            is read with ``.get`` and is therefore expected to be dict-like.
        user_id: User ID to fetch details for.

    Returns:
        str: The resolved display name. If the request or the parsing of its
        response raises, the bare ``user_id`` is returned as a string.
    """
    try:
        user_data = client.make_request('get', f'users/{user_id}')

        # ``or ''`` covers keys that are present but null; a plain
        # ``.get(key, '')`` would return None and render as "None None".
        first_name = user_data.get('first_name') or ''
        last_name = user_data.get('last_name') or ''
        name = f"{first_name} {last_name}".strip()
        if name:
            return name

        # Chain with ``or`` so an empty username does not block the
        # e-mail / ID fallbacks.
        return (
            user_data.get('username')
            or user_data.get('email')
            or f"ID: {user_id}"
        )
    except Exception:
        # Deliberate best effort: a failed lookup must not break the
        # dashboard, so fall back to showing the raw ID.
        return f"{user_id}"

def get_author_names(client, df):
    """Return a copy of ``df`` with an ``author_name`` column added.

    Each distinct value of ``main_author`` is resolved once; missing or
    empty IDs become ``'Unknown'`` and every other ID is passed to
    ``fetch_user_details``.

    Args:
        client: NOMAD API client instance, forwarded to ``fetch_user_details``.
        df: DataFrame expected to contain a ``main_author`` column.

    Returns:
        pd.DataFrame: A new DataFrame including ``author_name``. The input is
        returned unchanged (same object) when ``client`` or ``df`` is None,
        when ``df`` is empty, or when it has no ``main_author`` column.
    """
    nothing_to_resolve = (
        client is None
        or df is None
        or df.empty
        or 'main_author' not in df.columns
    )
    if nothing_to_resolve:
        return df

    def _display_name(author_id):
        # Missing / blank IDs never reach the API
        if pd.isna(author_id) or author_id is None or author_id == '':
            return 'Unknown'
        return fetch_user_details(client, author_id)

    # One lookup per distinct author keeps the number of API calls low
    id_to_name = {
        author_id: _display_name(author_id)
        for author_id in df['main_author'].unique()
    }

    # Work on a copy so the caller's frame is left untouched
    enriched = df.copy()
    enriched['author_name'] = enriched['main_author'].map(id_to_name)
    return enriched

## Data Retrieval Tab

In [None]:
def create_data_tab(auth_state):
    """Assemble the "Data Retrieval" tab.

    Args:
        auth_state: Mapping of zero-argument callables. This tab calls
            ``auth_state['is_authenticated']()`` and ``auth_state['client']()``.

    Returns:
        tuple: ``(data_ui, data_state)``. ``data_ui`` is the VBox holding the
        tab's widgets. ``data_state`` is a dict with the keys ``'df'`` (None
        until a fetch succeeds, then the fetched frame with author names) and
        ``'attributions'`` (the result of ``load_attributions()``).
    """
    # Output areas: one for status messages, one for the plot
    status_output = widgets.Output()
    viz_output = widgets.Output()

    fetch_button = widgets.Button(
        description='Fetch HySprint Data',
        disabled=False,
        button_style='info',
        tooltip='Click to fetch all HySprint sample data from NOMAD',
        icon='database'
    )

    # Hidden until the first successful fetch
    time_period = widgets.RadioButtons(
        options=['Monthly', 'Yearly'],
        value='Monthly',
        description='Time Period:',
        disabled=False,
        layout=widgets.Layout(visibility='hidden')
    )

    # State shared with the other tabs
    data_state = {
        'df': None,
        'attributions': load_attributions()
    }

    def update_time_plot(period='Monthly'):
        """Redraw the uploads-per-period bar chart inside ``viz_output``."""
        with viz_output:
            clear_output()
            stored = data_state['df']
            if stored is None or stored.empty:
                print("No data available to plot.")
                return

            # Anything other than 'Monthly' is treated as yearly
            if period == 'Monthly':
                freq, title, x_title = 'M', 'Samples Uploaded per Month', 'Month'
            else:
                freq, title, x_title = 'Y', 'Samples Uploaded per Year', 'Year'

            # Convert on a copy so the stored frame keeps its original column
            frame = stored.copy()
            frame['upload_date'] = pd.to_datetime(frame['upload_date'])
            frame['period'] = frame['upload_date'].dt.to_period(freq).astype(str)

            counts = frame.groupby('period').size().reset_index(name='count')

            fig = px.bar(
                counts,
                x='period',
                y='count',
                title=title,
                labels={'period': x_title, 'count': 'Number of Samples'},
                color_discrete_sequence=['#4CAF50']
            )
            fig.update_layout(
                xaxis_tickangle=-45,
                plot_bgcolor='white',
                height=400,
                width=800,
                margin=dict(t=50, b=100)
            )
            fig.show()

    def on_time_period_change(change):
        """Redraw the plot when the radio button value changes."""
        is_value_change = change['type'] == 'change' and change['name'] == 'value'
        if is_value_change:
            update_time_plot(change['new'])

    def on_fetch_button_click(b):
        """Fetch the data, attach author names and refresh the plot."""
        with status_output:
            clear_output()

            if not auth_state['is_authenticated']():
                print("❌ Please authenticate first")
                return

            print("Fetching all available HySprint sample data records from NOMAD...")
            try:
                # Progress bars are noted as implemented in nomad_data.py
                fetched = get_hysprint_data(auth_state['client'](), max_entries=None)

                if fetched is None or fetched.empty:
                    print("❌ No data retrieved")
                    return

                data_state['df'] = get_author_names(auth_state['client'](), fetched)
                print(f"✓ Retrieved {len(fetched)} HySprint samples")

                # Reveal the selector and draw the plot for its current value
                time_period.layout.visibility = 'visible'
                update_time_plot(time_period.value)

            except Exception as e:
                print(f"❌ Error retrieving data: {e!s}")
                import traceback
                traceback.print_exc()

    time_period.observe(on_time_period_change, names='value')
    fetch_button.on_click(on_fetch_button_click)

    title_html = widgets.HTML("<h2>HySprint Data Retrieval</h2>")
    data_ui = widgets.VBox([
        title_html,
        fetch_button,
        status_output,
        widgets.HBox([time_period]),
        viz_output
    ])

    return data_ui, data_state

## Attribution Management Tab

In [None]:
def create_attribution_tab(data_state):
    """Create the attribution management tab.

    Args:
        data_state: Shared dict with the keys ``'df'`` (DataFrame or None)
            and ``'attributions'`` (mapping of attribution overrides). The
            Reset handler replaces ``data_state['attributions']`` with a
            fresh ``load_attributions()`` result.

    Returns:
        widgets.VBox: Container with the action buttons, a status area and
        the sample table.
    """
    save_button = widgets.Button(
        description='Save Attributions',
        disabled=False,
        button_style='success',
        tooltip='Save attribution changes to local file',
        icon='save'
    )

    reset_button = widgets.Button(
        description='Reset Changes',
        disabled=False,
        button_style='danger',
        tooltip='Reset all attribution changes',
        icon='refresh'
    )

    refresh_button = widgets.Button(
        description='Refresh Table',
        button_style='info',
        icon='sync'
    )

    # Status messages and the table are rendered into separate outputs
    status_output = widgets.Output()
    table_container = widgets.Output()

    def update_table():
        """Rebuild the sample table from the current shared state."""
        with table_container:
            clear_output()
            if data_state['df'] is not None and not data_state['df'].empty:
                table_widget = create_sample_table(data_state['df'], data_state['attributions'])
                display(table_widget)
            else:
                display(widgets.HTML("<p>No data available. Please fetch data first.</p>"))

    def on_save_button_click(b):
        """Persist the current overrides, including an empty set."""
        with status_output:
            clear_output()
            attributions = data_state['attributions']
            if attributions is None:
                print("ℹ️ No attributions to save")
                return

            # An empty mapping must still be written: skipping the save
            # after the last override was removed would leave the stale
            # overrides on disk, and "Reset Changes" would bring them back.
            if save_attributions(attributions):
                print(f"✓ Saved {len(attributions)} attribution overrides")
            else:
                print("❌ Failed to save attributions")

    def on_reset_button_click(b):
        """Discard unsaved edits by reloading the saved attributions."""
        with status_output:
            clear_output()
            data_state['attributions'] = load_attributions()
            print("✓ Attributions reset to saved state")
            # The table rows hold a reference to the old mapping, so rebuild
            update_table()

    def on_refresh_click(b):
        update_table()

    save_button.on_click(on_save_button_click)
    reset_button.on_click(on_reset_button_click)
    refresh_button.on_click(on_refresh_click)

    attribution_ui = widgets.VBox([
        widgets.HTML("<h2>Sample Attribution Management</h2>"),
        widgets.HBox([refresh_button, save_button, reset_button]),
        status_output,
        table_container
    ])

    # Show the table (or the "no data" hint) right away
    update_table()

    return attribution_ui

## Visualization Tab

In [None]:
def _plot_author_distribution(df):
    """Draw a horizontal bar chart of sample counts per author (top 15 at most)."""
    plt.figure(figsize=(10, 6))
    # Prefer resolved names; fall back to the raw author IDs
    author_column = 'author_name' if 'author_name' in df.columns else 'main_author'
    author_counts = df[author_column].value_counts()
    if len(author_counts) > 15:
        author_counts = author_counts.head(15)
        plt.title('Top 15 Authors by Sample Count')
    else:
        plt.title('Sample Distribution by Author')

    sns.barplot(x=author_counts.values, y=author_counts.index)
    plt.xlabel('Number of Samples')
    plt.tight_layout()
    plt.show()


def _plot_samples_over_time(df):
    """Draw a line chart of the number of samples per calendar day."""
    # Convert into a local Series: assigning the converted column back would
    # silently change the frame shared with the other tabs.
    upload_dates = pd.to_datetime(df['upload_date'])
    # Group by day, not by exact timestamp; otherwise almost every point
    # would be a count of 1. Date-only input is unaffected by normalize().
    samples_by_date = upload_dates.groupby(upload_dates.dt.normalize()).size()

    plt.figure(figsize=(12, 6))
    samples_by_date.plot(kind='line', marker='o')
    plt.title('Samples Over Time')
    plt.xlabel('Date')
    plt.ylabel('Number of Samples')
    plt.grid(True)
    plt.tight_layout()
    plt.show()


def _plot_efficiency(df):
    """Draw the efficiency histogram and, for at most 10 authors, a box plot."""
    plt.figure(figsize=(8, 6))
    sns.histplot(data=df, x='efficiency', bins=20)
    plt.title('Distribution of Sample Efficiencies')
    plt.xlabel('Efficiency (%)')
    plt.ylabel('Count')
    plt.tight_layout()
    plt.show()

    # The per-author box plot is only drawn for a small number of authors
    if 'author_name' in df.columns and len(df['author_name'].unique()) <= 10:
        plt.figure(figsize=(12, 7))
        sns.boxplot(data=df, x='author_name', y='efficiency')
        plt.title('Sample Efficiency by Author')
        plt.xlabel('Author')
        plt.ylabel('Efficiency (%)')
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.show()


def create_visualization_tab(data_state):
    """Create the visualization tab.

    Args:
        data_state: Shared dict; ``data_state['df']`` is read on every
            refresh and is never modified by this tab.

    Returns:
        widgets.VBox: Container with a refresh button, a status area and the
        plot output. The plots are drawn once on creation and again on each
        click of the refresh button.
    """
    viz_container = widgets.Output()
    status_output = widgets.Output()

    refresh_button = widgets.Button(
        description='Refresh Visualizations',
        button_style='info',
        icon='sync'
    )

    def update_visualizations():
        """Redraw all plots from the current shared DataFrame."""
        with viz_container:
            clear_output()
            df = data_state['df']
            if df is None or df.empty:
                display(widgets.HTML("<p>No data available. Please fetch data first.</p>"))
                return

            _plot_author_distribution(df)

            # Optional columns are skipped rather than raising KeyError
            if 'upload_date' in df.columns:
                _plot_samples_over_time(df)
            if 'efficiency' in df.columns:
                _plot_efficiency(df)

    def on_refresh_click(b):
        with status_output:
            clear_output()
            print("Refreshing visualizations...")
            update_visualizations()
            print("✓ Visualizations updated")

    refresh_button.on_click(on_refresh_click)

    viz_ui = widgets.VBox([
        widgets.HTML("<h2>Sample Visualizations</h2>"),
        refresh_button,
        status_output,
        viz_container
    ])

    # Draw the initial state (usually the "no data" hint)
    update_visualizations()

    return viz_ui

## Main Dashboard

In [None]:
def _label_text(value):
    """Return ``value`` as display text, mapping missing values to ''."""
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return ''
    return str(value)


def create_sample_table(df, attributions, max_rows=20):
    """Create an interactive table for sample attribution management.

    Args:
        df: DataFrame with the columns ``upload_id``, ``sample_name`` and
            either ``author_name`` or ``main_author``.
        attributions: Dict keyed by upload ID whose values are dicts with a
            ``'main_author'`` entry. It is modified in place by the per-row
            Save buttons (entries are added, replaced or deleted).
        max_rows: Maximum number of samples rendered (default 20, the
            previous fixed limit).

    Returns:
        widgets.VBox: Header row, one row per displayed sample and, when
        ``df`` has more than ``max_rows`` rows, a note about the truncation.
    """
    header_columns = [
        ('Upload ID', '150px'),
        ('Sample Name', '150px'),
        ('Current Author', '200px'),
        ('Override Author', '200px'),
        ('Actions', '100px'),
    ]
    # Layout has no font_weight trait, so the bold header is produced with
    # HTML markup instead of a Layout keyword that is not applied.
    header = widgets.HBox(
        [
            widgets.HTML(f"<b>{title}</b>", layout=widgets.Layout(width=width))
            for title, width in header_columns
        ],
        layout=widgets.Layout(margin='5px 0')
    )

    def make_save_handler(btn_upload_id, btn_input):
        """Bind one row's upload ID and text box to its Save button."""
        def save_handler(b):
            override_value = btn_input.value.strip()
            if override_value:
                attributions[btn_upload_id] = {
                    'main_author': override_value,
                    'override_date': datetime.now().strftime('%Y-%m-%d')
                }
                btn_input.style.background = '#d4f7d4'  # Light green
            else:
                # An empty box removes the override, if there was one
                attributions.pop(btn_upload_id, None)
                btn_input.style.background = ''
        return save_handler

    rows = [header]

    # Only the first ``max_rows`` samples are rendered, for performance
    for _, row in df.head(max_rows).iterrows():
        upload_id = row['upload_id']
        # Use author_name instead of main_author if available
        if 'author_name' in row.index:
            current_author = row['author_name']
        else:
            current_author = row['main_author']

        saved_override = attributions.get(upload_id, {}).get('main_author') or ''

        override_input = widgets.Text(
            value=_label_text(saved_override),
            placeholder='Enter override author',
            layout=widgets.Layout(width='200px')
        )
        # Highlight rows that already carry an override
        if saved_override:
            override_input.style.background = '#d4f7d4'

        save_button = widgets.Button(
            description='Save',
            button_style='primary',
            layout=widgets.Layout(width='80px')
        )
        save_button.on_click(make_save_handler(upload_id, override_input))

        # Label values must be strings; NaN / None cells are shown as blank
        row_widget = widgets.HBox([
            widgets.Label(_label_text(upload_id), layout=widgets.Layout(width='150px')),
            widgets.Label(_label_text(row['sample_name']), layout=widgets.Layout(width='150px')),
            widgets.Label(_label_text(current_author), layout=widgets.Layout(width='200px')),
            override_input,
            save_button
        ], layout=widgets.Layout(margin='2px 0'))

        rows.append(row_widget)

    # Tell the user when the table is truncated
    if len(df) > max_rows:
        rows.append(
            widgets.HTML(f"<em>Note: Showing first {max_rows} of {len(df)} samples.</em>")
        )

    table_container = widgets.VBox()
    table_container.children = rows
    return table_container

def create_dashboard():
    """Build the complete dashboard widget.

    The four tabs are created in order (authentication, data retrieval,
    attribution management, visualizations); the state returned by the first
    two is handed on to the tabs that need it.

    Returns:
        widgets.VBox: A banner followed by the tab widget. The caller is
        responsible for displaying it.
    """
    auth_tab, auth_state = create_auth_tab()
    data_tab, data_state = create_data_tab(auth_state)

    # (title, page) pairs in display order
    pages = [
        ('Authentication', auth_tab),
        ('Data Retrieval', data_tab),
        ('Attribution Management', create_attribution_tab(data_state)),
        ('Visualizations', create_visualization_tab(data_state)),
    ]

    tab = widgets.Tab([page for _, page in pages])
    for index, (title, _) in enumerate(pages):
        tab.set_title(index, title)

    banner = widgets.HTML("""
        <div style="background-color: #4CAF50; color: white; padding: 10px; text-align: center; border-radius: 5px;">
            <h1>NOMAD Samples Dashboard</h1>
            <p>Manage and analyze HySprint samples in NOMAD</p>
        </div>
        """)

    return widgets.VBox([banner, tab])

# Build the dashboard and render it as this cell's output.
# The widget stays bound to the notebook-level name `dashboard`.
dashboard = create_dashboard()
display(dashboard)