# NOMAD Samples Dashboard

This dashboard provides a modern interface for managing and analyzing HySprint samples in NOMAD. Key features:

1. **Authentication**: Secure connection to NOMAD Oasis instances
2. **Sample Management**: View and manage HySprint sample data
3. **Author Attribution**: Track and override sample attributions
4. **Analytics**: Visualize sample statistics and trends

## Setup and Dependencies

In [None]:
# Import required libraries
import ipywidgets as widgets
from ipywidgets import HBox, VBox, Button, Text, Password, Label, Dropdown
from IPython.display import display, clear_output, FileLink
import os
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path
from datetime import datetime, timedelta
import asyncio
from IPython.lib.backgroundjobs import BackgroundJobManager
jobs = BackgroundJobManager()

# Configure plotting
%matplotlib inline
plt.style.use('seaborn-v0_8-whitegrid')

# Import NOMAD modules
%run 'nomad_auth.ipynb'
%run 'nomad_data_retrieval.ipynb'
%run 'nomad_attribution.ipynb'
from nomad_data import load_attributions, save_attributions

## Authentication Tab

In [None]:
def create_auth_tab():
    """Build the authentication tab and a lazy view of the login state.

    Returns:
        A ``(auth_ui, auth_state)`` tuple. ``auth_ui`` is a ``VBox`` holding a
        heading, a short prompt and ``general_settings_box``. ``auth_state``
        maps string keys to zero-argument callables; each callable reads the
        corresponding notebook-level global when it is invoked, so callers
        always see the value current at call time rather than at tab creation.
    """
    # NOTE(review): api_client, current_token, current_user_info,
    # oasis_dropdown and oasis_options are not defined in this notebook;
    # presumably they come from nomad_auth.ipynb like general_settings_box.

    def _is_authenticated():
        return api_client is not None

    def _token():
        return current_token

    def _user_info():
        return current_user_info

    def _client():
        return api_client

    def _oasis():
        selected = oasis_dropdown.value
        return selected if selected else None

    def _oasis_url():
        selected = oasis_dropdown.value
        if not selected:
            return None
        return oasis_options.get(selected, None)

    auth_state = {
        'is_authenticated': _is_authenticated,
        'token': _token,
        'user_info': _user_info,
        'client': _client,
        'oasis': _oasis,
        'oasis_url': _oasis_url,
    }

    heading = widgets.HTML("<h3>NOMAD Authentication</h3>")
    prompt = widgets.HTML("<p>Please authenticate with NOMAD to access data.</p>")
    # general_settings_box is provided by nomad_auth.ipynb
    auth_ui = widgets.VBox([heading, prompt, general_settings_box])

    return auth_ui, auth_state

## Data Retrieval Tab

In [None]:
def initialize_data_tab(auth_state):
    """Build the data retrieval tab by delegating to ``create_data_tab``.

    Args:
        auth_state: Authentication state forwarded unchanged to
            ``create_data_tab``.

    Returns:
        Whatever ``create_data_tab(auth_state)`` returns, passed through
        untouched.
    """
    # create_data_tab is the modular component from nomad_data_retrieval.ipynb
    data_tab_result = create_data_tab(auth_state)
    return data_tab_result

## Visualization Tab

In [None]:
def _plot_author_distribution(df):
    """Draw a bar chart of sample counts per author (top 15 at most)."""
    # Prefer human-readable names; fall back to the raw main_author column.
    if 'author_name' in df.columns:
        author_column = 'author_name'
    elif 'main_author' in df.columns:
        author_column = 'main_author'
    else:
        # Neither column exists: skip this plot instead of raising KeyError
        # and aborting the remaining visualizations.
        display(widgets.HTML("<p>No author information available.</p>"))
        return

    author_counts = df[author_column].value_counts()
    if author_counts.empty:
        display(widgets.HTML("<p>No author information available.</p>"))
        return

    plt.figure(figsize=(10, 6))
    # Limit to top 15 authors if there are many
    if len(author_counts) > 15:
        author_counts = author_counts.head(15)
        plt.title('Top 15 Authors by Sample Count')
    else:
        plt.title('Sample Distribution by Author')

    sns.barplot(x=author_counts.values, y=author_counts.index)
    plt.xlabel('Number of Samples')
    plt.tight_layout()
    plt.show()


def _plot_samples_over_time(df):
    """Draw a line chart of the number of samples per upload timestamp."""
    if 'upload_date' not in df.columns:
        display(widgets.HTML("<p>No upload dates available.</p>"))
        return

    # Convert into a separate Series: the DataFrame is shared with the other
    # tabs through data_state, so it must not be modified just to draw a plot.
    upload_dates = pd.to_datetime(df['upload_date'])
    # NOTE(review): this groups by the exact timestamp value, as before. If
    # upload_date carries a time of day, per-day counts may be what is wanted.
    samples_by_date = upload_dates.groupby(upload_dates).size()
    if samples_by_date.empty:
        display(widgets.HTML("<p>No upload dates available.</p>"))
        return

    plt.figure(figsize=(12, 6))
    samples_by_date.plot(kind='line', marker='o')
    plt.title('Samples Over Time')
    plt.xlabel('Date')
    plt.ylabel('Number of Samples')
    plt.grid(True)
    plt.tight_layout()
    plt.show()


def _plot_efficiency(df):
    """Draw the efficiency histogram and, for few authors, a per-author boxplot."""
    if 'efficiency' not in df.columns:
        return

    plt.figure(figsize=(8, 6))
    sns.histplot(data=df, x='efficiency', bins=20)
    plt.title('Distribution of Sample Efficiencies')
    plt.xlabel('Efficiency (%)')
    plt.ylabel('Count')
    plt.tight_layout()
    plt.show()

    # The per-author boxplot is only drawn for at most 10 distinct authors.
    if 'author_name' in df.columns and len(df['author_name'].unique()) <= 10:
        plt.figure(figsize=(12, 7))
        sns.boxplot(data=df, x='author_name', y='efficiency')
        plt.title('Sample Efficiency by Author')
        plt.xlabel('Author')
        plt.ylabel('Efficiency (%)')
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.show()


def create_visualization_tab(data_state):
    """Create the visualization tab.

    Args:
        data_state: Mapping whose ``'df'`` entry is either ``None`` or a
            pandas DataFrame of samples. It is re-read on every refresh and
            is never modified by this tab.

    Returns:
        A ``VBox`` containing a heading, a refresh button, a status area and
        the output area the plots are rendered into.
    """
    # Visualization container
    viz_container = widgets.Output()

    # Refresh button
    refresh_button = widgets.Button(
        description='Refresh Visualizations',
        button_style='info',
        icon='sync'
    )

    # Status output
    status_output = widgets.Output()

    def update_visualizations():
        """Redraw every plot from the DataFrame currently in data_state."""
        with viz_container:
            clear_output()
            df = data_state['df']
            if df is None or df.empty:
                display(widgets.HTML("<p>No data available. Please fetch data first.</p>"))
                return

            # Each helper checks for the columns it needs, so one missing
            # column no longer prevents the other plots from being drawn.
            _plot_author_distribution(df)
            _plot_samples_over_time(df)
            _plot_efficiency(df)

    def on_refresh_click(b):
        """Handle a click on the refresh button by redrawing the plots."""
        with status_output:
            clear_output()
            print("Refreshing visualizations...")
            update_visualizations()
            print("✓ Visualizations updated")

    refresh_button.on_click(on_refresh_click)

    # Combine widgets into a form
    viz_ui = widgets.VBox([
        widgets.HTML("<h2>Sample Visualizations</h2>"),
        refresh_button,
        status_output,
        viz_container
    ])

    # Initialize visualizations
    update_visualizations()

    return viz_ui

## Main Dashboard

In [None]:
def create_dashboard():
    """Assemble the four dashboard tabs under a header banner.

    The tabs are built in order: authentication, data retrieval, attribution
    management, visualizations. The authentication state feeds the data tab,
    and the data state feeds both the attribution and visualization tabs.

    Returns:
        A ``VBox`` holding the header banner followed by the ``Tab`` widget.
    """
    auth_tab, auth_state = create_auth_tab()
    data_tab, data_state = initialize_data_tab(auth_state)
    attribution_tab = create_attribution_tab(data_state)
    viz_tab = create_visualization_tab(data_state)

    # One table drives both the tab order and the tab titles.
    titled_pages = [
        ('Authentication', auth_tab),
        ('Data Retrieval', data_tab),
        ('Attribution Management', attribution_tab),
        ('Visualizations', viz_tab),
    ]

    tab = widgets.Tab([page for _, page in titled_pages])
    for position, (title, _) in enumerate(titled_pages):
        tab.set_title(position, title)

    # Header banner shown above the tabs
    banner = widgets.HTML("""
        <div style="background-color: #4CAF50; color: white; padding: 10px; text-align: center; border-radius: 5px;">
            <h1>NOMAD Samples Dashboard</h1>
            <p>Manage and analyze HySprint samples in NOMAD</p>
        </div>
        """)

    return widgets.VBox([banner, tab])

# Build the dashboard when this cell runs and render it in the cell output.
# The widget stays bound to the notebook-level name `dashboard` afterwards.
dashboard = create_dashboard()
display(dashboard)