# Inkjet Printing Analysis

## How to use this notebook:
1. Select batches to analyze (only batches of type "hysprint_batch" are considered)
2. The data will be loaded into a pandas DataFrame
3. Use the plotting tools to visualize your data:
   - Create scatter plots for comparing two parameters
   - Use box plots to analyze parameter distributions
4. Access advanced features for data table viewing and statistics

# JV Parameter Visualization Guide

The JV parameters (efficiency, open_circuit_voltage, fill_factor, short_circuit_current_density, series_resistance, shunt_resistance) are stored as a list of values for each sample, because each sample can have multiple pixels or measurements.

## Visualization Options

- **Scatter Plot**: Shows the relationship between two parameters
- **Box Plot**: Shows the distribution of values grouped by a categorical variable, with all individual data points shown

## JV Data Display Options

- **All Points**: Shows all individual measurement points for complete data visualization
- **Mean**: Shows only the mean value for each sample
- **Max**: Shows only the maximum value for each sample
- **Min**: Shows only the minimum value for each sample
- **Median**: Shows only the median value for each sample

## Data Display Features

- Points from the same sample are colored consistently for easy identification
- Hover over points to see detailed information about each measurement

In [1]:
%matplotlib ipympl
%load_ext autoreload
%autoreload 2
import os
import sys
import ipywidgets as widgets
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import display, Markdown, HTML
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import io
import base64

sys.path.append(os.path.dirname(os.getcwd()))
from api_calls import get_ids_in_batch, get_sample_description, get_all_eqe as get_all_ijp
import batch_selection
import access_token

# Root address of the NOMAD Oasis deployment queried by this notebook.
url_base = "https://nomad-hzb-se.de"
# API v1 endpoint below that root; passed to every API helper call.
url = url_base + "/nomad-oasis/api/v1"
# Token for the endpoint above (presumably an auth token; see access_token module).
token = access_token.get_token(url)
# Presumably records that the notebook was executed; see access_token module.
access_token.log_notebook_usage()

In [None]:
def get_ijp_data(try_sample_ids, variation):
    """Build one DataFrame row per inkjet-printing entry, enriched with JV results.

    Args:
        try_sample_ids: Sample identifiers to query; forwarded unchanged to both
            API calls.
        variation: Mapping ``sample_id -> variation label``. Samples that are not
            in the mapping get an empty string.

    Returns:
        A ``pandas.DataFrame`` with one row per printing entry, or ``None`` when
        no printing data exists or no entry survives the filters below.

    Notes:
        * Entries whose first layer is not an absorber layer are skipped.
        * Entries of samples without any JV measurement are skipped as well, so
          every returned row carries the six list-valued JV columns.
        * Solvent/solute columns are named after the chemical, so the set of
          columns depends on the data.
    """
    print(f"Fetching data for {len(try_sample_ids)} samples")

    # Same API helper for both queries; only the requested entry type differs.
    all_ijp = get_all_ijp(url, token, try_sample_ids, eqe_type="HySprint_Inkjet_Printing")
    all_jv = get_all_ijp(url, token, try_sample_ids, eqe_type="HySprint_JVmeasurement")

    # Nothing to tabulate without printing data.
    if not all_ijp:
        return None
    all_jv = all_jv or {}

    # Insertion order here fixes the order of the JV columns in the result.
    jv_params = (
        'efficiency',
        'open_circuit_voltage',
        'fill_factor',
        'short_circuit_current_density',
        'series_resistance',
        'shunt_resistance',
    )

    def _section(parent, key):
        # A section may be absent or explicitly null in the JSON; both mean "empty".
        value = parent.get(key)
        return value if isinstance(value, dict) else {}

    sample_data_list = []

    for sample_id, sample_entries in all_ijp.items():
        print(sample_id)
        if len(sample_entries) > 1:
            # Every entry is still processed below; this only makes the situation visible.
            print(f"Warning: Multiple entries found for sample_id: {sample_id}")

        for entry in sample_entries:
            # Index 0 of an entry is the data part (index 1 is metadata).
            ijp_data = entry[0]

            annealing = _section(ijp_data, 'annealing')
            atmosphere = _section(ijp_data, 'atmosphere')
            row_data = {
                # Basic sample information
                'sample_id': sample_id,
                'variation': variation.get(sample_id, ''),
                'name': ijp_data.get('name', ''),
                'datetime': ijp_data.get('datetime', ''),
                'description': ijp_data.get('description', ''),
                'location': ijp_data.get('location', ''),
                # Annealing information
                'annealing_temperature': annealing.get('temperature', None),
                'annealing_time': annealing.get('time', None),
                'annealing_atmosphere': annealing.get('atmosphere', ''),
                # Atmosphere information
                'relative_humidity': atmosphere.get('relative_humidity', None),
            }

            # Printing properties
            properties = _section(ijp_data, 'properties')
            row_data.update({
                'cartridge_pressure': properties.get('cartridge_pressure', None),
                'drop_density': properties.get('drop_density', None),
                'printed_area': properties.get('printed_area', None),
                'substrate_temperature': properties.get('substrate_temperature', None),
            })

            # Print head properties
            print_head = _section(properties, 'print_head_properties')
            row_data.update({
                'print_head_name': print_head.get('print_head_name', ''),
                'print_head_temperature': print_head.get('print_head_temperature', None),
                'num_active_nozzles': print_head.get('number_of_active_print_nozzles', None),
                'nozzle_drop_frequency': print_head.get('print_nozzle_drop_frequency', None),
                'nozzle_drop_volume': print_head.get('print_nozzle_drop_volume', None),
            })

            # Quenching information (columns are only added when the section has content)
            quenching = _section(ijp_data, 'quenching')
            if quenching:
                vacuum_props = _section(quenching, 'vacuum_properties')
                if vacuum_props:
                    row_data.update({
                        'vacuum_pressure': vacuum_props.get('pressure', None),
                        'vacuum_start_time': vacuum_props.get('start_time', None),
                        'vacuum_duration': vacuum_props.get('duration', None),
                        'vacuum_temperature': vacuum_props.get('temperature', None),
                    })

                gas_props = _section(quenching, 'gas_quenching_properties')
                if gas_props:
                    row_data.update({
                        'quenching_gas': gas_props.get('gas', ''),
                        'quenching_duration': gas_props.get('duration', None),
                        'quenching_pressure': gas_props.get('pressure', None),
                    })

                row_data.update({
                    'quenching_comment': quenching.get('comment', ''),
                    # Keep only the last dotted component of the definition path.
                    'quenching_type': quenching.get('m_def', '').split('.')[-1] if 'm_def' in quenching else '',
                })

            # Layer information: only the first layer is inspected.
            layers = ijp_data.get('layer') or []
            if layers:
                layer = layers[0]
                if "absorber" not in (layer.get('layer_type') or '').lower():
                    continue  # Not an absorber layer: drop this entry entirely
                row_data.update({
                    'layer_material': layer.get('layer_material', ''),
                    'layer_material_name': layer.get('layer_material_name', ''),
                    'layer_type': layer.get('layer_type', ''),
                })

            # Solution information: only the first solution is inspected.
            solutions = ijp_data.get('solution') or []
            if solutions:
                solution_details = _section(solutions[0], 'solution_details')

                # The chemical name becomes part of the column name.
                for position, solvent in enumerate(solution_details.get('solvent') or [], start=1):
                    solvent_name = solvent["chemical_2"]["name"] if "chemical_2" in solvent else f'solvent{position}'
                    row_data.update({
                        f'solvent_amount_{solvent_name}': solvent.get('amount_relative', None),
                        f'solvent_volume_{solvent_name}': solvent.get('chemical_volume', None),
                    })

                for position, solute in enumerate(solution_details.get('solute') or [], start=1):
                    # NOTE(review): solutes are gated on a top-level 'name' key although the
                    # column name comes from chemical_2 -- confirm this is intended.
                    if 'name' in solute:
                        solute_name = solute["chemical_2"]["name"] if "chemical_2" in solute else f'solute{position}'
                        row_data.update({
                            f'solute_concentration_{solute_name}': solute.get('concentration_mol', None),
                        })

            # Entries of samples without JV measurements are left out of the table.
            jv_entries = all_jv.get(sample_id)
            if not jv_entries:
                continue

            # Pool the per-curve values of all JV measurements of this sample.
            for jv_entry in jv_entries:
                curves = jv_entry[0]["jv_curve"]  # index 0 is the data part
                for param in jv_params:
                    row_data.setdefault(param, []).extend(curve[param] for curve in curves)

            # One single-row frame per entry; concat below aligns differing columns.
            sample_data_list.append(pd.DataFrame([row_data]))

    if not sample_data_list:
        return None
    return pd.concat(sample_data_list, ignore_index=True)

In [None]:
# Unicode escape for the warning-sign glyph.
warning_sign = "\u26A0"

# Shared notebook state, rebound by the callbacks defined further down.
data = None
original_data = None  # Copy of the loaded data kept for filter reset

# Output areas the callbacks write into.
out = widgets.Output()
dynamic_content = widgets.Output()  # Content that is rebuilt dynamically
results_content = widgets.Output(
    # Fixed maximum height; overflowing content gets a scrollbar.
    layout={'max_height': '1000px', 'overflow': 'scroll'},
)
download_content = widgets.Output()  # Target for download events

# Function to trigger file download
def trigger_download(text, filename, kind='text/json'):
    """Make the browser download *text* as a file named *filename*.

    The text is base64-encoded into a ``data:`` URL and a small script that clicks
    a temporary link with that URL is displayed inside ``download_content``.

    Args:
        text: File content as a string.
        filename: Name suggested to the browser for the downloaded file.
        kind: MIME type placed in the data URL.
    """
    import json  # local import: json is not among the notebook's top-level imports

    def js_string(value):
        # json.dumps produces a fully escaped string literal that is also valid
        # JavaScript; "</" is split so the value can never close the <script> tag.
        return json.dumps(str(value)).replace('</', '<\\/')

    content_b64 = base64.b64encode(text.encode()).decode()
    data_url = f'data:{kind};charset=utf-8;base64,{content_b64}'
    js_code = f"""
        var a = document.createElement('a');
        a.setAttribute('download', {js_string(filename)});
        a.setAttribute('href', {js_string(data_url)});
        a.click()
    """
    with download_content:
        # Replace any earlier download script so it is not executed again.
        download_content.clear_output()
        display(HTML(f'<script>{js_code}</script>'))

# Function to export data to CSV
def export_to_csv(e=None):
    """Offer the currently loaded data as a timestamped CSV download.

    ``e`` is the optional click-event argument and is not used. A status message
    is written to ``out`` whether or not data is loaded.
    """
    if data is not None:
        buffer = io.StringIO()
        data.to_csv(buffer)
        stamp = pd.Timestamp.now().strftime("%Y%m%d_%H%M%S")
        filename = f"ijp_data_export_{stamp}.csv"
        trigger_download(buffer.getvalue(), filename, kind='text/plain')
        message = f"Data exported to {filename}"
    else:
        message = "No data available to export. Please load data first."

    with out:
        out.clear_output()
        print(message)


def create_visualization_widgets():
    """Assemble the plot controls and the plot output area into one widget.

    Reads the column names of the global ``data`` to populate the dropdowns and
    returns a ``VBox`` holding the controls above the plot output.
    """
    column_names = data.columns.tolist()

    def make_dropdown(label, options, selected, **extra):
        # All dropdowns share the same width and description styling.
        return widgets.Dropdown(
            options=options,
            value=selected,
            description=label,
            style={'description_width': 'initial'},
            layout={'width': '300px'},
            **extra,
        )

    # Preferred defaults, falling back to the first column when they are absent.
    default_x = 'layer_material_name' if 'layer_material_name' in data.columns else data.columns[0]
    default_y = 'efficiency' if 'efficiency' in data.columns else data.columns[0]

    x_param = make_dropdown('X Parameter:', column_names, default_x)
    y_param = make_dropdown('Y Parameter:', column_names, default_y)
    color_param = make_dropdown('Color By:', ['None'] + column_names, 'None')

    # Selects how list-valued JV parameters are reduced before plotting.
    jv_aggregation = make_dropdown(
        'JV Data Display:',
        ['All Points', 'Mean', 'Max', 'Min', 'Median'],
        'All Points',
        tooltip='How to display JV parameters that contain multiple values per sample',
    )

    create_plot_button = widgets.Button(
        description='Create Plot',
        button_style='primary',
        tooltip='Click to generate the plot',
        icon='chart-line',
    )
    export_button = widgets.Button(
        description='Export Data to CSV',
        button_style='info',
        tooltip='Click to download data as CSV file',
        icon='download',
    )

    # Plots are rendered into this area.
    plot_output = widgets.Output()

    def on_create_plot_button_clicked(b):
        # Redraw inside the plot area using the current selections.
        with plot_output:
            plot_output.clear_output(wait=True)
            create_scatter_plot(
                x_param.value,
                y_param.value,
                color_param.value,
                jv_aggregation.value,
            )

    create_plot_button.on_click(on_create_plot_button_clicked)
    export_button.on_click(export_to_csv)

    controls = widgets.VBox([
        widgets.HBox([x_param, y_param, color_param]),
        widgets.HBox([jv_aggregation]),
        widgets.HBox([create_plot_button, export_button]),
    ])
    return widgets.VBox([controls, plot_output])


def _holds_lists(df, column):
    """Return True when *column* exists in *df* and at least one of its cells is a list."""
    if column not in df.columns:
        return False
    return bool(df[column].apply(lambda cell: isinstance(cell, list)).any())


def _reduce_list_cells(df, columns, reducer):
    """Replace list cells of the given columns by ``reducer(list)`` in place (NaN otherwise)."""
    for column in columns:
        if _holds_lists(df, column):
            df[column] = df[column].apply(
                lambda cell: reducer(cell) if isinstance(cell, list) and len(cell) > 0 else np.nan
            )
    return df


def _paired_points(row, x_param, y_param, x_is_list, y_is_list):
    """Return equally long x and y value lists for one row (at least one side is a list)."""
    def as_list(cell):
        # A non-list cell in a list column contributes no points.
        return list(cell) if isinstance(cell, list) else []

    if x_is_list and y_is_list:
        xs, ys = as_list(row[x_param]), as_list(row[y_param])
    elif x_is_list:
        xs = as_list(row[x_param])
        ys = [row[y_param]] * len(xs)  # repeat the scalar once per point
    else:
        ys = as_list(row[y_param])
        xs = [row[x_param]] * len(ys)

    count = min(len(xs), len(ys))
    return xs[:count], ys[:count]


def _all_points_figure(df, x_param, y_param, color_param, x_is_list, y_is_list):
    """Build the figure showing every individual JV point of every row."""
    if color_param and color_param != 'None':
        # Colouring by a column: flatten all points and let plotly express colour them.
        all_x, all_y, all_colors, all_hover = [], [], [], []
        for _, row in df.iterrows():
            x_values, y_values = _paired_points(row, x_param, y_param, x_is_list, y_is_list)
            if not x_values:
                continue
            sample_id = row['sample_id']
            color_val = row[color_param]
            all_x.extend(x_values)
            all_y.extend(y_values)
            all_colors.extend([color_val] * len(x_values))
            all_hover.extend(
                f"Sample ID: {sample_id}<br>Point: {i}<br>{color_param}: {color_val}"
                for i in range(len(x_values))
            )
        return px.scatter(
            x=all_x,
            y=all_y,
            color=all_colors,
            hover_name=all_hover,
            title=f'{y_param} vs {x_param} colored by {color_param}',
        )

    # No colour column: one trace per row, with one fixed colour per sample.
    fig = go.Figure()
    palette = px.colors.qualitative.Plotly
    sample_color = {}
    for _, row in df.iterrows():
        sample_id = row['sample_id']
        if sample_id not in sample_color:
            sample_color[sample_id] = palette[len(sample_color) % len(palette)]

        x_values, y_values = _paired_points(row, x_param, y_param, x_is_list, y_is_list)
        if not x_values:
            continue

        hover_texts = [
            f"Sample ID: {sample_id}<br>Point: {i}<br>{x_param}: {x_val}<br>{y_param}: {y_val}"
            for i, (x_val, y_val) in enumerate(zip(x_values, y_values))
        ]
        fig.add_trace(go.Scatter(
            x=x_values,
            y=y_values,
            mode='markers',
            marker=dict(size=10, color=sample_color[sample_id]),
            name=f"Sample {sample_id}",
            text=hover_texts,
            hoverinfo='text',
            showlegend=True,
        ))
    return fig


def create_scatter_plot(x_param, y_param, color_param=None, jv_aggregation='All Points'):
    """Create and show a scatter plot of two columns of the global ``data``.

    Args:
        x_param: Column plotted on the x axis.
        y_param: Column plotted on the y axis.
        color_param: Column used for colouring; ``None`` or ``'None'`` disables it.
        jv_aggregation: ``'All Points'`` plots every value of list-valued JV
            columns; ``'Mean'``, ``'Max'``, ``'Min'`` and ``'Median'`` reduce each
            list to one value per row first.

    When neither axis is a list-valued JV column, list-valued JV columns are
    reduced to their mean regardless of ``jv_aggregation``.
    """
    if data is None:
        print("No data available to plot. Please load data first.")
        return

    df = data.copy()

    # JV parameters that are stored as lists
    jv_params = ['efficiency', 'open_circuit_voltage', 'fill_factor',
                 'short_circuit_current_density', 'series_resistance', 'shunt_resistance']

    x_is_jv_list = x_param in jv_params and _holds_lists(df, x_param)
    y_is_jv_list = y_param in jv_params and _holds_lists(df, y_param)

    # Debug info
    print(f"X parameter: {x_param}, is list: {x_is_jv_list}")
    print(f"Y parameter: {y_param}, is list: {y_is_jv_list}")
    print(f"JV Aggregation method: {jv_aggregation}")

    has_list_axis = x_is_jv_list or y_is_jv_list

    if has_list_axis and jv_aggregation == 'All Points':
        fig = _all_points_figure(df, x_param, y_param, color_param, x_is_jv_list, y_is_jv_list)
    else:
        if has_list_axis:
            reducer = {
                'Mean': np.mean,
                'Max': np.max,
                'Min': np.min,
                'Median': np.median,
            }.get(jv_aggregation)
            label = f'{jv_aggregation} values'
        else:
            reducer = np.mean
            label = 'mean values'
        if reducer is not None:
            _reduce_list_cells(df, jv_params, reducer)

        # Only request hover columns that exist: layer columns are absent when no
        # entry carried layer information, and plotly rejects unknown column names.
        hover_columns = [name for name in ('sample_id', 'layer_material_name') if name in df.columns]

        title = f'{y_param} vs {x_param} ({label})'
        scatter_kwargs = {}
        if color_param and color_param != 'None':
            title += f' colored by {color_param}'
            scatter_kwargs['color'] = color_param
        fig = px.scatter(df, x=x_param, y=y_param, hover_data=hover_columns,
                         title=title, **scatter_kwargs)

    # Update layout for better readability
    fig.update_layout(
        height=600,
        width=800,
        xaxis_title=x_param,
        yaxis_title=y_param,
        template='plotly_white'
    )

    # Show the plot
    fig.show()




def on_load_data_clicked(batch_ids_selector):
    """Load the data of the selected batches and show the visualization controls.

    Args:
        batch_ids_selector: Widget whose ``value`` holds the selected batch ids.

    Side effects:
        Rebinds the globals ``data`` (the loaded DataFrame, or ``None`` when
        nothing relevant was found) and ``original_data`` (a copy kept for filter
        reset), writes status messages to ``out`` and rebuilds ``dynamic_content``.
    """
    global data, original_data
    dynamic_content.clear_output()
    with out:
        out.clear_output()
        print("Loading Data")

        try_sample_ids = get_ids_in_batch(url, token, batch_ids_selector.value)

        # Sample descriptions serve as the variation labels of the rows.
        identifiers = get_sample_description(url, token, list(try_sample_ids))
        data = get_ijp_data(try_sample_ids, identifiers)

        if data is None:
            # Do not keep the copy of a previous load around when nothing is loaded now.
            original_data = None
            out.clear_output()
            print("The batches selected don't contain any relevant measurements")
            return

        # Store original data for filter reset functionality
        original_data = data.copy()

        out.clear_output()
        print("Data Loaded")

        # Data is guaranteed to be present here, so the controls can be built directly.
        with dynamic_content:
            dynamic_content.clear_output(wait=True)
            display(Markdown("## Data Visualization"))
            display(create_visualization_widgets())


# Render the notebook UI top to bottom: batch selection, status messages,
# the dynamically rebuilt visualization area, and the download target.
batch_selector_ui = batch_selection.create_batch_selection(url, token, on_load_data_clicked)
for ui_element in (batch_selector_ui, out, dynamic_content, download_content):
    display(ui_element)

VBox(children=(Text(value='', description='Search Batch'), SelectMultiple(description='Batches', layout=Layout…

Output()

Output()

HBox(children=(Button(button_style='info', description='Export All Data to CSV', icon='download', layout=Layou…

Output()