# Summer Student 2025 Analysis - PEC

In [1]:
%%capture
%matplotlib widget
#!pip install requests_cache

import matplotlib.pyplot as plt
import time
import requests
import pandas as pd
import numpy as np
import datetime

import plotly.graph_objects as go
from scipy.signal import find_peaks

import ipywidgets as widgets
from IPython.display import display

import sys
sys.path.insert(1, '../python-scripts-c6fxKDJrSsWp1xCxON1Y7g')
sys.path.insert(1, '../../python-scripts-c6fxKDJrSsWp1xCxON1Y7g')
from api_calls import *
from pec_activity_stability_functions import *

# Base URL of the NOMAD Oasis REST API (v1); every request in this notebook is sent to it.
url = "https://nomad-hzb-ce.de/nomad-oasis/api/v1"

import os
# The access token is taken from the environment; a KeyError is raised if the variable is unset.
token = os.environ['NOMAD_CLIENT_ACCESS_TOKEN']

In [2]:
def get_ids_of_all_samples(url, token, author_name, sample_type='CE_NOME_Sample'):
    """Return the ``lab_id`` of every visible sample entry written by one author.

    Unlike a single request, this walks through all result pages, so authors
    with more than one page of samples (page size 100) are not silently
    truncated.

    Parameters
    ----------
    url : str
        Base URL of the NOMAD API (without trailing slash).
    token : str
        Bearer token used for the ``Authorization`` header.
    author_name : str
        Value matched against ``authors.name`` of the entries.
    sample_type : str, optional
        Value matched against ``entry_type``.

    Returns
    -------
    list
        The non-empty ``lab_id`` values in the order the API returned them.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status (e.g. expired token).
    """
    query = {
        'required': {
            'metadata': '*',
            'data': '*',
        },
        'owner': 'visible',
        'query': {"entry_type": sample_type, "authors.name": author_name},
        'pagination': {
            'page_size': 100
        }
    }
    lab_ids = []
    while True:
        response = requests.post(f'{url}/entries/archive/query',
                                 headers={'Authorization': f'Bearer {token}'}, json=query)
        # Fail loudly on HTTP errors instead of a KeyError on the missing "data" key.
        response.raise_for_status()
        payload = response.json()
        entries = payload["data"]
        for entry in entries:
            lab_id = entry["archive"]["data"].get("lab_id")
            if lab_id:
                lab_ids.append(lab_id)

        # The API hands out a cursor for the following page as long as there is one.
        next_cursor = (payload.get("pagination") or {}).get("next_page_after_value")
        if not entries or not next_cursor:
            return lab_ids
        query['pagination']['page_after_value'] = next_cursor

# Substring searched in a substance name -> overview column that receives its concentration.
_SUBSTANCE_COLUMNS = (
    ("Bi(NO3)3 5H2O", "Bi(NO3)3 5H2O"),
    ("KI", "KI"),
    ("Lactic", "LA"),  # also covers names containing "Lactic Acid"
    ("Benzoquinone", "benzoquinone"),
    ("VO(acac)2", "VO(acac)2"),
)

# Samples for which a failure is expected and therefore not reported.
_SILENTLY_SKIPPED_SAMPLES = frozenset({'CE-NOME_FeMa_250709_0001'})


def _parse_entry_datetime(stamp):
    """Parse a '+00:00' timestamp with or without fractional seconds."""
    if "." in stamp:
        return datetime.datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%S.%f+00:00")
    return datetime.datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%S+00:00")


def _step_1_current_per_area(properties):
    """Divide the step-1 current by ``sample_area / 1000``.

    NOTE(review): the result is stored in the ``*_mAcm_2`` columns; the units of
    the raw ``sample_area`` / ``step_1_current`` values are not visible here.
    """
    sample_area = properties["sample_area"] / 1000
    return properties["step_1_current"] / sample_area


def get_overview_table(sample_id_list):
    """Build one overview row per sample from its first three linked entries.

    For every sample the linked entries are sorted by their ``datetime`` and the
    first three are read as N, G and NG (in that order). The environment of the
    N entry supplies the substance concentrations, ``env_id`` and ``ph``.

    Samples that cannot be processed (missing entries, missing keys, request
    errors, ...) are left out of the table and reported on stdout together with
    the reason. ``KeyboardInterrupt`` and ``SystemExit`` are no longer
    swallowed, so a long-running fetch can be interrupted.

    Parameters
    ----------
    sample_id_list : iterable of str
        Lab ids of the samples to summarise.

    Returns
    -------
    pandas.DataFrame
        One row per successfully processed sample; values that are not
        available for a sample are NaN. Column dtypes are inferred by pandas.
    """
    columns = ["Sample_id", "env_id", "Bi(NO3)3 5H2O", "KI", "LA", "VO(acac)2", "benzoquinone", "ph", "G1_mAcm_2", "G_time", "N1_mAcm_2", "N_time", "NG_time", "no_cycles"]
    records = []
    for sample_id in sample_id_list:
        try:
            row = {"Sample_id": sample_id}
            data = get_specific_data_of_sample(url, token, sample_id, "", with_meta=True)
            data.sort(key=lambda item: _parse_entry_datetime(item[0].get('datetime')))

            N = data[0]
            G = data[1]
            NG = data[2]
            env = get_environment(url, token, N[1]["entry_id"])
            for substance in env["substances"]:
                for needle, column in _SUBSTANCE_COLUMNS:
                    if needle in substance["name"]:
                        row[column] = substance["concentration_mmol_per_l"]
            row["env_id"] = env["lab_id"]
            row["ph"] = env["ph_value"]
            row["N1_mAcm_2"] = _step_1_current_per_area(N[0]["properties"])
            row["N_time"] = N[0]["properties"]["step_1_time"]
            row["no_cycles"] = (len(data) - 1) // 2
            row["G1_mAcm_2"] = _step_1_current_per_area(G[0]["properties"])
            row["G_time"] = G[0]["properties"]["step_1_time"]
            row["NG_time"] = NG[0]["properties"]["step_1_time"]
        except Exception as err:
            # Best effort: one broken sample must not abort the whole overview.
            if sample_id not in _SILENTLY_SKIPPED_SAMPLES:
                print(sample_id, "didnt work", f"({type(err).__name__}: {err})")
            continue
        records.append(row)
    # Build the frame once instead of appending row by row.
    return pd.DataFrame(records, columns=columns)

In [3]:
# All ipywidgets used by the interactive cells of this notebook.

# FFT = Furthest-First Traversal


def _make_button(description, button_style):
    """Create a button whose width follows the length of its label."""
    return widgets.Button(
        description=description,
        button_style=button_style,
        layout=widgets.Layout(width='auto'),
    )


# --- selectors -------------------------------------------------------------
_author_options = ['Felipe Mata mata', 'Maddalena Zoli']
author_selector = widgets.Dropdown(
    options=_author_options,
    value=_author_options[0],
    description='NOMAD author:',
    style={'description_width': 'initial'},
)

# Options are filled in later, once the overview table has been analysed.
sample_selector = widgets.Dropdown(
    description="Select SampleID:",
    style={'description_width': 'initial'},
)

# --- buttons ---------------------------------------------------------------
get_button = _make_button('Get NOMAD data', 'success')
analysis_button = _make_button('Analyse PEC data', 'info')
# NOTE(review): the label presumably means "Save data ..."; kept unchanged here.
safe_baybe_button = _make_button('Safe data for Bayesian Optimization', 'primary')
show_sample_details_button = _make_button("Show plots for selected sample ID", 'info')

# --- output areas ----------------------------------------------------------
(all_runs_output, analysis_output, baybe_output,
 sample_select_output, sample_detail_output) = (widgets.Output() for _ in range(5))

# One output area per tab of the sample detail view.
out1, out2, out3 = (widgets.Output() for _ in range(3))

tab = widgets.Tab(children=[out1, out2, out3])
for _tab_index, _tab_title in enumerate(('Activity', 'Stability', 'Images')):
    tab.set_title(_tab_index, _tab_title)

In [4]:
def on_author_change(change):
    """Wipe every result area when the value of the author dropdown changes.

    The observer is notified about other trait changes as well; those are
    ignored so that only a new author selection resets the displayed results.
    """
    is_value_change = change['type'] == 'change' and change['name'] == 'value'
    if not is_value_change:
        return

    stale_outputs = (
        all_runs_output,
        analysis_output,
        baybe_output,
        sample_select_output,
        sample_detail_output,
    )
    for stale_output in stale_outputs:
        stale_output.clear_output()

# Registered without a `names=` filter, so the callback receives every trait
# change of the dropdown and filters for 'value' changes itself.
author_selector.observe(on_author_change)

# Render the author dropdown as the output of this cell.
display(author_selector)

Dropdown(description='NOMAD author:', options=('Felipe Mata mata', 'Maddalena Zoli'), style=DescriptionStyle(d…

## Show overview of all samples

In [5]:
# Empty placeholder; the analysis cell uses `.empty` to detect that no data was fetched yet.
overview_table = pd.DataFrame()


def on_button_clicked(b):
    """Fetch all samples of the selected author and show their overview table.

    Rebinds the notebook-level ``samples`` (array of sample ids that made it
    into the table) and ``overview_table``. Progress text and the final table
    are rendered inside ``all_runs_output``; a previous analysis result is
    cleared because it no longer matches the freshly loaded data.
    """
    global samples, overview_table
    with all_runs_output:
        for stale_output in (all_runs_output, analysis_output):
            stale_output.clear_output()
        print('Getting data. This can take some time...')

        selected_author = author_selector.value
        samples = sorted(get_ids_of_all_samples(url, token, selected_author))
        overview_table = get_overview_table(samples)
        # Keep only the ids for which an overview row could be built.
        samples = np.array(overview_table['Sample_id'])

        # Replace the progress message by the result.
        all_runs_output.clear_output()
        display(overview_table)

# Run the fetch whenever the "Get NOMAD data" button is pressed.
get_button.on_click(on_button_clicked)

# Show the button together with the area that receives the overview table.
display(get_button, all_runs_output)

# TODO: pH column?? Also, the columns rampCmin, temp1_C and time_min are missing

Button(button_style='success', description='Get NOMAD data', layout=Layout(width='auto'), style=ButtonStyle())

Output()

## Analyse PEC for given samples

In [6]:
# TODO get stability values
# TODO make table for BO

def on_analysis_clicked(b):
    """Add description and photoactivity results to the overview table.

    For every id in ``samples`` the entry description is stored in the
    ``description`` column. Samples whose description contains
    'no deposition' get ``activity`` 0 and ``stability`` 1. Afterwards every
    photoactivity entry of the sample is analysed and its ``mean_delta_I`` is
    written to ``activity`` (a later entry overwrites an earlier one).

    The table is modified in place. When it is still empty only a hint is
    printed. After a successful run the sample dropdown is filled and shown.
    """
    if overview_table.empty:
        with analysis_output:
            analysis_output.clear_output()
            print('Please use the "Get NOMAD data" button first.')
        return

    with analysis_output:
        analysis_output.clear_output()
        print('Evaluating data. This can take some time...')

        for sample_id in samples:
            entry_id = get_entryid(url, token, sample_id)
            description = get_entry_data(url, token, entry_id).get('description')

            # Rows of this sample; adding columns below does not invalidate the mask.
            row_mask = overview_table['Sample_id'] == sample_id
            overview_table.loc[row_mask, 'description'] = description

            if description and 'no deposition' in description:
                overview_table.loc[row_mask, 'activity'] = 0
                overview_table.loc[row_mask, 'stability'] = 1

            for ca_entry in get_photoactivity_data_for_samples(url, token, [entry_id]):
                measurement = ca_entry['archive']['data']
                time_axis = np.array(measurement.get('time'))
                current_trace = np.array(measurement.get('current'))
                results = analyse_photoactivity(current_trace, time_axis)
                overview_table.loc[row_mask, 'activity'] = results['mean_delta_I']

        # Replace the progress message by the enriched table.
        analysis_output.clear_output()
        display(overview_table)

    if not overview_table.empty:
        sample_selector.options = list(overview_table['Sample_id'])
        with sample_select_output:
            sample_select_output.clear_output()
            display(sample_selector)

# Run the analysis whenever the "Analyse PEC data" button is pressed.
analysis_button.on_click(on_analysis_clicked)

# Show the button together with the area that receives the analysed table.
display(analysis_button, analysis_output)

Button(button_style='info', description='Analyse PEC data', layout=Layout(width='auto'), style=ButtonStyle())

Output()

## Inspect individual samples

In [7]:
def get_image_list(sample_id):
    """Return the relative file paths of the measurement files of a sample.

    Every ``CE_NOME_Measurement`` entry linked to ``sample_id`` contributes the
    first file of its ``data_file`` list. The path is built as
    ``../../<upload name, lower-cased, spaces replaced by '-'>-<upload id>/<file>``.

    Entries without a ``data_file`` or without an ``upload_name`` are skipped
    instead of aborting the whole listing.

    Parameters
    ----------
    sample_id : str
        Lab id of the sample.

    Returns
    -------
    list of str
        One path per usable measurement entry, in the order returned by the API.
    """
    entries = get_specific_data_of_sample(url, token, sample_id, "CE_NOME_Measurement", with_meta=True)
    image_path_list = []
    for entry_data, entry_metadata in entries:
        data_files = entry_data.get('data_file')
        upload_name = entry_metadata.get('upload_name')
        if not data_files or not upload_name:
            # Nothing to locate on disk for this entry.
            continue
        upload_id = entry_metadata.get('upload_id')
        # Built in two steps with different quote types: reusing the same quote
        # inside an f-string is only valid from Python 3.12 on.
        upload_dir = f"{upload_name.lower().replace(' ', '-')}-{upload_id}"
        image_path_list.append(f'../../{upload_dir}/{data_files[0]}')
    return image_path_list

In [12]:
def _read_file_bytes(path):
    """Return the complete binary content of ``path`` and close the file handle."""
    with open(path, 'rb') as handle:
        return handle.read()


def on_sample_id_change(change):
    """Fill the detail view for the sample picked in the sample dropdown.

    Does nothing unless ``change`` reports a non-``None`` new ``value``.
    Otherwise it

    * shows the dropdown and the overview row(s) of the sample,
    * analyses and plots every photoactivity entry whose upload name contains
      the sample id ('Activity' tab),
    * shows the raw and the cleaned stability plots ('Stability' tab),
    * shows the measurement images of the sample ('Images' tab),
    * and finally displays the tab widget itself.

    Image files are read through a context manager so that no file handle is
    left open per displayed image.
    """
    if change['name'] == 'value' and change['new'] is not None:
        sample_id = change['new']
        entry_id = get_entryid(url, token, sample_id)
        with sample_select_output:
            sample_select_output.clear_output()
            sample_detail_output.clear_output()
            display(sample_selector, overview_table[overview_table['Sample_id']==sample_id])
        with out1:
            out1.clear_output()
            photoactivity_entries = get_photoactivity_data_for_samples(url, token, [entry_id])
            for ca_data in photoactivity_entries:
                upload_name = ca_data['archive']['metadata'].get('upload_name', '')
                # Only uploads that carry the sample id in their name are plotted.
                if sample_id in upload_name:
                    time_axis = np.array(ca_data['archive']['data'].get('time'))
                    current = np.array(ca_data['archive']['data'].get('current'))
                    photoactivity_results = analyse_photoactivity(current, time_axis)
                    # Double quotes inside the f-string keep this valid before Python 3.12.
                    print(f'{upload_name}, Mean ΔI: {photoactivity_results["mean_delta_I"]}')
                    fig = get_photoactivity_plot(time_axis, current, photoactivity_results)
                    fig.show()
        with out2:
            out2.clear_output()
            stability_entries = get_stability_data_for_samples(url, token, [entry_id])
            show_stability_plots(stability_entries)
            show_cleaned_stability_plots(stability_entries)
        with out3:
            out3.clear_output()
            image_paths = get_image_list(sample_id)
            # NOTE(review): every file is passed on as format='png' regardless of its extension.
            box = widgets.HBox([widgets.Image(value=_read_file_bytes(path), format='png') for path in image_paths])
            display(box)
        with sample_detail_output:
            sample_detail_output.clear_output()
            display(tab)

# Only 'value' changes of the sample dropdown trigger the detail view.
sample_selector.observe(on_sample_id_change, names='value')

# Hint shown while the table has no 'stability' column yet.
# NOTE(review): in the code visible here that column is only created for
# samples described as 'no deposition' — confirm the check is the intended one.
if 'stability' not in overview_table:
    with sample_select_output:
        print('Please run the "Analyse PEC data" button before inspecting individual parameter sets.')

# Show the dropdown area and the area that receives the detail tabs.
display(sample_select_output, sample_detail_output)


Output(outputs=({'output_type': 'display_data', 'data': {'text/plain': "Dropdown(description='Select SampleID:…

Output(outputs=({'output_type': 'display_data', 'data': {'text/plain': 'Tab(children=(Output(), Output(), Outp…