In [2]:
%%capture
%matplotlib widget
#!pip install requests_cache

import matplotlib.pyplot as plt
import plotly.graph_objects as go
import time
import requests
import pandas as pd
import numpy as np
import datetime
import math

import ipywidgets as widgets
from IPython.display import display, clear_output

import sys
sys.path.insert(1, '../python-scripts-c6fxKDJrSsWp1xCxON1Y7g')
sys.path.insert(1, '../../python-scripts-c6fxKDJrSsWp1xCxON1Y7g')
from api_calls import *

# Base URL of the NOMAD Oasis REST API (v1); every request in this notebook is built from it.
url = "https://nomad-hzb-ce.de/nomad-oasis/api/v1"

import os
# Access token sent as a Bearer token with every request. It is read from the
# environment so it never has to be stored in the notebook; a KeyError is raised
# here if NOMAD_CLIENT_ACCESS_TOKEN is not set.
token = os.environ['NOMAD_CLIENT_ACCESS_TOKEN']

In [20]:
def get_upload_ids_from_main_authors(url, token, main_authors):
    """Collect the ids of all visible uploads that originate from the given authors.

    The entries archive query endpoint is paged through completely, so uploads
    are not lost when an author has more entries than fit on one page.

    Args:
        url: Base URL of the NOMAD API (without trailing slash).
        token: Access token, sent as a Bearer token.
        main_authors: List of author names matched against the ``origin`` field.

    Returns:
        A set with the distinct upload ids found in the matching entries.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (e.g. an expired token).
    """
    query = {
        'required': {
            'upload_id': '*',
        },
        'owner': 'visible',
        'query': {
            'origin:any': main_authors,
        },
        'pagination': {
            'page_size': 1000
        }
    }
    headers = {'Authorization': f'Bearer {token}'}
    upload_ids = set()

    while True:
        response = requests.post(f'{url}/entries/archive/query', headers=headers, json=query)
        # Fail with the real HTTP error instead of a KeyError on the missing "data" key.
        response.raise_for_status()
        payload = response.json()
        page = payload["data"]

        for entry in page:
            upload_id = entry.get('upload_id')
            if upload_id is not None:   # never put None into the id set
                upload_ids.add(upload_id)

        # The API hands out a cursor for the next page; it is absent on the last page.
        next_value = (payload.get('pagination') or {}).get('next_page_after_value')
        if not page or not next_value:
            break
        query['pagination']['page_after_value'] = next_value

    return upload_ids

def get_specific_entrytype_of_upload_ids(url, token, upload_list, entry_type):
    """Fetch the ``data`` section of all entries of one type within the given uploads.

    Args:
        url: Base URL of the NOMAD API (without trailing slash).
        token: Access token, sent as a Bearer token.
        upload_list: Iterable of upload ids to search in (list, tuple or set).
        entry_type: Entry type to select, e.g. ``'CE_NOME_CyclicVoltammetry'``.

    Returns:
        A list with one ``archive["data"]`` dict per matching entry, in the
        order returned by the server.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    query = {
        'required': {
            'data': '*',
        },
        'owner': 'visible',
        'query': {
            # list(...) so that sets/tuples of ids can be serialised to JSON as well
            'upload_id:any': list(upload_list),
            'entry_type': entry_type
        },
        'pagination': {
            'page_size': 10000
        }
    }
    response = requests.post(f'{url}/entries/archive/query',
                             headers={'Authorization': f'Bearer {token}'}, json=query)
    # Surface authentication/server problems as such instead of a KeyError below.
    response.raise_for_status()
    return [entry["archive"]["data"] for entry in response.json()["data"]]

def get_upload_name_from_id(url, token, upload_id):
    """Return the name of a NOMAD upload, or None if the upload has no name.

    Args:
        url: Base URL of the NOMAD API (without trailing slash).
        token: Access token, sent as a Bearer token.
        upload_id: Id of the upload to look up.
    """
    auth_header = {'Authorization': f'Bearer {token}'}
    upload_info = requests.get(f'{url}/uploads/{upload_id}', headers=auth_header).json()["data"]
    return upload_info.get('upload_name')

def get_result_df_from_upload_ids(upload_ids):
    """Build one table row of protocol parameters per upload.

    For every upload the first chronoamperometry (CA) and the first cyclic
    voltammetry (CV) entry are fetched (using the notebook-level ``url`` and
    ``token``) and their settings are combined into one row. Uploads that cannot
    be processed (e.g. because they have no CA/CV entry) are reported on stdout
    and skipped.

    Args:
        upload_ids: Iterable of NOMAD upload ids.

    Returns:
        A DataFrame sorted by ``sample_id`` with the potentials in mV, the hold
        times, the sweep speed, the computed number of cycles and the computed
        total duration in seconds and hours.
    """
    columns = ['upload_id', 'sample_id', 'potential1 (mV vs Hg/HgO)', 'hold1 (s)', 'potential2 (mV vs Hg/HgO)', 'hold2 (s)', 'potential3 (mV vs Hg/HgO)', 'sweep speed (mV/s)', 'cycle (P2-P3)', 'duration (s)', 'duration (h)']
    rows = []
    hold2 = 0   # fixed value; it is not read from the entries

    for upload_id in upload_ids:
        try:
            ca_data = get_specific_entrytype_of_upload_ids(url, token, [upload_id], 'CE_NOME_Chronoamperometry',)
            cv_data = get_specific_entrytype_of_upload_ids(url, token, [upload_id], 'CE_NOME_CyclicVoltammetry',)
            ca_entry = ca_data[0]
            cv_entry = cv_data[0]

            sample_id = ca_entry.get('samples')[0].get('name')

            # settings taken from the chronoamperometry entry
            ca_properties = ca_entry.get('properties')
            potential1 = ca_properties.get('step_1_potential')
            hold1 = ca_properties.get('step_1_time')

            # settings taken from the cyclic voltammetry entry
            cv_properties = cv_entry.get('properties')
            potential2 = cv_properties.get('limit_potential_1')
            potential3 = cv_properties.get('limit_potential_2')
            sweep_speed = cv_properties.get('scan_rate')
            recorded_cycles = cv_properties.get('cycles')

            # convert to mV
            potential1 *= 1000
            potential2 *= 1000
            potential3 *= 1000

            # NOTE(review): the division assumes scan_rate is given in mV/s - confirm against the schema
            interval1 = hold1 + np.abs(potential1 - potential2)/sweep_speed
            interval2 = np.abs(potential2 - potential3)/sweep_speed + hold2
            # number of P2-P3 cycles that fit into one hour after the first interval, capped at 100
            cycles = min(100, math.floor((3600 - interval1) / interval2 - 1))   #TODO check the -1

            if cycles != recorded_cycles:
                # value is pulled into a variable first: nesting the same quote type inside
                # an f-string is a SyntaxError before Python 3.12
                print(f' {upload_id} cycles do not match: {cycles} != {recorded_cycles}')
            duration = interval1 + cycles*interval2

            rows.append([upload_id, sample_id, potential1, hold1, potential2, hold2, potential3, sweep_speed, cycles, duration, duration/3600])
        except Exception as err:
            # Best effort: skip uploads that do not fit the expected layout, but show the
            # reason. KeyboardInterrupt is deliberately not caught so the loop can be stopped.
            print(upload_id, "didnt work - maybe calibration upload?", f'({type(err).__name__}: {err})')

    result = pd.DataFrame(rows, columns=columns)
    result = result.sort_values(by=['sample_id']).reset_index(drop = True)
    return result

In [12]:
# All ipywidgets used by the notebook are created here; they are displayed and
# wired to their callbacks in the cells below.

# Drop-down to choose the NOMAD author whose uploads are loaded.
author_selector = widgets.Dropdown(
    options=['Maitryi Gupta', 'Marlena Thormeier'],
    value='Maitryi Gupta',
    description='NOMAD author:',
    style={'description_width': 'initial'}
)

# Button that starts the download of the author's data.
get_button = widgets.Button(
    description='Get NOMAD data',
    button_style='success',
    layout=widgets.Layout(width='auto')
)

# Button that starts grouping and evaluation of the downloaded data.
analysis_button = widgets.Button(
    description='Group and evaluate data',
    button_style='info',
    layout=widgets.Layout(width='auto')
)

# Output areas: one for the table of all runs, one for the analysis results.
all_runs_output = widgets.Output()
analysis_output = widgets.Output()

### Select Uploads

In [10]:
def on_author_change(change):
    """Clear both output areas when the selected author changes.

    Args:
        change: Change notification dict of the widget; only notifications of
            type ``'change'`` for the trait ``'value'`` are acted upon.
    """
    if change['type'] != 'change' or change['name'] != 'value':
        return
    # results shown so far belong to the previously selected author
    all_runs_output.clear_output()
    analysis_output.clear_output()

# Register the handler without a `names=` filter; on_author_change checks for the
# 'value' trait itself.
author_selector.observe(on_author_change)

# Show the author drop-down.
display(author_selector)

Dropdown(description='NOMAD author:', options=('Maitryi Gupta', 'Marlena Thormeier'), style=DescriptionStyle(d…

### Create Table from NOMAD entries

In [21]:
def on_button_clicked(b):
    """Download the runs of the selected author and show them as a table.

    The resulting DataFrame is stored in the notebook-level variable ``result``
    so that later cells can use it. Progress message and table are written into
    ``all_runs_output``.

    Args:
        b: The clicked button (unused).
    """
    global result
    with all_runs_output:
        all_runs_output.clear_output()
        print('Getting data. This can take some time...')
        selected_authors = [author_selector.value]
        result = get_result_df_from_upload_ids(
            get_upload_ids_from_main_authors(url, token, selected_authors)
        )
        # replace the progress message by the finished table
        all_runs_output.clear_output()
        display(result)

# Run on_button_clicked whenever the "Get NOMAD data" button is pressed.
get_button.on_click(on_button_clicked)

# Show the button together with the output area the callback writes into.
display(get_button, all_runs_output)

Button(button_style='success', description='Get NOMAD data', layout=Layout(width='auto'), style=ButtonStyle())

Output(outputs=({'name': 'stdout', 'text': 'Getting data. This can take some time...\n1424.8571174377223\n wX0…

### Calculation of Targets

In [6]:
# TODO decide how the table and the activity are connected: everything in the same upload, or matched via sample ids?

# TODO E_shift of the reference electrode
# TODO add calculation of targets
# 1) E_shift of the reference electrode (+ overview of the std of E_shift)
# 3) Evaluation in the range between max(-1.45 V, P2) and 1.55 V -> look at the first complete cycle and the last complete cycle (cycle 2 and 100) -> TODO sometimes different number of cycles?

In [23]:
def get_groups(df):
    """Group runs that share the same protocol settings.

    Runs are grouped by potential 1, hold time 1, potential 3, sweep speed and
    cycle count. For every group the distinct upload ids and sample ids are
    collected into lists.

    Args:
        df: Table of runs containing at least ``upload_id``, ``sample_id`` and
            the five grouping columns.

    Returns:
        A new DataFrame with one row per group, the columns ``upload_ids`` and
        ``sample_ids`` holding lists, ordered by the first sample id of each
        group and with a fresh 0..n-1 index.
    """
    setup_columns = [
        'potential1 (mV vs Hg/HgO)',
        'hold1 (s)',
        'potential3 (mV vs Hg/HgO)',
        'sweep speed (mV/s)',
        'cycle (P2-P3)'
    ]
    grouped = (
        df.groupby(setup_columns)
        .agg({
            'upload_id': lambda ids: list(ids.unique()),
            'sample_id': lambda ids: list(ids.unique())
        })
        .reset_index()
        .rename(columns={'upload_id': 'upload_ids', 'sample_id': 'sample_ids'})
    )

    # order the groups by the first sample id of each group via a temporary helper column
    first_sample_id = grouped['sample_ids'].apply(lambda ids: ids[0] if ids else '')
    ordered = (
        grouped.assign(sort_by_id_date=first_sample_id)
        .sort_values(by='sort_by_id_date')
        .drop(columns='sort_by_id_date')
        .reset_index(drop=True)
    )
    return ordered

def get_mean_std_no_cycles(data_list, quantity):
    """Summarise one quantity over replicate measurements.

    The traces of all replicates are first averaged point by point (and their
    sample standard deviation, ``ddof=1``, is taken point by point); both
    resulting curves are then reduced to their mean value.

    Args:
        data_list: List of measurement dicts, one per replicate.
        quantity: Key of the trace to evaluate, e.g. ``'current'``.

    Returns:
        Tuple ``(mean, std)`` of the averaged point-wise mean and the averaged
        point-wise standard deviation.
    """
    traces = [entry.get(quantity) for entry in data_list]
    pointwise_mean = np.mean(traces, axis=0)
    pointwise_std = np.std(traces, axis=0, ddof=1)
    return pointwise_mean.mean(), pointwise_std.mean()
    
def _cycle_point_count(cycle):
    """Return the number of recorded points of one CV cycle (0 if there is no voltage trace)."""
    voltage = cycle.get('voltage_rhe_compensated')
    return 0 if voltage is None else len(voltage)


def _replicate_cycle_stats(cycles, v_min, v_max):
    """Average one CV cycle over all replicates and summarise the current inside a voltage window.

    Returns the mean of the point-wise mean current and the mean of the
    point-wise standard deviation, both restricted to points whose mean voltage
    lies in ``[v_min, v_max]``.
    """
    frames = [
        pd.DataFrame({
            f'voltage{idx}': np.array(cycle.get('voltage_rhe_compensated')),
            f'current{idx}': cycle.get('current'),
        })
        for idx, cycle in enumerate(cycles)
    ]
    # replicates are aligned by point index; shorter traces are padded with NaN by concat
    table = pd.concat(frames, axis=1)
    voltage_cols = [f'voltage{idx}' for idx in range(len(cycles))]
    current_cols = [f'current{idx}' for idx in range(len(cycles))]

    voltage_mean = table[voltage_cols].mean(axis=1)
    current_mean = table[current_cols].mean(axis=1)
    current_std = table[current_cols].std(axis=1)   # NaN when there is only one replicate

    in_window = (voltage_mean >= v_min) & (voltage_mean <= v_max)
    return current_mean[in_window].mean(), current_std[in_window].mean()


def get_mean_std_of_groups(res_grouped):
    """Add mean/std evaluation columns for CA, CP and CV data to every group.

    For each row of ``res_grouped`` the CA, CP and CV entries of the group's
    uploads are fetched (using the notebook-level ``url`` and ``token``). CA
    current and CP voltage are summarised over the replicates; for CV the first
    complete cycle (cycle 2) and the last complete cycle are averaged over the
    replicates inside the voltage window ``[max(-1.45, P2), 1.55]``. Any number
    of replicates per group is supported.

    Args:
        res_grouped: Grouped table with an ``upload_ids`` column holding lists
            of upload ids.

    Returns:
        The same DataFrame object, extended in place by the eight evaluation
        columns (CP values in mV, currents as delivered by the entries).
    """
    eval_col_names = ['ca_current_mean', 'ca_current_std', 'cp_voltage_mean', 'cp_voltage_std', 'cv_first_current_mean', 'cv_first_current_std', 'cv_last_current_mean', 'cv_last_current_std']
    eval_rows = []

    for group in res_grouped.itertuples():
        ca_data = get_specific_entrytype_of_upload_ids(url, token, group.upload_ids, 'CE_NOME_Chronoamperometry',)
        cp_data = get_specific_entrytype_of_upload_ids(url, token, group.upload_ids, 'CE_NOME_Chronopotentiometry',)
        cv_data = get_specific_entrytype_of_upload_ids(url, token, group.upload_ids, 'CE_NOME_CyclicVoltammetry',)

        ca_mean, ca_std = get_mean_std_no_cycles(ca_data, 'current')

        cp_mean, cp_std = get_mean_std_no_cycles(cp_data, 'voltage')
        cp_mean *= 1000 #mV
        cp_std *= 1000 #mV

        # evaluation window: from P2 (but not below -1.45 V) up to 1.55 V
        potential2 = cv_data[0].get('properties').get('limit_potential_1')
        cv_v_min = max(-1.45, potential2)
        cv_v_max = 1.55

        first_cycles = []
        last_cycles = []
        for cv in cv_data:
            cycles = cv.get('cycles')
            first_cycle = cycles[1]   # cycle 2 (first complete cycle)
            last_cycle = cycles[-1]
            # Sometimes the last cycle is only a half cycle. Compare the number of
            # recorded points: len() of the cycle dicts themselves would only count
            # their keys and never detect a truncated cycle.
            if _cycle_point_count(last_cycle) < _cycle_point_count(first_cycle):
                last_cycle = cycles[-2]
            first_cycles.append(first_cycle)
            last_cycles.append(last_cycle)

        cv_first_mean, cv_first_std = _replicate_cycle_stats(first_cycles, cv_v_min, cv_v_max)
        cv_last_mean, cv_last_std = _replicate_cycle_stats(last_cycles, cv_v_min, cv_v_max)

        eval_rows.append([ca_mean, ca_std, cp_mean, cp_std, cv_first_mean, cv_first_std, cv_last_mean, cv_last_std])

    # build the evaluation table once, aligned to the group index, then attach it column by column
    evaluation = pd.DataFrame(eval_rows, columns=eval_col_names, index=res_grouped.index)
    for name in eval_col_names:
        res_grouped[name] = evaluation[name]
    return res_grouped

#upload_ids = get_upload_ids_from_main_authors(url, token, [author_selector.value])
#result = get_result_df_from_upload_ids(upload_ids)
#res_grouped = get_groups(result)
#res_grouped2 = get_mean_std_of_groups(res_grouped)
#res_grouped2

In [24]:
def get_trial_overview(res_grouped, plot_title='CP Mean & STD in mV', y1_name='cp_voltage_mean', y1_label='CP Voltage (mV)', y2_name='cp_voltage_std', y2_label='CP Standard Deviation (mV)'):
    """Plot two columns of the grouped table against the setup index on two y axes.

    Args:
        res_grouped: Grouped result table; its index is used as x axis.
        plot_title: Title of the figure.
        y1_name: Column plotted on the left (blue) axis.
        y1_label: Legend entry and axis title of the left axis.
        y2_name: Column plotted on the right (red) axis.
        y2_label: Legend entry and axis title of the right axis.

    Returns:
        The plotly figure (not yet shown).
    """
    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=res_grouped.index,
        y=res_grouped[y1_name],
        name=y1_label,
        mode='lines+markers',
        yaxis='y1'
    ))

    fig.add_trace(go.Scatter(
        x=res_grouped.index,
        y=res_grouped[y2_name],
        name=y2_label,
        mode='lines+markers',
        yaxis='y2'
    ))

    fig.update_layout(
        title=plot_title,
        plot_bgcolor='white',
        xaxis=dict(
            title='Setup',
            showgrid=False,      # no vertical grid lines
            linecolor='black',   # black axis at bottom
        ),
        yaxis=dict(
            # The title font is set through title.font: the older axis attribute
            # `titlefont` is deprecated and no longer accepted by plotly 6.
            title=dict(text=y1_label, font=dict(color='blue')),
            linecolor='blue',
            tickfont=dict(color='blue'),
        ),
        yaxis2=dict(
            title=dict(text=y2_label, font=dict(color='red')),
            overlaying='y',
            side='right',
            showgrid=False,     # no horizontal grid lines for the second axis
            linecolor='red',
            tickfont=dict(color='red'),
        ),
        legend=dict(
            x=0.5, y=-0.3,
            xanchor='center',
            orientation='h'
        )
    )
    return fig

def get_trial_std_over_mean(res_grouped, plot_title='CP std vs. CP mean (in mV)', x_name='cp_voltage_mean', x_label='CP Voltage (mV)', y_name='cp_voltage_std', y_label='CP Standard Deviation (mV)', x_range=None):
    """Scatter plot of a standard-deviation column over the matching mean column.

    Every point is labelled with the index of its group in ``res_grouped``.

    Args:
        res_grouped: Grouped result table.
        plot_title: Title of the figure.
        x_name: Column used for the x values (the mean).
        x_label: Title of the x axis.
        y_name: Column used for the y values (the standard deviation).
        y_label: Title of the y axis.
        x_range: Optional ``(min, max)`` for the x axis. With the default
            ``None`` plotly scales the axis to the data, so the function works
            for mV-sized voltages as well as for currents in A; pass
            ``(0, 2000)`` for a fixed 0-2000 window.

    Returns:
        The plotly figure (not yet shown).
    """
    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=res_grouped[x_name],   # TODO maybe use mA
        y=res_grouped[y_name],
        mode='markers+text',
        # the text mode needs labels to draw anything: use the setup index
        text=[str(setup) for setup in res_grouped.index],
        marker=dict(color='green', size=8),
        name='Mean vs Std',
        textposition='top center'
    ))

    xaxis = dict(
        title=x_label,
        showgrid=True,
        gridcolor='lightgrey',
        zeroline=False,
        linecolor='black',
    )
    if x_range is not None:
        xaxis['range'] = list(x_range)

    fig.update_layout(
        title=plot_title,
        xaxis=xaxis,
        yaxis=dict(
            title=y_label,
            showgrid=True,
            gridcolor='lightgrey',
            zeroline=False,
            linecolor='black',
        ),
        plot_bgcolor='white',
    )
    return fig


In [25]:
def on_analysis_clicked(b):
    """Group the loaded runs, evaluate every group and show table and figures.

    Reads the notebook-level ``result`` table that is filled by the
    "Get NOMAD data" button. If no data has been loaded yet, a hint is printed
    instead of failing with a NameError. All output goes into
    ``analysis_output``.

    Args:
        b: The clicked button (unused).
    """
    # keyword arguments of the mean/std-over-setup figures, in display order
    overview_specs = [
        dict(plot_title='CP Mean & STD in mV', y1_name='cp_voltage_mean', y1_label='CP Voltage (mV)', y2_name='cp_voltage_std', y2_label='CP Standard Deviation (mV)'),
        dict(plot_title='CV First Cycle Mean & STD in A', y1_name='cv_first_current_mean', y1_label='CV Current (A)', y2_name='cv_first_current_std', y2_label='CV Standard Deviation (A)'),
        dict(plot_title='CV Last Cycle Mean & STD in A', y1_name='cv_last_current_mean', y1_label='CV Current (A)', y2_name='cv_last_current_std', y2_label='CV Standard Deviation (A)'),
        dict(plot_title='CA Mean & STD in A', y1_name='ca_current_mean', y1_label='CA Current (A)', y2_name='ca_current_std', y2_label='CA Standard Deviation (A)'),
    ]
    # keyword arguments of the std-over-mean scatter figures, in display order
    scatter_specs = [
        dict(plot_title='CP std vs. CP mean (in mV)', x_name='cp_voltage_mean', x_label='CP Voltage (mV)', y_name='cp_voltage_std', y_label='CP Standard Deviation (mV)'),
        dict(plot_title='CV std vs. CV mean (first cycle in A)', x_name='cv_first_current_mean', x_label='CV Current (A)', y_name='cv_first_current_std', y_label='CV Standard Deviation (A)'),
        dict(plot_title='CV std vs. CV mean (last cycle in A)', x_name='cv_last_current_mean', x_label='CV Current (A)', y_name='cv_last_current_std', y_label='CV Standard Deviation (A)'),
    ]

    with analysis_output:
        analysis_output.clear_output()
        if 'result' not in globals():
            print('No data loaded yet. Please click "Get NOMAD data" first.')
            return
        print('Evaluating data. This can take some time...')
        res_grouped = get_groups(result)
        res_grouped = get_mean_std_of_groups(res_grouped)

        # build all figures first so the progress message stays visible until everything is ready
        figures = [get_trial_overview(res_grouped, **spec) for spec in overview_specs]
        figures += [get_trial_std_over_mean(res_grouped, **spec) for spec in scatter_specs]

        analysis_output.clear_output()
        display(res_grouped)
        for fig in figures:
            fig.show()

# Run on_analysis_clicked whenever the "Group and evaluate data" button is pressed.
analysis_button.on_click(on_analysis_clicked)

# Show the button together with the output area the callback writes into.
display(analysis_button, analysis_output)

Button(button_style='info', description='Group and evaluate data', layout=Layout(width='auto'), style=ButtonSt…

Output()

### Save Data for Bayesian Optimization

In [10]:
#date_now = datetime.datetime.now()
#file_name = 'baybe_csv/nomad_result_table_maitryi_' + date_now.strftime("%Y%m%d") + '.csv'
#result.to_csv(file_name, index=False)

In [11]:
#rename_baybe = {
#    'potential1 (mV vs Hg/HgO)': 'potential1',
#    'potential2 (mV vs Hg/HgO)': 'potential2',
#    'potential3 (mV vs Hg/HgO)': 'potential3',  
#}
#baybe_table = result.rename(columns=rename_baybe)[['potential1', 'potential2', 'potential3']]
#import random 
#baybe_table['Activity'] = range(1,len(result)+1) # TODO calculate this!!
#baybe_table = baybe_table.replace('n/a', 0)    # this is from last year where sweep speed could be n/a TODO check if this is also possible this year
#baybe_table.to_csv('baybe_csv/wateroxidation_ni_maitryi.csv', index=False, header=True)
#baybe_table