In [1]:
# Pre-defined block

# This notebook has been generated by the "Jupyter Notebook Analysis" schema.
# It gets the data from the entries referenced in the `inputs` sub-section.
# It also gets the analysis function based on the analysis type (e.g., XRD).

## Get the analysis archive

Set `analysis_entry_id` below to the entry ID of the corresponding analysis entry.

In [2]:
# Pre-defined block

# Entry ID of the analysis ELN to load; left empty here and meant to be
# filled in before calling `get_analysis_entry`.
analysis_entry_id = ""

def get_analysis_entry(entry_id: str, url: str = None):
    """
    Download the archive of an analysis entry from a NOMAD server.

    Args:
        entry_id (str): Entry ID of the analysis ELN.
        url (str): URL of the NOMAD server. When omitted, the client URL from
            the NOMAD configuration (`config.client.url`) is used.

    Returns:
        EntryArchive: The first archive returned by the query, or None (after
            printing a message) when the query returns nothing.
    """

    from nomad.client import ArchiveQuery
    from nomad.config import config

    # Fall back to the configured server only when no URL was passed.
    server_url = config.client.url if url is None else url

    entries = ArchiveQuery(
        query={'entry_id:any': [entry_id]},
        required='*',
        url=server_url,
    ).download()

    if not entries:
        print(
            f'Analysis entry with id "{entry_id}" not '
            f'found at the given URL "{server_url}".'
        )
        return None

    if len(entries) > 1:
        print('Multiple entries found. Picking the first one.')

    return entries[0]


## Load the schemas defined in your Plugin

In [4]:
from bayesian_optimization_hpt.schema_packages.schema_package import (
    MinMaxScaling, Acquisition, InitialSampling, MinMaxRange,
    PassivationPerformanceMeasurement, HydrogenPlasmaTreatment, PassivationPerformanceResult, PassivationPerformanceMeasurementReference,
    SurrogateModel, Acquisition
)
from nomad_analysis.utils import create_entry_with_api

## Adding the initial samples
Uses `create_entry_with_api` to add an entry for each initial sample.
- Adds an `InitialSampling` step to the analysis

In [5]:
# Server URL and upload ID are read from the context of the analysis entry;
# the cells below pass them to `create_entry_with_api`.
# NOTE(review): `analysis` is not assigned in any visible cell — presumably
# `analysis = get_analysis_entry(analysis_entry_id)` ran in a cell that is no
# longer present; confirm before relying on Restart & Run All.
url = analysis.m_context.installation_url
upload_id = analysis.m_context.upload_id

In [6]:
# Define the initial set of samples:
#   1. read the process settings (X) and measured lifetime (y) from Excel,
#   2. create one measurement archive per row via the NOMAD API,
#   3. collect references to them in an `InitialSampling` analysis step.

import pandas as pd
df = pd.read_excel('BO_initial_samples.xlsx')
X_name = ["Process temperature [℃]", "Process time [min]", "H2 pressure [Pa]", "H2 flow rate [sccm]", "RF power [W]", "Electrod distance [mm]"]
y_name = ["Carrier lifetime [um]"]

# Both stay DataFrames (list-of-columns selection); later cells use
# `X.values` and `y.values` as 2D arrays.
X = df[X_name]
y = df[y_name]

sampling = InitialSampling(name='Initial Sampling', samples=[])

for i in range(len(df)):
    # Process settings of row i, in the column order of `X_name`.
    step = HydrogenPlasmaTreatment(
        name='H2 plasma treatment',
        temperature=X.iloc[i, 0],
        duration=X.iloc[i, 1],
        h2_pressure=X.iloc[i, 2],
        h2_flow_rate=X.iloc[i, 3],
        rf_power=X.iloc[i, 4],
        electrode_distance=X.iloc[i, 5],
    )
    # The measured target lives in `y`; reading it from `X` would store the
    # process temperature as the carrier lifetime.
    result = PassivationPerformanceResult(
        name='Passivation performance',
        carrier_lifetime=y.iloc[i, 0],
    )
    measurement = PassivationPerformanceMeasurement(
        name=f'Initial Sample {i+1}',
        steps=[step],
        results=[result],
    )

    # Upload the measurement as its own archive file under ./samples/ and
    # keep the returned reference in the sampling step.
    ref = create_entry_with_api(
        section=measurement,
        base_url=url,
        upload_id=upload_id,
        file_name=(
            measurement.name.replace(' ', '_') +
            '.archive.json'
        ),
        path='./samples/',
    )
    sampling.samples.append(
        PassivationPerformanceMeasurementReference(
            reference=ref
        )
    )

analysis.data.steps.append(sampling)

Sending post request @ http://nomad_oasis_proxy/nomad-oasis/api/v1/uploads/xY-Qb7dlQnahl1sbVbYoaQ/raw/./samples/
Sending post request @ http://nomad_oasis_proxy/nomad-oasis/api/v1/uploads/xY-Qb7dlQnahl1sbVbYoaQ/raw/./samples/
Sending post request @ http://nomad_oasis_proxy/nomad-oasis/api/v1/uploads/xY-Qb7dlQnahl1sbVbYoaQ/raw/./samples/
Sending post request @ http://nomad_oasis_proxy/nomad-oasis/api/v1/uploads/xY-Qb7dlQnahl1sbVbYoaQ/raw/./samples/
Sending post request @ http://nomad_oasis_proxy/nomad-oasis/api/v1/uploads/xY-Qb7dlQnahl1sbVbYoaQ/raw/./samples/
Sending post request @ http://nomad_oasis_proxy/nomad-oasis/api/v1/uploads/xY-Qb7dlQnahl1sbVbYoaQ/raw/./samples/
Sending post request @ http://nomad_oasis_proxy/nomad-oasis/api/v1/uploads/xY-Qb7dlQnahl1sbVbYoaQ/raw/./samples/
Sending post request @ http://nomad_oasis_proxy/nomad-oasis/api/v1/uploads/xY-Qb7dlQnahl1sbVbYoaQ/raw/./samples/
Sending post request @ http://nomad_oasis_proxy/nomad-oasis/api/v1/uploads/xY-Qb7dlQnahl1sbVbYoa

## Perform MinMax scaling of the dataset
- Adds a `MinMaxScaling` step to the analysis

In [7]:
# Scale the input data using Minmax scaling
# Store the min max ranges for each parameter in analysis step

import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Row 0 holds the lower limits and row 1 the upper limits; columns follow
# the order of the input columns in `X`.
minmax=np.array([[50, 0.25, 100, 10, 270, 10],[300,4,700,100, 450, 40]])
# Fitting on the two limit rows (instead of the data) maps each declared
# range, not the observed range, onto [0, 1].
scaler = MinMaxScaler(feature_range=(0,1), copy=True)
scaler.fit(minmax)
X1= scaler.transform(X.values)

# The `comment` records the scaling code alongside the step. Its import line
# previously read `import numpy from np`, which is not valid Python.
minmax_scaling = MinMaxScaling(
    name='Minmax Scaling',
    comment="""
    import numpy as np
    from sklearn.preprocessing import MinMaxScaler

    minmax=np.array([[50, 0.25, 100, 10, 270, 10],[300,4,700,100, 450, 40]])
    scaler = MinMaxScaler(feature_range=(0,1), copy=True)
    scaler.fit(minmax)
    X1= scaler.transform(X)
    """
)

# One `MinMaxRange` per parameter, in the same column order as `minmax`.
for i, key in enumerate(['temperature', 'duration', 'h2_pressure', 'h2_flow_rate', 'rf_power', 'electrode_distance']):
    minmax_scaling.min_max_ranges.append(MinMaxRange(name=key, min_value=minmax[0,i], max_value=minmax[1,i]))

analysis.data.steps.append(minmax_scaling)

## Train the Surrogate model and save it
- Trains the model and saves it in the `models/` directory
- Adds the path of the model to the analysis

In [8]:
# Train Surrogate model
import pickle
from pathlib import Path

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import (
    RBF, ConstantKernel as C, WhiteKernel as Wh
)


# Constant * anisotropic RBF (one length scale per input) + white noise.
kernel = (
    C(300.0, (1e0, 1e4)) *
    RBF([1,1,1,1,1,1], (0.2, 10)) +
    Wh(1, (1, 30))
)

gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10)
gp.fit(X1, y.values)

# pickle the model
model_path = "models/GPSurrogate.pkl"
Path("models").mkdir(parents=True, exist_ok=True)
# Context manager guarantees the file is flushed and closed before it is
# read back below.
with open(model_path, 'wb') as model_file:
    pickle.dump(gp, model_file)

analysis.data.surrogate_model = SurrogateModel(
    name='Gaussian Process Surrogate Model',
    model_type="Gaussian Process",
    trained_on=sampling.samples,
    model_path=model_path,
)

# Reload from disk so the in-memory model is the persisted one.
# Only unpickle files written by this notebook: pickle can execute arbitrary
# code when loading untrusted data.
with open(model_path, 'rb') as model_file:
    gp = pickle.load(model_file)




## Define the Acquisition Strategy
- defines acquisition function using Upper Confidence Bound method
- defines function to get proposal from the BO model

In [9]:
from scipy.optimize import minimize

# define the acquisition function
def UpperConfidenceBoundObjective(x, k=1):
    """
    Negated Upper Confidence Bound of the surrogate model `gp` at `x`.

    The sign is flipped so that a minimizer maximizes the bound.

    Args:
        x: Point to evaluate; it is reshaped to a single row before being
            passed to `gp.predict`.
        k: Weight of the predicted standard deviation relative to the
            predicted mean. Defaults to 1, the value that was previously
            hard-coded.

    Returns:
        `-(mean + k * std)`, with mean and std as returned by
        `gp.predict(..., return_std=True)`.
    """
    y_pred, STD = gp.predict(x.reshape(1, -1), return_std=True)
    return -(y_pred + k * STD)

# define class for indexing acquisition in NOMAD
class AcquisitionIndexer:
    """Creates sample entries for acquired points, numbering them in order."""

    def __init__(self):
        # Number of sample archives created so far by this instance.
        self.count = 0

    def create_sample_archive(self, proposal, carrier_lifetime):
        """
        Create the archive entry for a sample taken at a proposed setting.

        Args:
            proposal: Section used as the single step of the measurement.
            carrier_lifetime: Value stored in the measurement's result.

        Returns:
            Whatever `create_entry_with_api` returns for the new entry.
        """
        lifetime_result = PassivationPerformanceResult(
            name='Passivation performance',
            carrier_lifetime=carrier_lifetime,
        )
        sample = PassivationPerformanceMeasurement(
            name=f'Sample at acquired point {self.count+1}',
            steps=[proposal],
            results=[lifetime_result],
        )

        entry_ref = create_entry_with_api(
            section=sample,
            base_url=url,
            upload_id=upload_id,
            file_name=sample.name.replace(' ', '_') + '.archive.json',
            path='./samples/',
        )

        # Advance the counter only after the entry was created.
        self.count += 1
        return entry_ref

def generate_proposal():
    """
    Propose the next process settings by optimizing the acquisition function.

    Minimizes `UpperConfidenceBoundObjective` with L-BFGS-B from a random
    start in the scaled space, prints the objective value at the optimum and
    converts the optimum back to physical values with
    `scaler.inverse_transform`.

    Returns:
        HydrogenPlasmaTreatment: Section named 'Proposed parameters' holding
            the six proposed settings.
    """
    n_params = 6
    x0 = np.random.rand(n_params)
    res = minimize(
        UpperConfidenceBoundObjective,
        x0,
        args=(),
        method='L-BFGS-B',
        # Keep the search inside the min-max scaled domain [0, 1]. Without
        # bounds the optimizer may leave it and propose settings outside the
        # ranges the scaler was fitted on.
        bounds=[(0.0, 1.0)] * n_params,
        # `iprint` was passed as -1 before; it is omitted because that is
        # the default and the option is deprecated in recent SciPy releases.
        options={
            'maxcor': 10,
            'ftol': 2.220446049250313e-09,
            'gtol': 1e-05,
            'eps': 1e-08,
            'maxfun': 15000,
            'maxiter': 15000,
            'maxls': 20,
        },
    )
    # Back to physical values; [0] picks the single row.
    params = scaler.inverse_transform(res.x.reshape(1, -1))[0]

    print(res.fun)

    return HydrogenPlasmaTreatment(
        name='Proposed parameters',
        temperature=params[0],
        duration=params[1],
        h2_pressure=params[2],
        h2_flow_rate=params[3],
        rf_power=params[4],
        electrode_distance=params[5],
    )

# Single indexer instance, so the sample counter (and with it the sample
# names and file names) keeps increasing across acquisitions.
acquisition_indexer = AcquisitionIndexer()

## Loop to perform Acquisition and retrain the model
- Adds an `Acquisition` step to the analysis
- Generates a proposal
- Creates a sample for the proposal
- Retrains the model based on it
- Updates the analysis archive

In [10]:
# Number of acquisition rounds to run.
N_ACQUISITIONS = 3

# Attribute names of the proposal, in the column order the scaler was fitted
# on.
PARAMETER_KEYS = (
    'temperature',
    'duration',
    'h2_pressure',
    'h2_flow_rate',
    'rf_power',
    'electrode_distance',
)


def simulate_carrier_lifetime():
    """Stand-in for a measurement: random value in [500, 1500)."""
    return 1000*(float(np.random.random()) + 0.5)


# Training set that grows with every acquired sample. `fit` discards what
# the model learned before, so each refit needs the initial samples plus all
# acquired points, not just the newest one.
X_train = np.asarray(X1, dtype=float)
y_train = np.asarray(y.values, dtype=float)

for iters in range(1, N_ACQUISITIONS + 1):
    # generate proposal
    proposal = generate_proposal()
    x = [[getattr(proposal, key).magnitude for key in PARAMETER_KEYS]]
    x1 = scaler.transform(x)

    # acquire a sample at the proposed setting
    # (kept in its own name so the target DataFrame `y` is not overwritten)
    carrier_lifetime = simulate_carrier_lifetime()
    acquisition = Acquisition(
        name='Acquisition',
        proposal=proposal,
        sample=PassivationPerformanceMeasurementReference(
            reference=acquisition_indexer.create_sample_archive(
                proposal, carrier_lifetime
            )
        ),
    )
    analysis.data.steps.append(acquisition)

    # retrain the model on all data seen so far
    X_train = np.vstack([X_train, x1])
    y_train = np.vstack([y_train, [[carrier_lifetime]]])
    gp.fit(X_train, y_train)

    # update analysis ELN
    create_entry_with_api(
        section=analysis,
        base_url=url,
        upload_id=upload_id,
        file_name=analysis.metadata.mainfile
    )

# NOTE(review): this restores the model pickled after the initial training,
# so the refits made in the loop are neither kept in memory nor persisted —
# confirm that this reset is intended.
with open(model_path, 'rb') as model_file:
    gp = pickle.load(model_file)

-1204.5668501660953
Sending post request @ http://nomad_oasis_proxy/nomad-oasis/api/v1/uploads/xY-Qb7dlQnahl1sbVbYoaQ/raw/./samples/




Sending post request @ http://nomad_oasis_proxy/nomad-oasis/api/v1/uploads/xY-Qb7dlQnahl1sbVbYoaQ/raw/
-621.5952215681335
Sending post request @ http://nomad_oasis_proxy/nomad-oasis/api/v1/uploads/xY-Qb7dlQnahl1sbVbYoaQ/raw/./samples/




Sending post request @ http://nomad_oasis_proxy/nomad-oasis/api/v1/uploads/xY-Qb7dlQnahl1sbVbYoaQ/raw/
-801.122178489229
Sending post request @ http://nomad_oasis_proxy/nomad-oasis/api/v1/uploads/xY-Qb7dlQnahl1sbVbYoaQ/raw/./samples/




Sending post request @ http://nomad_oasis_proxy/nomad-oasis/api/v1/uploads/xY-Qb7dlQnahl1sbVbYoaQ/raw/
