# Web Interface

In [1]:
%load_ext autoreload
%autoreload 2

In [6]:
from summit.data import DataSet
from summit.domain import ContinuousVariable, Constraint, Domain
from summit.strategies import TSEMO2
from summit.models import GPyModel, AnalyticalModel
from summit.utils import pareto_efficient

from sklearn.model_selection import KFold
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d.art3d as art3d

from functools import partial
import io
import string
import random

import ipywidgets as widgets
from IPython.display import display
# from tqdm import tqdm_notebook

## 1. Set up problem

We will first import the existing data. Here, we show the last five experiments.

In [3]:
def random_string(stringLength=10):
    """Return a random string of ``stringLength`` lowercase ASCII letters."""
    alphabet = string.ascii_lowercase
    picked = []
    # One independent draw per character.
    for _ in range(stringLength):
        picked.append(random.choice(alphabet))
    return ''.join(picked)

In [7]:

def create_file_uploader(file_id):
    """Display an Excel upload widget and cache the uploaded sheet as CSV.

    When a file is uploaded, it is parsed with ``pd.read_excel``, written to
    ``tmp/{file_id}`` as CSV (without the index) and its last five rows are
    shown in the output area below the upload button.

    Parameters
    ----------
    file_id : str
        Identifier used as the file name of the CSV cache inside ``tmp/``.
    """
    import os  # local import so the function does not depend on extra top-level imports

    data_upload = widgets.FileUpload(
        # The accept filter takes file extensions with a leading dot
        # (e.g. '.txt', '.pdf'); the Excel extension is '.xlsx'.
        accept='.xlsx',
        multiple=False  # True to accept multiple files upload else False
    )
    output = widgets.Output()

    @output.capture()
    def display_data(file):
        uploads = file['new']
        # The value can be empty (e.g. when the widget is reset); nothing to do then.
        if not uploads:
            return
        # ipywidgets 7.x delivers a dict keyed by file name; ipywidgets 8.x is
        # documented to deliver a tuple of per-file dicts. Take the first file
        # in either case.
        if isinstance(uploads, dict):
            first_upload = next(iter(uploads.values()))
        else:
            first_upload = uploads[0]
        excel_raw = first_upload['content']
        data = pd.read_excel(io.BytesIO(excel_raw))
        # Make sure the cache directory exists before writing into it.
        os.makedirs('tmp', exist_ok=True)
        data.to_csv(f'tmp/{file_id}', index=False)
        print('The last five rows from the spreadsheet.')
        display(data.tail(5))
    data_upload.observe(display_data, names='value')

    # The upload button itself is rendered inside the output area, so the
    # preview printed by the callback appears alongside it.
    with output:
        display(data_upload)
    display(output)

Select the columns of the spreadsheet that represent the **manipulated variables**. Hold Ctrl (or Command on Mac) while clicking to select multiple columns.

In [6]:
# Reload the uploaded spreadsheet from its CSV cache and collect its column names.
data = pd.read_csv(f'tmp/{file_id}')
columns = data.columns.to_list()

# Multi-select list from which the input columns are chosen.
inputs_select = widgets.SelectMultiple(options=columns, description='Input columns')
display(inputs_select)

SelectMultiple(description='Input columns', options=('No', 'Texapon', 'DehytonAB30', 'Plantacare818', 'CC7BZ',…

Select the columns of the spreadsheet that represent the **objectives** (i.e., the variables being maximized or minimized). Hold Ctrl (or Command on Mac) while clicking to select multiple columns.

In [7]:
# Multi-select list from which the objective (output) columns are chosen.
outputs_select = widgets.SelectMultiple(options=columns, description='Output columns')

# Button used to confirm the selected input and output columns.
continue_button = widgets.Button(description='Add variables', tooltip='Add variables to domain')

display(outputs_select, continue_button)

def get_input_outputs(b):
    """Save the currently selected input and output column names to disk.

    Registered as the click handler of ``continue_button``; the argument ``b``
    passed by the callback is not used. Each selected column name is written
    on its own line to ``tmp/{file_id}_inputs`` and ``tmp/{file_id}_outputs``.
    """
    # Read both selections up front, then write one file per selection.
    targets = (
        (f'tmp/{file_id}_inputs', inputs_select.value),
        (f'tmp/{file_id}_outputs', outputs_select.value),
    )
    for path, chosen in targets:
        with open(path, 'w') as handle:
            for column in chosen:
                handle.write(f'{column}\n')


continue_button.on_click(get_input_outputs)
    

SelectMultiple(description='Output columns', options=('No', 'Texapon', 'DehytonAB30', 'Plantacare818', 'CC7BZ'…

Button(description='Add variables', style=ButtonStyle(), tooltip='Add variables to domain')

In [8]:
def reopen(filename):
    """Read a text file and return its non-empty lines as a list of strings.

    The trailing newline of every line is removed. Blank lines are skipped
    instead of being treated as end-of-file, so entries that follow a blank
    line are no longer silently dropped.

    Parameters
    ----------
    filename : str
        Path of the text file to read.

    Returns
    -------
    list of str
        The non-empty lines of the file, in file order, without newlines.
    """
    with open(filename, 'r') as f:
        # Iterating over the file ends only at the real end-of-file, which
        # keeps "blank line" and "no more data" distinct.
        stripped = (line.rstrip('\n') for line in f)
        return [line for line in stripped if line]

In [9]:
# Set up the optimization problem domain: build the widgets used to enter
# bounds for the inputs, optional constraints, and bounds/direction for the
# objectives.
input_columns = reopen(f'tmp/{file_id}_inputs')
objective_columns = reopen(f'tmp/{file_id}_outputs')

domain = Domain()

# Default [lower, upper] values pre-filled in every range field.
base_values = [0, 10]
# Maps a column name to the list of widgets holding its settings.
ranges = {}

# Input variables: one row of (label, lower, upper) per input column.
input_grid = [widgets.Label(t) for t in ['\t', 'Lower', 'Upper']]
display(widgets.HTML('Select ranges for the <b>manipulated</b> variables.'))
for v in input_columns:
    items = [widgets.FloatText(value) for value in base_values]
    ranges[v] = items
    input_grid += [widgets.Label(v + '\t')] + items
display(widgets.GridBox(input_grid,
                        layout=widgets.Layout(grid_template_columns="repeat(3, 100px)")))

# Constraints: free-text box, one expression per line.
constraints = []
display(widgets.HTML('Add constraint expressions if needed. The expression is the left hand side of an equation less than or equal to zero. \
                      <br/>Each new constraint should be on its own line.'))
if input_columns:
    placeholder = f'e.g. 0.5*{input_columns[0]} + {input_columns[-1]}'
else:
    # No input columns were saved: fall back to a generic example instead of
    # failing with an IndexError while building the placeholder.
    placeholder = 'e.g. 0.5*x1 + x2'
constraints_box = widgets.Textarea(placeholder=placeholder)
display(constraints_box)

# Objective variables: one row of (label, lower, upper, direction) per objective column.
display(widgets.HTML('Select ranges and type for the <b>objective</b> variables.'))
objective_grid = [widgets.Label(t) for t in ['\t', 'Lower', 'Upper', 'Max/Min']]
for v in objective_columns:
    options = [widgets.FloatText(value) for value in base_values]
    options.append(widgets.Dropdown(options=['Maximize', 'Minimize']))
    ranges[v] = options
    objective_grid += [widgets.Label(v + '\t')] + options
display(widgets.GridBox(objective_grid,
                        layout=widgets.Layout(grid_template_columns="repeat(4, 100px)")))

HTML(value='Select ranges for the <b>manipulated</b> variables.')

GridBox(children=(Label(value='\t'), Label(value='Lower'), Label(value='Upper'), Label(value='Texapon\t'), Flo…

HTML(value='Add constraint expressions if needed. The expression is the left hand side of an equation less tha…

Textarea(value='', placeholder='e.g. 0.5*Texapon + ArlyponTT')

HTML(value='Select ranges and type for the <b>objective</b> variables.')

GridBox(children=(Label(value='\t'), Label(value='Lower'), Label(value='Upper'), Label(value='Max/Min'), Label…

In [22]:
#Create domain
def create_domain():
    """Build a ``Domain`` from the current values of the set-up widgets.

    Reads the bound widgets stored in ``ranges`` for every input and objective
    column and the text of ``constraints_box``. Each non-blank line of the
    constraints box becomes one ``Constraint``; blank lines (including a
    completely empty box) are ignored so that no empty constraint is added.

    Returns
    -------
    Domain
        Domain holding the input variables, constraints and objectives.
    """
    domain = Domain()

    # Inputs: the two widgets per column hold the lower and upper bound.
    for v in input_columns:
        bounds = [w.value for w in ranges[v]]
        domain += ContinuousVariable(v, description='', bounds=bounds)

    # Constraints: one expression per line, skipping blank lines.
    for line in constraints_box.value.splitlines():
        expression = line.strip()
        if expression:
            domain += Constraint(expression)

    # Objectives: lower bound, upper bound and the Maximize/Minimize choice.
    for v in objective_columns:
        lower, upper, direction = [w.value for w in ranges[v]]
        domain += ContinuousVariable(v, description='', bounds=[lower, upper],
                                     is_objective=True,
                                     maximize=(direction == 'Maximize'))
    return domain
domain = create_domain()
domain

0,1,2,3
Name,Type,Description,Values
Texapon,"continuous, input",,"[0.0,15.0]"
DehytonAB30,"continuous, input",,"[0.0,15.0]"
Plantacare818,"continuous, input",,"[0.0,15.0]"
CC7BZ,"continuous, input",,"[0.0,2.0]"
ArlyponTT,"continuous, input",,"[0.0,2.0]"
viscosity,"continuous, minimize objective",,"[0.0,1.0]"
price,"continuous, minimize objective",,"[0.0,1.0]"
turbidity,"continuous, minimize objective",,"[0.0,1.0]"
,constraint,Texapon + DehytonAB30 + Plantacare818-15,


In [23]:
# Price model
def price_function(X):
    """Compute the price as a fixed weighted sum of five columns of ``X``.

    Each listed column is multiplied by its coefficient, the terms are added
    in the listed order, and the total is divided by 1e4. The result is
    converted to a NumPy array, made at least 2-D and transposed (a column
    vector when each ``X[column]`` is one-dimensional).
    """
    coefficients = (
        ('Texapon', 135.13),
        ('DehytonAB30', 63.95),
        ('Plantacare818', 62.87),
        ('CC7BZ', 90),
        ('ArlyponTT', 75),
    )
    total = None
    for column, coefficient in coefficients:
        term = coefficient * X[column]
        total = term if total is None else total + term
    scaled = total / 1e4
    return np.atleast_2d(scaled.to_numpy()).T

## 2. Visualize Data

Let's visualize the data now. Here we show the approximate Pareto front based on existing experimental data. It has been difficult to achieve low values of viscosity.

In [12]:
# button_visualize = widgets.Button(
#     description='Visualize Data',
#     tooltip='Visualize in a 3D plot',
#     icon='play')

# output_visualize = widgets.Output()
# display(button_visualize, output_visualize)

# def visualize_data(b):
#     _ = plt.tight_layout()
#     fig = plt.figure(figsize=(10, 7))
#     ax = fig.add_subplot(111, projection='3d')
#     pareto_front, indices = pareto_efficient(data[['viscosity', 'price', 'turbidity']].to_numpy(),
#                                       maximize=False)
#     # ax.scatter(data['viscosity'], data['price'], data['turbidity'], 
#     #            alpha=0.1, label='all data', marker='^', s=50)
#     img = ax.scatter(pareto_front[:, 0], pareto_front[:, 1], pareto_front[:, 2], 
#                      alpha=0.7, label='pareto front', s=100, c=data.index[indices])
#     _ = plt.colorbar(img, label='Experiment number')
#     ax.set_xlabel('viscosity'); ax.set_ylabel('price'); ax.set_zlabel('turbidity')
#     for xi, yi, zi in pareto_front:        
#         line=art3d.Line3D(*zip((xi, yi, 0), (xi, yi, zi)), 
#                           marker='o', markevery=(1, 1), c='k',alpha=0.5)
#         _ = ax.add_line(line)
#     _ = ax.view_init(20, -60)
#     _ = ax.set_title('Approximate Pareto Front')
#     output_visualize.clear_output(wait=True)
#     with output_visualize:
#         display(fig)
# button_visualize.on_click(visualize_data)

Button(description='Visualize Data', icon='play', style=ButtonStyle(), tooltip='Visualize in a 3D plot')

Output()

## 3. Run Optimization

Now, we can run the optimization. Click the **Run optimization** button. It might take a couple of minutes.

In [30]:
# Build the widgets, the per-objective models and the TSEMO2 strategy used by
# the optimization step.

# Number of experiments to request.
# NOTE(review): min=0 lets the user request zero experiments — confirm whether
# the lower bound should be 1.
num_experiments = widgets.BoundedIntText(
    value=3,
    min=0,
    max=10,
    step=1,
    description='# Expr:',
    disabled=False
)
button_run_opt = widgets.Button(
    description='Run optimization',
    tooltip='Run the optimization',
    icon='play')
# Output area that receives the progress message and the suggested experiments.
output_run_opt = widgets.Output()

display(num_experiments, button_run_opt, output_run_opt)

# Input dimensionality handed to the GPyModel instances: continuous dimensions
# plus discrete variables of the domain.
input_dim = domain.num_continuous_dimensions() + domain.num_discrete_variables()
# One model per objective name; 'price' uses the analytical price_function
# instead of a GPyModel.
models = {'viscosity': GPyModel(input_dim=input_dim), 
          'price': AnalyticalModel(function=price_function),
          'turbidity': GPyModel(input_dim=input_dim)
         }
tsemo = TSEMO2(domain, models)

# Keep only the selected input and objective columns.
# NOTE(review): `data` is rebound below from the DataFrame loaded with
# pd.read_csv to a DataSet, so re-running this cell indexes the DataSet rather
# than the original DataFrame — confirm that this is safe.
data_pd = data[input_columns + objective_columns]
data = DataSet.from_df(data_pd)

def run_optimization(b):
    """Generate the next experiments with TSEMO and show them in the output area.

    Registered as the click handler of ``button_run_opt``; the argument ``b``
    passed by the callback is not used. The number of experiments is read from
    ``num_experiments``.
    """
    n = num_experiments.value

    finished = False
    with output_run_opt:
        print("Starting. This might take a couple minutes.")
        experiments = tsemo.generate_experiments(data, n)
        finished = True

    # The Output context manager may display and swallow an exception raised
    # inside it. In that case leave the traceback visible instead of clearing
    # it and failing again on the undefined ``experiments``.
    if not finished:
        return

    output_run_opt.clear_output()
    with output_run_opt:
        print("Next experiments:")
        display(experiments)

button_run_opt.on_click(run_optimization)


BoundedIntText(value=3, description='# Expr:', max=10)

Button(description='Run optimization', icon='play', style=ButtonStyle(), tooltip='Run the optimization')

Output()

## 4. Validate Model

Next, we'll make a cross-validation plot. Click the **Make CV Plot** button to generate one.

In [32]:
# Button that triggers the cross-validation plot and the output area that shows it.
button_plot_cv = widgets.Button(description='Make CV Plot', tooltip='CV Plot', icon='play')
output_plot_cv = widgets.Output()
display(button_plot_cv, output_plot_cv)
def plot_cv(b):
    """Draw measured-vs-predicted cross-validation plots for the GP objectives.

    Registered as the click handler of ``button_plot_cv``; the argument ``b``
    passed by the callback is not used. For 'viscosity' and 'turbidity', a
    ``GPyModel`` is fitted on each training split of a 3-fold ``KFold`` and
    its predictions for the held-out split are scattered against the measured
    values, together with a dashed identity line. The figure is shown in
    ``output_plot_cv``.
    """
    gp = GPyModel(input_dim=input_dim)
    X = data[input_columns].to_numpy(dtype=np.float64)
    objectives = data[objective_columns].to_numpy(dtype=np.float64)

    # Mask out weird data: drop rows whose first objective exceeds 1.0.
    mask = ~(objectives[:, 0] > 1.0)
    X = X[mask, :]

    with output_plot_cv:
        print('Making plots...')

    # The splitter does not depend on the objective, so build it once.
    kf = KFold(n_splits=3)
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))
    for ax, name in zip(axes, ['viscosity', 'turbidity']):
        y = data[name].to_numpy(dtype=np.float64)[mask]
        for train, test in kf.split(X):
            gp.fit(X[train], np.atleast_2d(y[train]).T)
            y_predict = gp.predict(X[test])
            ax.scatter(y[test], y_predict, c='k')
        # Identity line: points on it are predicted perfectly.
        min_y = y.min()
        max_y = y.max()
        ax.plot([min_y, max_y], [min_y, max_y], 'k--', lw=4)
        ax.set_xlabel('Measured')
        ax.set_ylabel('Predicted')
        ax.set_title(f'{name} Cross Validation'.title())
    output_plot_cv.clear_output(wait=True)
    with output_plot_cv:
        plt.show()
button_plot_cv.on_click(plot_cv)

Button(description='Make CV Plot', icon='play', style=ButtonStyle(), tooltip='CV Plot')

Output()

In [None]:
# Random identifier used to name this session's files under tmp/.
# NOTE(review): earlier cells already read `file_id` (e.g. the cell that calls
# pd.read_csv(f'tmp/{file_id}')), so this cell has to be executed before them;
# consider moving it directly after the definition of random_string so the
# notebook works with Restart & Run All.
file_id = random_string()