In [1]:
# Import Widget packages
import datetime
import io
import pickle
import re
import sys
import urllib.parse as urlparse
import uuid
from datetime import date
from urllib.parse import parse_qs

import ipyvuetify as v
import ipywidgets
import pandas as pd
import pytz
from IPython.display import display
from ipyvuetify.extra import FileInput
from seeq import sdk
from seeq import spy

In [2]:
# Import Sklearn packages
import numpy as np
import math
import smogn
import imblearn
from imblearn.pipeline import Pipeline as imbpipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import FunctionTransformer
from imblearn.over_sampling import SMOTE
from sklearn.metrics import mean_squared_error
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import cross_val_score
from sklearn import linear_model
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from scipy import stats
from sklearn.linear_model import RidgeCV
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import make_scorer, mean_absolute_percentage_error, accuracy_score, mean_absolute_error
from sklearn.metrics import balanced_accuracy_score
from sklearn.base import is_classifier
from sklearn.decomposition import PCA
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import f1_score
from sklearn.base import TransformerMixin, BaseEstimator

In [3]:
# Parameters
# A regressor is dropped when its percentage of missing values is at or above this value.
miss_threshold = 20
# A classification target is treated as imbalanced when
# (smallest class count / largest class count) is at or below this ratio.
imbalance_threshold = 0.2
# Pre-trained model uploaded through the file input; None until a file has been loaded.
# (No `global` statement is needed here: at module level it has no effect.)
loaded_object = None

In [4]:
# Widgets
app = v.App()

# Seeq logo; the same URL is used for the image and for its lazy-loading placeholder.
_SEEQ_LOGO_URL = "https://seeq.com/sites/default/files/seeq-content/seeq-logo-blue-web-33h.svg"

image = v.Img(
    lazy_src=_SEEQ_LOGO_URL,
    aspect_ratio="16/9",
    width=100,
    src=_SEEQ_LOGO_URL,
)

message = v.Text(
    children=["Soft Sensor Add-On"],
    style_="font-size:22px"
)

message2 = v.Text(
    children=["A simple tool to predict a signal based on multiple regressor signals with Machine Learning models"],
    style_="font-size:15px"
)

# Title and tagline side by side, pushed apart by a spacer.
# This row gets its own name: `layout` is bound further down in this cell to the
# prediction date-picker row, and reusing the name for two widgets was confusing.
header_layout = v.Layout(
    row=True,
    children=[
        message,
        v.Spacer(),
        message2
    ]
)

header = v.Container(children=[image, header_layout])

url_input = v.TextField(label='Workbench URL', v_model=None)

# NOTE(review): `button_style` is an ipywidgets.Button option; confirm that v.Btn
# honours it (Vuetify buttons are normally coloured with `color=`).
execute_button = v.Btn(
    button_style='success',
    children=['Get Signals']
)


# Options shared by both signal pickers: entries are {'Name', 'ID'} records, the
# whole record is returned on selection, and the picker starts out disabled.
_signal_select_options = dict(
    item_text='Name',
    item_value='ID',
    v_model=None,
    return_object=True,
    disabled=True,
)

# Single-choice picker for the signal to predict
items_dropdown = v.Select(label="Select signal to predict", **_signal_select_options)

# Multi-choice picker for the regressor signals
regressors = v.Select(label="Select regressors", multiple=True, **_signal_select_options)

# File input widget for an optional pre-trained model
file_input = FileInput(
    accept='.pkl',   # Restrict the file chooser to pickle files
    multiple=False,  # Allow only single file selection
    label='Upload pre-trained model (.pkl)',
    disabled=False,
)

# Vertical divider between the regressor picker and the file input
divider = v.Divider(vertical=True)

# One row with three columns: regressors | divider | file input
layout_regressors_file = v.Row(children=[
    v.Col(children=[widget]) for widget in (regressors, divider, file_input)
])

def _make_date_picker(label):
    """Build a date picker that starts disabled, has no date selected, starts
    weeks on day 1 and cannot go past today's date."""
    return v.DatePicker(
        label=label,
        v_model=None,
        first_day_of_week=1,
        no_title=False,
        max=date.today().isoformat(),
        disabled=True,
    )


def _captioned_picker_col(caption, picker, padding_class):
    """Wrap a picker in a 5-wide column with a grey caption as its first child.

    The child order (caption first, picker second) is relied upon by the button
    handlers, which address these widgets as ``row.children[i].children[0|1]``.
    """
    return v.Col(
        children=[
            v.Html(tag='label', children=[caption], class_='mr-1', style_='color: grey;'),  # Grey out the label
            picker,
        ],
        cols=5,
        class_=padding_class,  # Reduced padding
    )


# Training window pickers (these were previously labelled "Prediction ..." by copy-paste)
start_select = _make_date_picker('Training Start:')
end_select = _make_date_picker('Training End:')

# Prediction window pickers
start_select_1 = _make_date_picker('Prediction Start:')
end_select_1 = _make_date_picker('Prediction End:')

# Prediction window row: captions start greyed out while the pickers are disabled
layout = v.Row(
    class_='d-flex align-center',
    children=[
        _captioned_picker_col('Prediction window start:', start_select_1, 'pr-1'),
        _captioned_picker_col('Prediction window end:', end_select_1, 'pl-1'),
    ]
)

# Training window row
layout3 = v.Row(
    class_='d-flex align-center',
    children=[
        _captioned_picker_col('Training window start:    ', start_select, 'pr-1'),
        _captioned_picker_col('Training window end:    ', end_select, 'pl-1'),
    ]
)

# Training window on the left, prediction window on the right
layout_dates = v.Row(children=[
    v.Col(children=[layout3]),
    v.Col(children=[layout])
])

# Empty block whose bottom margin provides vertical spacing between widgets
vertical_space = v.Html(tag='div', class_='mb-4')

# Submit starts disabled; the "Get Signals" handler enables it
submit_button = v.Btn(button_style='success', children=['Submit'], disabled=True)

# Single-line area for warnings and errors
warning_error_message = v.Text(children=[""], style_="font-size:15px")

# Auto-growing text area for progress and result messages
info_message = v.Textarea(value="", style_="font-size:15px", auto_grow=True)

# Everything below the header, stacked top to bottom
input_container = v.Container(
    children=[
        url_input,
        execute_button,
        items_dropdown,
        layout_regressors_file,
        layout_dates,
        vertical_space,
        submit_button,
        vertical_space,
        warning_error_message,
        vertical_space,
        info_message,
    ]
)



In [5]:
#  Get signals button
def _set_date_inputs_enabled(enabled):
    """Enable or disable the four date pickers and recolour their captions
    (black when enabled, grey when disabled)."""
    caption_style = 'color: black;' if enabled else 'color: grey;'
    for picker in (start_select, end_select, start_select_1, end_select_1):
        picker.disabled = not enabled
    for row in (layout, layout3):
        for column in row.children:
            # First child of each column is the caption label
            column.children[0].style_ = caption_style


def _show_worksheet_message(text, background):
    """Write `text` into the warning area on the given background colour."""
    warning_error_message.children = [text]
    warning_error_message.style_ = f"font-size:15px; color: Black; background-color: {background};"


def _worksheet_display_range(url):
    """Return the display range of the worksheet referenced in `url`.

    Raises:
        ValueError: the URL contains no ``/worksheet/<id>`` segment.
        LookupError: the pulled workbook has no worksheet with that ID.
    """
    match = re.search(r"/worksheet/([a-zA-Z0-9-]+)", url)
    if match is None:
        raise ValueError("the URL does not contain a worksheet ID")
    worksheet_id = match.group(1)

    wb = spy.workbooks.pull(url)
    for ws in wb[0].worksheets:
        if ws.id == worksheet_id:
            return ws.display_range
    raise LookupError(f"worksheet {worksheet_id} was not found in the workbook")


def get_worksheet_items(*args):
    """Click handler for "Get Signals".

    Validates the URL, fills the target and regressor pickers with the
    worksheet's signals, enables the inputs and pre-fills all four date pickers
    with the worksheet's display range. Problems are reported in the warning
    area; the button spinner is always switched off when the handler ends.
    """
    execute_button.loading = True
    try:
        url = url_input.v_model

        if not is_valid_seeq_url(url):
            _show_worksheet_message("Invalid Workbench URL", "red")
            _set_date_inputs_enabled(False)
            return

        _show_worksheet_message("", "white")

        # Get items on worksheet and populate signal dropdown and regressor multiselect
        items = spy.search(url, quiet=True)
        # na=False: rows without a Type must not poison the boolean mask
        signals = items.loc[items.Type.str.contains('Signal', na=False)]
        dropdown_items = signals[['Name', 'ID']].to_dict('records')

        if not dropdown_items:
            # Previously this raised IndexError and left the spinner running
            _show_worksheet_message("No signals found on this worksheet", "red")
            _set_date_inputs_enabled(False)
            return

        items_dropdown.items = dropdown_items
        # NOTE(review): kept from the original; the rest of the notebook reads
        # `v_model`, not `select`, so this presumably does not preselect anything.
        items_dropdown.select = dropdown_items[0]
        items_dropdown.disabled = False
        regressors.items = dropdown_items
        regressors.disabled = False
        submit_button.disabled = False

        # Color labels to indicate active widgets
        _set_date_inputs_enabled(True)

        # Pre-fill training and prediction windows with the worksheet display range
        try:
            display_range = _worksheet_display_range(url)
            start_day = display_range['Start'].date().isoformat()
            end_day = display_range['End'].date().isoformat()
        except Exception as e:
            # The signals are already usable; only the date pre-fill failed
            _show_worksheet_message(f"Could not read the worksheet display range: {e}", "yellow")
            return

        for picker in (start_select_1, start_select):
            picker.v_model = start_day
        for picker in (end_select_1, end_select):
            picker.v_model = end_day
    finally:
        execute_button.loading = False

In [6]:
# Update regressors when choosing signal to predict
def update_regressors(change):
    """Observer for the target picker: keep the target out of the regressors.

    Offers every worksheet signal except the selected target as a regressor and
    removes the target from the current regressor selection if it is there.
    When the target selection is cleared, all signals are offered again.

    Args:
        change: traitlets change notification (unused; the picker is read directly).
    """
    selected_item = items_dropdown.v_model
    candidates = items_dropdown.items or []

    if not selected_item:
        # Selection cleared: nothing to exclude (previously raised TypeError on None)
        regressors.items = list(candidates)
        return

    # Compare by ID rather than Name: two different signals may share a name
    target_id = selected_item['ID']
    regressors.items = [item for item in candidates if item['ID'] != target_id]

    # Drop the target from an existing regressor selection so it cannot be used
    # as its own predictor
    chosen = regressors.v_model
    if chosen:
        remaining = [item for item in chosen if item['ID'] != target_id]
        if len(remaining) != len(chosen):
            regressors.v_model = remaining

In [7]:
# Useful functions
def combine_to_list(item1, item2):
    """Concatenate two items into one new flat list.

    Each item may be a string (treated as a single element) or a list/tuple of
    elements. The inputs are never modified.

    Args:
        item1: string, list or tuple placed first.
        item2: string, list or tuple placed second.

    Returns:
        A new list with the elements of `item1` followed by those of `item2`.

    Raises:
        TypeError: if an item is neither a string nor a list/tuple. (The
            previous implementation silently returned None, also for two lists.)
    """
    def _as_list(item):
        # A string is one element, not a sequence of characters
        if isinstance(item, str):
            return [item]
        if isinstance(item, (list, tuple)):
            return list(item)
        raise TypeError(
            f"combine_to_list expects strings or lists, got {type(item).__name__}"
        )

    return _as_list(item1) + _as_list(item2)

# Custom scoring function: complement of MAPE, so that a higher score is better
def custom_mape_scorer(y_true, y_pred):
    """Return ``1 - MAPE`` for the given true and predicted values."""
    return 1 - mean_absolute_percentage_error(y_true, y_pred)

# Custom scoring function: reciprocal of MAE, so that a higher score is better
def custom_mae_scorer(y_true, y_pred):
    """Return ``1 / MAE`` for the given true and predicted values.

    The MAE is floored at machine epsilon so that a perfect fit (MAE == 0)
    yields a very large finite score instead of dividing by zero.
    """
    mae = mean_absolute_error(y_true, y_pred)
    return 1 / max(mae, np.finfo(np.float64).eps)

# Function to check if the URL is a valid Seeq address
def is_valid_seeq_url(url):
    """Return True when `spy.search` accepts `url`, False otherwise.

    Args:
        url: Workbench URL typed by the user; may be None or empty.

    Returns:
        False for a missing or blank URL (no search is attempted) and for any
        URL on which the search raises; True when the search completes.
    """
    # An empty text field yields None or "": reject it without calling the server
    if not isinstance(url, str) or not url.strip():
        return False

    try:
        # Attempt to perform a search using the URL
        spy.search(url, quiet=True)
    except Exception:
        # Any failure means the URL cannot be used
        return False
    return True

def get_signal_unit(signal_id):
    """Retrieves the unit of a Seeq signal given its ID.

    Args:
        signal_id: ID of the signal to look up.

    Returns:
        The unit of the first matching signal, or None when the signal is not
        found, the search result has no unit column, or the search fails. A
        message is printed in each of those cases; the function never raises.
    """
    try:
        signals = spy.search({'ID': signal_id, 'Type': 'Signal'}, quiet=True)
        if signals.empty:  # Check if any signals were found
            print(f"Signal with ID '{signal_id}' not found.")
            return None

        # 'Value Unit Of Measure' is the column the submit handler in this
        # notebook reads; 'Units' is kept as a fallback for compatibility.
        for column in ('Value Unit Of Measure', 'Units'):
            if column in signals.columns:
                return signals[column].iloc[0]

        print(f"Signal with ID '{signal_id}' has no unit information.")
        return None
    except Exception as e:
        print(f"An error occurred while retrieving signal metadata: {e}")
        return None

        
# Event handler for changes of the file input (file selected or removed)
def on_file_upload(change):
    """Load an uploaded pre-trained model into the global `loaded_object`.

    With a usable model the training date pickers are disabled, since no
    training takes place. When the file is removed or cannot be loaded,
    `loaded_object` stays None and the training pickers follow the state of the
    prediction pickers (enabled only after signals have been fetched).

    Args:
        change: traitlets change notification (unused; the widget is read directly).
    """
    global loaded_object
    loaded_object = None

    def _restore_training_pickers():
        # Mirror the prediction pickers so the training pickers are not enabled
        # before a worksheet has been loaded
        start_select.disabled = start_select_1.disabled
        end_select.disabled = end_select_1.disabled

    files = file_input.get_files()
    if not files:
        # File removed: a model has to be trained again
        _restore_training_pickers()
        return

    try:
        byte_data = files[0]['file_obj'].read()
        # SECURITY: unpickling executes arbitrary code embedded in the file.
        # Only upload model files from a trusted source.
        model = pickle.loads(byte_data)
        if not hasattr(model, 'predict'):
            raise TypeError("the uploaded object has no predict() method")
    except Exception as e:
        # Previously only IndexError was handled, so a corrupt file failed silently
        warning_error_message.children = [f"Could not load the uploaded model: {e}"]
        warning_error_message.style_ = "font-size:15px; color: Black; background-color: red;"
        _restore_training_pickers()
        return

    loaded_object = model
    start_select.disabled = True
    end_select.disabled = True

In [8]:
# Classifier model parameters and hyperparameters for GridSearchCV.
# Each entry maps a short name to an estimator ('model') and its search grid
# ('params'); grid keys carry the 'model__' prefix because the estimator is the
# pipeline step named 'model'.
classifier_models = {
    'knn': {
        'model': KNeighborsClassifier(),
        'params': {
            'model__n_neighbors': [3, 5, 7, 9],
            'model__weights': ['uniform', 'distance'],
            'model__algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
        },
    },
    'logistic_regression': {
        'model': LogisticRegression(),
        # A list of grids, because not every penalty/solver combination is valid.
        'params': [
            {
                'model__C': [0.1, 1, 10],
                'model__penalty': ['l1', 'l2'],
                'model__solver': ['liblinear'],
                'model__class_weight': ['balanced', None],
            },
            # saga with a pure l1/l2 penalty. l1_ratio is only used by the
            # elasticnet penalty, so it is left out here: crossing it with
            # l1/l2 refitted identical models three times and raised warnings.
            {
                'model__C': [0.1, 1, 10],
                'model__penalty': ['l1', 'l2'],
                'model__solver': ['saga'],
                'model__class_weight': ['balanced', None],
            },
            # saga with elasticnet, the only penalty that uses l1_ratio
            {
                'model__C': [0.1, 1, 10],
                'model__penalty': ['elasticnet'],
                'model__solver': ['saga'],
                'model__l1_ratio': [0.15, 0.5, 0.85],
                'model__class_weight': ['balanced', None],
            },
        ],
    },
    'random_forest': {
        'model': RandomForestClassifier(),
        'params': {
            'model__n_estimators': [50, 100, 200, 300],
            'model__max_depth': [None, 10, 20, 30],
            'model__max_features': [0.5, 0.6, 0.7, 0.8],
            'model__max_leaf_nodes': [6, 7, 8, 9, 10],
            'model__class_weight': ['balanced', 'balanced_subsample', None],
        },
    },
    'gradient_boosting_classifier': {
        'model': GradientBoostingClassifier(),
        'params': {
            'model__n_estimators': [50, 100, 200],
            'model__learning_rate': [0.01, 0.1, 0.2],
            'model__max_depth': [3, 5, 7],
        },
    },
}

In [9]:
# Regression model parameters and hyperparameters for GridSearchCV.
# Same layout as the classifier table: name -> estimator ('model') and search
# grid ('params'), with grid keys prefixed by the pipeline step name 'model'.
regression_models = {
    'knn': {
        'model': KNeighborsRegressor(),
        'params': {
            'model__n_neighbors': [3, 5, 7, 9],
            'model__weights': ['uniform', 'distance'],
            'model__algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
        },
    },
    'ridge_regression': {
        'model': Ridge(),
        'params': {
            'model__alpha': [0.1, 1, 10, 50],
        },
    },
    'random_forest': {
        'model': RandomForestRegressor(),
        'params': {
            'model__n_estimators': [50, 100, 200, 300],
            'model__max_depth': [None, 10, 20, 30],
            'model__max_features': [0.5, 0.6, 0.7, 0.8],
            'model__max_leaf_nodes': [6, 7, 8, 9, 10],
        },
    },
    'gradient_boosting': {
        'model': GradientBoostingRegressor(),
        'params': {
            'model__n_estimators': [50, 100, 200],
            'model__learning_rate': [0.01, 0.1, 0.2],
            'model__max_depth': [3, 5, 7],
        },
    },
}

In [10]:
# Submit button
_SUBMIT_NEUTRAL_STYLE = "font-size:15px; color: Black; background-color: white;"
_SUBMIT_ERROR_STYLE = "font-size:15px; color: Black; background-color: red;"


def _submit_info(text):
    """Show a progress message in the info box on a white background."""
    info_message.value = text
    info_message.style_ = _SUBMIT_NEUTRAL_STYLE


def _submit_error(text):
    """Show an error on a red background and clear the progress message."""
    warning_error_message.children = [text]
    warning_error_message.style_ = _SUBMIT_ERROR_STYLE
    _submit_info("")


def _picker_timestamp(picker, timezone):
    """Return the picker's date localized to `timezone`, or None if no date is set."""
    if not picker.v_model:
        return None
    return pd.to_datetime(picker.v_model).tz_localize(timezone)


def _is_valid_window(start, end):
    """A window is valid when both ends are set and the end lies after the start."""
    return start is not None and end is not None and end > start


def submit_formula(*args):
    """Click handler for "Submit".

    Runs the soft-sensor workflow and guarantees that the button spinner is
    switched off afterwards, whatever happens. Failures that the workflow does
    not handle itself are shown in the warning area instead of leaving the UI
    stuck in its loading state.
    """
    # Clear warning messages
    warning_error_message.children = [""]
    warning_error_message.style_ = _SUBMIT_NEUTRAL_STYLE

    submit_button.loading = True
    try:
        _run_soft_sensor()
    except Exception as e:
        _submit_error(f"Something went wrong: {e}")
    finally:
        submit_button.loading = False


def _run_soft_sensor():
    """Validate the inputs, obtain a model, predict and push the result to Seeq.

    Steps: check the selections and date windows, pull the selected signals,
    choose regression or classification from the target's dtype, drop
    regressors with too many missing values, then either grid-search the
    candidate models or use the uploaded model, and finally push the
    predictions as the signal "Soft_<target name>". Every failure is reported
    through `_submit_error` and ends the run at that point.
    """
    _submit_info("Pulling data...")

    # --- Selections (checked first: these need no server round trip) ---------
    target = items_dropdown.v_model
    chosen_regressors = regressors.v_model
    if not target and not chosen_regressors:
        _submit_error("No prediction nor regressor signals selected")
        return
    if not target:
        _submit_error("No prediction signal selected")
        return
    if not chosen_regressors:
        _submit_error("No regressor signals selected")
        return

    # Read the uploaded model once so the whole run uses the same object
    pretrained_model = loaded_object

    # --- Workbook / worksheet IDs and timezone from the URL -------------------
    url = url_input.v_model
    workbook_id = spy.utils.get_workbook_id_from_url(url)
    worksheet_id = spy.utils.get_worksheet_id_from_url(url)
    # NOTE(review): this pulls the worksheet's data only to learn its timezone;
    # presumably costly on large worksheets.
    timezone = spy.pull(url).index.tz

    # --- Date windows -----------------------------------------------------------
    start_date_preds = _picker_timestamp(start_select_1, timezone)
    end_date_preds = _picker_timestamp(end_select_1, timezone)
    start_date = end_date = None
    if pretrained_model is None:
        # The training window only matters when a model has to be trained
        start_date = _picker_timestamp(start_select, timezone)
        end_date = _picker_timestamp(end_select, timezone)
        if not _is_valid_window(start_date, end_date):
            _submit_error("Invalid date ranges for training window")
            return
    if not _is_valid_window(start_date_preds, end_date_preds):
        _submit_error("Invalid date ranges for prediction window")
        return

    # --- Signal metadata (target first, then the regressors) -------------------
    target_id = target['ID']
    signal_IDs = combine_to_list(target_id, [item['ID'] for item in chosen_regressors])
    unit = spy.search({'ID': target_id, 'Type': 'Signal'}, quiet=True).get('Value Unit Of Measure').iloc[0]

    all_signals = pd.concat(
        [spy.search({'ID': signal_id, 'Type': 'Signal'}, quiet=True) for signal_id in signal_IDs],
        ignore_index=True,
    )

    # --- Pull the data for the signals within the specified date ranges ---------
    try:
        all_signals_df_preds = spy.pull(all_signals, start=start_date_preds, end=end_date_preds, header='ID', grid=None)
        if pretrained_model is not None:
            # No training window is needed; the prediction data is reused below
            all_signals_df = all_signals_df_preds.copy()
        else:
            all_signals_df = spy.pull(all_signals, start=start_date, end=end_date, header='ID', grid=None)
    except Exception as e:
        _submit_error(f"Something went wrong when pulling Seeq data: {e}")
        return

    # --- Regression or classification? ----------------------------------------
    target_values = all_signals_df[target_id]

    if pd.api.types.is_numeric_dtype(target_values.dtype):
        # Select regression models
        models = regression_models
        class_imbalance = False

        # MAPE is dominated by the denominator when every value is close to
        # zero, so use the MAE-based score in that case
        if target_values.abs().lt(0.1).all():
            scoring_metric = make_scorer(custom_mae_scorer, greater_is_better=True)
            metric_name = "1/MAE"
        else:
            scoring_metric = make_scorer(custom_mape_scorer, greater_is_better=True)
            metric_name = "1-MAPE"
    else:
        # Select classification models
        models = classifier_models

        # Detect class imbalance
        class_counts = target_values.value_counts()
        imbalance_ratio = class_counts.min() / class_counts.max()
        class_imbalance = imbalance_ratio <= imbalance_threshold

        scoring_metric = "balanced_accuracy"
        metric_name = "Balanced accuracy"

    # --- Preprocessing ----------------------------------------------------------
    _submit_info("Preprocessing data...")

    # After reset_index the columns are: timestamp, target, regressors...
    training_df = all_signals_df.reset_index()

    # Eliminate regressors where too many values are missing
    missing_percentage = training_df.iloc[:, 2:].isna().mean() * 100
    columns_to_drop = missing_percentage[missing_percentage >= miss_threshold].index
    training_df = training_df.drop(columns=columns_to_drop)
    df_to_predict = all_signals_df_preds.drop(columns=columns_to_drop)

    # Drop the timestamp column, then the rows where the target is missing
    training_df = training_df.drop(columns=training_df.columns[0])
    training_df = training_df.dropna(subset=[training_df.columns[0]])

    # Additional warnings
    if training_df.shape[1] == 1:
        _submit_error("No valid regressors for predictions")
        return
    if training_df.shape[1] == 2:
        warning_error_message.children = ["Only one valid regressor for predictions"]
        warning_error_message.style_ = "font-size:15px; color: Black; background-color: yellow;"

    # Identify variable to predict, regressors and their column types
    target_column = training_df.columns[0]
    X = training_df.drop(columns=target_column)
    y = training_df[target_column]
    X_valid = df_to_predict.iloc[:, 1:]  # Prediction frame without the target column
    numerical_cols = X.select_dtypes(include=['int64', 'float64', 'float32']).columns.tolist()
    categorical_cols = X.select_dtypes(include=['object']).columns.tolist()

    # Preprocessing for numerical data
    numerical_transformer = Pipeline(steps=[
        ('imputer', IterativeImputer()),
        ('scaler', StandardScaler())
    ])

    # Preprocessing for categorical data
    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
    ])

    # Bundle preprocessing for numerical and categorical data
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numerical_transformer, numerical_cols),
            ('cat', categorical_transformer, categorical_cols)
        ])

    # --- Obtain predictions -------------------------------------------------------
    progress_text = ""
    best_score = float('-inf')
    best_model = None

    if pretrained_model is None:
        # Pad model names to a common width in the progress report
        max_name_width = max(len(name) for name in models)

        # Perform GridSearchCV for each model
        for model_name, model_info in models.items():
            _submit_info(progress_text + f"Testing {model_name}: {model_info['params']}...")

            # Create a pipeline with preprocessors and the model
            steps = [('preprocessor', preprocessor)]

            # Add SMOTE if necessary
            if class_imbalance and models is classifier_models:
                steps.append(('SMOTE', SMOTE(sampling_strategy='auto')))

            # imblearn pipeline, so that resampling is applied while fitting only
            steps.append(('pca', PCA(n_components=0.95)))
            steps.append(('model', model_info['model']))
            pipeline = imbpipeline(steps)

            # GridSearch
            grid_search = GridSearchCV(pipeline, model_info['params'], cv=5, n_jobs=-1, scoring=scoring_metric, error_score='raise')
            try:
                grid_search.fit(X, y)
            except Exception as e:
                _submit_error(f"Something went wrong during fitting: : {e}")
                return

            progress_text += f"{model_name:<{max_name_width}}....Best {metric_name}: {grid_search.best_score_:.3f}....Best params: {str(grid_search.best_params_)}\n"
            if grid_search.best_score_ > best_score:
                best_score = grid_search.best_score_
                best_model = grid_search.best_estimator_

        if best_model is None:
            # Reached when no model produced a comparable (non-NaN) score
            _submit_error("No model produced a valid score")
            return

        progress_text += f"Generating predictions with {best_model[-1]}...\n"
        info_message.value = progress_text
        preds = best_model.predict(X_valid)
    else:
        progress_text += f"Generating predictions with loaded model {pretrained_model}...\n"
        info_message.value = progress_text
        try:
            preds = pretrained_model.predict(X_valid)
        except Exception as e:
            _submit_error(f"Something went wrong during prediction: : {e}")
            return

    # --- Shape the predictions for pushing ----------------------------------------
    # Generate signal name
    new_signal_name = "Soft_" + target['Name']

    # Predict with timestamp
    preds = pd.DataFrame(preds, columns=[new_signal_name])
    preds.index = X_valid.index

    # Encode classification labels to be able to push directly to Seeq
    mapping = None
    if not pd.api.types.is_numeric_dtype(preds.iloc[:, 0]):
        label_encoder = LabelEncoder()
        # Replace the whole column so it takes the integer dtype of the codes
        # (assigning through .iloc could keep the object dtype)
        preds[new_signal_name] = label_encoder.fit_transform(preds[new_signal_name])
        # Codes are the positions of the labels in classes_
        mapping = {label: int(code) for code, label in enumerate(label_encoder.classes_)}

    regressor_names = [item['Name'] for item in chosen_regressors]
    if pretrained_model is None:
        # Report the score of the model actually used (best_score), not the
        # score of whichever model happened to be searched last
        desc = f"Soft sensor predicted with {best_model[-1]} and regressors: {regressor_names}. {metric_name}: {best_score:.3f}"
        if mapping is not None:
            desc += f". Mapping: {mapping}"
    else:
        desc = f"Soft sensor predicted with loaded model {pretrained_model} and regressors: {regressor_names}."
        if mapping is not None:
            desc += f" Mapping: {mapping}"

    # Metadata including Units
    metadata = pd.DataFrame({
        'Name': [new_signal_name],  # Required
        'Description': [desc],
        'Units': [unit],
        'Type': ["Signal"]
    })
    metadata.index = [new_signal_name]

    # --- Push the data to Seeq ----------------------------------------------------
    try:
        spy.push(data=preds,
                 workbook=workbook_id,
                 worksheet=worksheet_id,
                 metadata=metadata,
                 quiet=True)
    except Exception as e:
        _submit_error(f"Something went wrong when pushing back to Seeq Workbench: {e}")
        return

    progress_text += f"Data for signal {new_signal_name} succesfully pushed."
    info_message.value = progress_text
    info_message.style_ = "font-size:15px; color: Black; background-color: #90EE90;"
    warning_error_message.children = [""]
    warning_error_message.style_ = _SUBMIT_NEUTRAL_STYLE

In [11]:
# Event handling
# Button clicks
for _button, _on_click in (
    (execute_button, get_worksheet_items),
    (submit_button, submit_formula),
):
    _button.on_event('click', _on_click)

# Value changes: new target signal selected, model file added or removed
items_dropdown.observe(update_regressors, names='v_model')
file_input.observe(on_file_upload, names='file_info')

In [12]:
# Output: header on top, input form below
app.children = [header, input_container]

# Last expression of the cell, so the app is rendered as the cell output
app

App(children=[Container(children=[Img(aspect_ratio='16/9', layout=None, lazy_src='https://seeq.com/sites/defau…