In [None]:
from ipyml.api import RegressionBase
from sklearn.datasets import load_diabetes

## Choosing a dataset
The first step with `ipyml` is to choose a dataset you would like to run a regression on. For the purposes of this example, we will use the diabetes dataset from `sklearn`, but any dataset will work as long as it is provided as a pandas `DataFrame`.

In [None]:
# Ask for the bundled DataFrame (features plus the target column) and keep only that frame
diabetes = load_diabetes(as_frame=True).frame

## Initializing the `RegressionBase` class

In [None]:
# Build the regression application around the diabetes DataFrame loaded above
regr = RegressionBase(diabetes)

When the application loads, we first go through the initial setup: selecting a target feature, selecting the input features, and optionally selecting a validation column (used to split the data into train and test sets based on the column's value). Three buttons at the bottom then correspond to a neural network (`sklearn`), simple linear regression (`sklearn`), and OLS regression (`statsmodels.api`). The OLS regression is useful for mimicking R-style syntax when drafting a regression equation. After setting the model-specific parameters, you can train the model, at which point a new window will appear with options to view useful plots and to export the model to a user-defined `.json` file that stores all of the model data.

In [None]:
# A bare expression on the last line makes Jupyter render the object as the cell output
regr

### Automated testing
TODO: expand automated testing and migrate to a `Test_`-style notebook.

In [None]:
import json
import os
import time
from pathlib import Path

# Opt-in switch for the automated smoke test below.
# The raw value is stripped and lower-cased before JSON parsing so that
# "True"/"FALSE" work as well as "true"/"false"; an unset or empty variable
# is treated as "false" instead of raising a JSON decode error.
IS_TESTING = json.loads((os.environ.get("IPYML_TESTING") or "false").strip().lower())

if IS_TESTING:
    # automate the "user interaction" of the tool
    target_feature = "target"
    input_features = ['age', 'sex', 'bmi', 'bp', 's1']
    validation_column = None
    test_model_filename = "test_model"
    exported_model_path = Path(f"{test_model_filename}_lr.json")

    # Remove an export left behind by an earlier run; otherwise the final
    # assertion would pass even if this run's export silently failed.
    if exported_model_path.is_file():
        exported_model_path.unlink()

    assert target_feature in regr.target_select.options, f"Failed to find the target feature '{target_feature}'."
    regr.target_select.value = target_feature

    # Collect the missing names so a failure says which inputs were not offered
    missing_inputs = [_input for _input in input_features if _input not in regr.inputs_select.options]
    assert not missing_inputs, f"Failed to find all input features. Missing: {missing_inputs}"
    regr.inputs_select.value = input_features

    assert validation_column in regr.validation_column_select.options, (
        f"Failed to find the validation column option {validation_column!r}."
    )
    regr.validation_column_select.value = validation_column

    # No validation - linear regression
    regr.lr_button.click()

    # Get latest regression model widget and run
    model_widget = regr._generated_models[-1]
    model_widget.run_button.click()
    # give it a few seconds to build the model
    time.sleep(3)

    trained_model_widget = model_widget._trained_models[-1]
    trained_model_widget.model_name.value = test_model_filename + "_lr"
    trained_model_widget.export_button.click()
    # give it a few seconds to save the model
    time.sleep(5)

    assert exported_model_path.is_file(), f"Expected the exported model at '{exported_model_path}'."

## Running regression models from a `.json` file
> Note: **This is a Work in Progress**

Once we have a model saved in `JSON` format, we can use the callable functions in `regression_callable.py`, which are designed to take the saved model data and turn it into a tool that can be used to make predictions.



In [None]:
# imports
import json
from functools import partial
from pathlib import Path

from ipyml.regression_callable import run_linear_regression, run_neural_net

In [None]:
# load the model
# Prefer the linear-regression export; fall back to the neural-network export.
# `reg_call` is the callable that matches whichever file was found.
if Path("test_model_lr.json").is_file():
    model_name = "test_model_lr.json"
    reg_call = run_linear_regression
elif Path("test_model_nn.json").is_file():
    model_name = "test_model_nn.json"
    reg_call = run_neural_net
else:
    # Nothing has been exported yet: report the missing files explicitly
    # rather than raising a bare NotImplementedError with no explanation.
    raise FileNotFoundError(
        "No saved model found: expected 'test_model_lr.json' or 'test_model_nn.json' "
        "in the current working directory. Train and export a model first."
    )

with open(model_name, 'r') as f:
    model_data = json.load(f)

In [None]:
# Bind the loaded model data once, leaving only the per-column inputs to supply
partial_func = partial(reg_call, model=model_data)

# Every DataFrame column is passed as a keyword argument named after the column
# NOTE(review): this includes the 'target' column as well — confirm the callable ignores it
inputs = dict(diabetes.items())
predictions = partial_func(**inputs)
print(predictions)