In [1]:
from base_selection import RegressionBase
from sklearn.datasets import load_diabetes

## Step One: Load a DataFrame for regression analysis, then export it to a `JSON` file

In [2]:
X = load_diabetes(return_X_y=False, as_frame=True)['frame']
x=RegressionBase(dataframe=X)

In [3]:
x

RegressionBase(children=(Button(description='view dataframe', icon='bars', layout=Layout(height='40px', width=…

## Step Two: Create callable function taking desired inputs and the model data from the `json` file

In [7]:
from functools import partial
from regression_callable import run_neural_net, run_linear_regression
import json

f = open('test_lr.json', 'r')
model_lr = json.load(f)
f.close()

f = open('test_nn.json','r')
model_nn = json.load(f)
f.close()

linear_regression = partial(run_linear_regression, model=model_lr)
neural_net = partial(run_neural_net, model=model_nn)

## Step Three: Set everything up with `ipycontour` syntax, and view your contours!

In [8]:
from ipycontour.api import Variable, Constraint, ContourApp
age = Variable(name='age', lower=min(X.age), upper=max(X.age))
sex = Variable(name='sex', lower=min(X.sex), upper=max(X.sex))
bmi = Variable(name='bmi', lower=min(X.bmi), upper=max(X.bmi))
bp = Variable(name='bp', lower=min(X.bp), upper=max(X.bp))
s1 = Variable(name='s1', lower=min(X.s1), upper=max(X.s1))
s2 = Variable(name='s2', lower=min(X.s2), upper=max(X.s2))
s3 = Variable(name='s3', lower=min(X.s3), upper=max(X.s3))
s4 = Variable(name='s4', lower=min(X.s4), upper=max(X.s4))
s5 = Variable(name='s5', lower=min(X.s5), upper=max(X.s5))
s6 = Variable(name='s6', lower=min(X.s6), upper=max(X.s6))

nn = Constraint(name='neural_net', function=neural_net, inputs = (age,sex,bmi,bp,s1,s2,s3,s4,s5,s6), lower_bound=0, upper_bound=350)
lr = Constraint(name='linear_regressoin', function=linear_regression, inputs = (s6,age,sex,bmi,s1,s2,s3,s4,s5,bp), lower_bound=0, upper_bound=350)
app = ContourApp(constraints=[nn, lr])
app

ContourApp(commands=CommandRegistry(), sessions=SessionManager(), shell=Shell())

In [6]:
import ipywidgets as ipyw
import traitlets as trt

class SelectContainer(ipyw.VBox):
    selectors = ipyw.trait_types.TypedTuple()
    options = ipyw.trait_types.TypedTuple(trt.Unicode())
    selected = ipyw.trait_types.TypedTuple(trt.Unicode())
    
    @trt.validate("children")
    def _check_children(self, proposal: trt.Bunch):
        children = proposal.value
        if not children:
            children = self.selectors
        return children
    
    @trt.validate("selectors")
    def _check_selectors(self, proposal: trt.Bunch):
        selectors = proposal.value
        for selector in self.selectors:
            selector.options = [""] + list(self.options)
            if isinstance(selector.value, tuple):
                selector.value = ()
            else:
                selector.value = ""
        return selectors

    @trt.observe("selectors")
    def _update_selector_observers(self, change: trt.Bunch):
        old = None if change.old is trt.Undefined else change.old
        for selector in old or []:
            selector.unobserve(self._update_selected, "value")
        self.selected = ()
        new = None if change.new is trt.Undefined else change.new
        for selector in new or []:
            if isinstance(selector.value, tuple):
                selector.value = ()
            else:
                selector.value = ""
            selector.observe(self._update_selected, "value")
        self.children = self.selectors
            
    @trt.observe("selected")
    def _update_selector_options(self, change: trt.Bunch):
        for selector in self.selectors:
            try:
                selector.unobserve(self._update_selected, "value")
            except:
                continue
        for selector in self.selectors:
            print([selector.value for selector in self.selectors])
            with selector.hold_trait_notifications():
                if isinstance(selector.value, tuple):
                    value = selector.value
                    print(value)
                    selector.options = list(selector.value) + [
                        option for option in self.options
                        if option not in self.selected
                    ]
                    selector.value = value
                else:
                    selector.options = [selector.value] + [
                        option for option in self.options
                        if option not in self.selected
                    ]
        for selector in self.selectors:
            print(selector.value)
            try:
                selector.observe(self._update_selected, "value")
            except:
                continue
    def _update_selected(self, change: trt.Bunch):
        selected = []
        for selector in self.selectors:
            if isinstance(selector.value, tuple):
                [selected.append(val) for val in selector.value]
            else:
                selected.append(selector.value)
        self.selected = selected

x=SelectContainer(selectors=[ipyw.SelectMultiple()]+[ipyw.Dropdown() for _ in range(5)], options=list("abcdefgh"))

[(), '', '', '', '', '']
()
[(), '', '', '', '', '']
[(), '', '', '', '', '']
[(), '', '', '', '', '']
[(), '', '', '', '', '']
[(), '', '', '', '', '']
()







In [7]:
x

SelectContainer(children=(SelectMultiple(options=('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'), value=()), Dropdown…

In [8]:
x.selected

()

In [5]:
# okay lets try and mimic this
import numpy as np
import statsmodels.formula.api as sm
import pandas as pd
from sklearn.model_selection import train_test_split

In [6]:
X = load_diabetes(return_X_y=False, as_frame=True)['frame']
X.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019908,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.06833,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.005671,-0.045599,-0.034194,-0.032356,-0.002592,0.002864,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022692,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031991,-0.046641,135.0


In [7]:
X_train, X_test = train_test_split(X)

In [8]:
import ipywidgets as ipyw
formula_test = ipyw.Text(value='target~age')

In [10]:
formula_test

Text(value='target~age')

In [11]:
formula = 'target ~ age + sex**2 + bmi:sex  + s1 + s2 + s3 + s4 + s5 + s6'
target_col = 'target'
result = sm.ols(formula_test.value, data=X_train).fit() 
# result = sm.OLS()
# result.from_formula(formula, data=X_train)
# result.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:                 target   R-squared:                       0.364
Model:                            OLS   Adj. R-squared:                  0.360
Method:                 Least Squares   F-statistic:                     93.77
Date:                Thu, 03 Feb 2022   Prob (F-statistic):           6.19e-33
Time:                        14:21:08   Log-Likelihood:                -1829.0
No. Observations:                 331   AIC:                             3664.
Df Residuals:                     328   BIC:                             3675.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    147.4789      3.354     43.970      0.0

In [12]:
saved_formula = f'{target_col} ~ ' + '+'.join(result.params.index).replace('Intercept','1') # (is this necessary?)                                                                                                  
saved_params = result.params.to_dict()                                                                                                                                                                     
print(saved_formula)                                                                                                                                                                                    
print(saved_params)   

import json
model_data = {
    'weights':saved_params,
    'target_column':target_col,
    'formula':saved_formula,
}
f = open('sm_OLS.json', 'w')
json.dump(model_data, f)
f.close()

target ~ 1+bmi+bp
{'Intercept': 147.47887432437375, 'bmi': 738.009468867815, 'bp': 404.4034614482928}


In [9]:
import patsy                                                                                                                          
def predict(data, formula) :                                                                                                          
    formula_rhs = formula.split('~')[1].strip()                                                                                       
    x_data = patsy.dmatrix(formula_rhs, data=data)                                                                                      
    ordered_terms = x_data.design_info.term_names                                                                                     
    ordered_params = list(map(saved_params.get, ordered_terms))                                                                       
    return x_data @ ordered_params                                                                                                  
                                                                                                                                      
model = lambda x : predict(x, saved_formula)

In [10]:
model(X_test)

array([167.27420459, 155.77208269, 131.48982534, 159.60612332,
       113.59763571, 160.88413686, 165.99619104, 157.05009623,
       180.05434004, 153.2160556 , 132.76783888, 174.94228586,
       132.76783888, 132.76783888, 165.99619104, 181.33235358,
       162.16215041, 126.37777116, 163.44016395, 158.32810978,
       158.32810978, 134.04585243, 172.38625877, 141.71393369,
       150.66002851, 136.60187951, 146.82598787, 163.44016395,
       164.7181775 , 174.94228586, 165.99619104, 125.09975761,
       141.71393369, 164.7181775 , 137.87989306, 165.99619104,
       142.99194724, 158.32810978, 180.05434004, 148.10400142,
       167.27420459, 153.2160556 , 149.38201496, 153.2160556 ,
       154.49406914, 136.60187951, 145.54797433, 130.21181179,
       160.88413686, 148.10400142, 139.1579066 , 154.49406914,
       157.05009623, 174.94228586, 168.55221813, 139.1579066 ,
       128.93379825, 180.05434004, 180.05434004, 145.54797433,
       128.93379825, 171.10824522, 162.16215041, 162.16

In [11]:
actual_model = result.predict
x = X.drop(['target'], axis=1)
znew = actual_model(X_test.drop(['target'], axis=1))

In [12]:
znew

91     167.274205
36     155.772083
436    131.489825
239    159.606123
26     113.597636
          ...    
92     144.269961
50     163.440164
423    154.494069
336    144.269961
97     141.713934
Length: 111, dtype: float64

In [4]:
import pandas as pd

In [6]:
x = pd.DataFrame({'x':[1,2,3], 'y':[4,5,6]})

In [7]:
x

Unnamed: 0,x,y
0,1,4
1,2,5
2,3,6


In [8]:
y = x.drop(['x'], axis=1)

In [9]:
y

Unnamed: 0,y
0,4
1,5
2,6


In [10]:
x

Unnamed: 0,x,y
0,1,4
1,2,5
2,3,6


In [7]:
import pandas
from formulaic import Formula

df = pandas.DataFrame({
    'y': [0,1,2],
    'x': ['A', 'B', 'C'],
    'z': [0.3, 0.1, 0.2],
})

y, X = Formula('y ~ x + z').get_model_matrix(df)

In [8]:
y

Unnamed: 0,y
0,0
1,1
2,2


In [9]:
X

Unnamed: 0,Intercept,x[T.B],x[T.C],z
0,1.0,0,0,0.3
1,1.0,1,0,0.1
2,1.0,0,1,0.2


In [10]:
from sklearn.datasets import load_diabetes
import numpy as np

In [11]:
df = load_diabetes(return_X_y=False, as_frame=True)['frame']

In [12]:

y, X = Formula('target ~ age:sex + bp + bmi + bp').get_model_matrix(df)

In [13]:
y

Unnamed: 0,target
0,151.0
1,75.0
2,141.0
3,206.0
4,135.0
...,...
437,178.0
438,104.0
439,132.0
440,220.0


In [14]:
X

Unnamed: 0,Intercept,bmi,bp,age:sex
0,1.0,0.061696,0.021872,0.001930
1,1.0,-0.051474,-0.026328,0.000084
2,1.0,0.044451,-0.005671,0.004323
3,1.0,-0.011595,-0.036656,0.003976
4,1.0,-0.036385,0.021872,-0.000240
...,...,...,...,...
437,1.0,0.019662,0.059744,0.002114
438,1.0,-0.015906,-0.067642,-0.000279
439,1.0,-0.015906,0.017282,0.002114
440,1.0,0.039062,0.001215,0.002030


In [15]:
import bqplot as bq
bq.marks.ScatterGL

bqplot.marks.ScatterGL

In [16]:
fig = bq.figure.Figure()

In [18]:
marks = [bq.marks.ScatterGL(x=X.bmi, y=X.bp)]

In [19]:
fig.marks=marks
fig

Figure(fig_margin={'top': 60, 'bottom': 60, 'left': 60, 'right': 60}, marks=[ScatterGL(colors=['steelblue'], i…

In [412]:
import json
import time
import numpy as np
import bqplot as bq
import pandas as pd
import ipywidgets as ipyw
from regression_callable import run_neural_net
import traitlets as trt
from bqplot.interacts import (
    BrushSelector,
    BrushIntervalSelector,
)

# open our json file
f = open('nn_regression.json', 'r')
model_data = json.load(f)
f.close()
bounds = model_data['bounds']

In [447]:
# generate one million uniformly distributed points for each bound, put into dataframe
points = {}
for dimension, bound in bounds.items():
    points[dimension] = np.random.uniform(bound[0], bound[1], 10000)

df = pd.DataFrame(points)

In [448]:
# now lets get the target column which will be done by evaluating the function...
from functools import partial
neural_net = partial(run_neural_net, model=model_data)
x = neural_net(**points)

In [449]:
df['target'] = x

In [458]:
class Plot(ipyw.VBox):
    data: pd.DataFrame = trt.Instance(pd.DataFrame, allow_none=True, args=())
    plot: bq.figure.Figure = trt.Instance(bq.figure.Figure, allow_none=True, args=())
    x_selector: ipyw.Dropdown = trt.Instance(ipyw.Dropdown, allow_none=False)
    y_selector: ipyw.Dropdown = trt.Instance(ipyw.Dropdown, allow_none=False)
    points: bq.marks.ScatterGL = trt.Instance(bq.marks.ScatterGL, allow_none=True, args=())
    scales: dict = trt.Dict(default_value={'x':bq.LinearScale(), 'y':bq.LinearScale()})
    axes: dict = trt.Dict()
    x_var: str = trt.Unicode(default_value=None, allow_none=True)
    y_var: str = trt.Unicode(default_value=None, allow_none=True)
    brush: BrushSelector = trt.Instance(BrushSelector)
    x_bounds = trt.Any() # need to change
    y_bounds = trt.Any() # need to change
    selected_indices: list = trt.List(default_value=[])
    filter_indices: list = trt.List(default_value=[])
    brushing: bool = trt.Bool()

    
    @trt.default("brush")
    def _make_default_brush(self):
        brush = BrushSelector(x_scale=self.scales['x'], y_scale=self.scales['y'])
        brush.observe(self.update_brushing, "brushing")
        return brush
    
    def update_brushing(self, *_):
        if self.brush.brushing is False:
            if self.points.selected is not None:
                self.selected_indices = list(self.points.selected)
  
    @trt.default("plot")
    def _make_default_figure(self):
        fig = bq.figure.Figure(
            title='Test Plot',
            marks=[self.points],
            axes=list(self.axes.values()),
            interaction=self.brush,
        )
        return fig
    
    @trt.default("axes")
    def _make_axes(self):
        return {
            'x': bq.Axis(scale=self.scales['x']),
            'y': bq.Axis(scale=self.scales['y'], orientation='vertical'),
        }
    
    @trt.default("x_selector")
    def _make_default_x_selector(self):
        dropdown = ipyw.Dropdown(options=self.data.columns)
        trt.link((dropdown, 'value'), (self, 'x_var'))
        return dropdown

    @trt.default("y_selector")
    def _make_default_y_selector(self):
        dropdown = ipyw.Dropdown(options=self.data.columns)
        trt.link((dropdown, 'value'), (self, 'y_var'))
        return dropdown
    
    @trt.observe("data")
    def _update_dataframe(self, *_):
        self.x_selector = self._make_default_x_selector()
        self.y_selector = self._make_default_y_selector()

    @trt.validate("children")
    def _set_children(self, proposal):
        children = proposal.value
        if children:
            return children
        return [self.x_selector, self.y_selector, self.plot]
    
    @trt.observe('x_var', 'y_var')
    def update_plot(self, *_):
        df = self.data
        xdata = df[self.x_var]
        ydata = df[self.y_var]
        
        self.scales['x'] = bq.LinearScale()
        self.scales['y'] = bq.LinearScale()
        self.axes = self._make_axes()

        if self.filter_indices != []:
            xdata = xdata[self.filter_indices]
            ydata = ydata[self.filter_indices]
            self.points.x = xdata
            self.points.y = ydata
            return
        
        self.points.x = xdata
        self.points.y = ydata
        self.points.scales = self.scales
        self.points.opacity = [.3]*len(xdata)
        self.points.size = [.3]*len(xdata)
        self.points.selected_style={'opacity':'1'}
        self.points.unselected_style={'opacity':'.2'}
        
        self.plot.axes = list(self.axes.values())
        self.plot.axes[0].label = self.x_var
        self.plot.axes[1].label = self.y_var
    
        self.brush.marks=[self.points]

        
        
class ScatterPlotMatrix(ipyw.GridBox):
    num_rows: int = trt.Integer(allow_none=True).tag(sync=True)
    num_columns: int = trt.Integer(allow_none=True).tag(sync=True)
    
    def __init__(self, data, num_rows, num_columns, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.num_rows = num_rows
        self.num_columns = num_columns
        self.plots = [Plot(data=df, layout=dict(width='600px')) for i in range(num_rows*num_columns)]
        for plot in self.plots:
            plot.observe(self.update_plots, "selected_indices")
        
        plots = self.plots
        
        children = []
        for i in range(num_rows):
            children.append(ipyw.HBox(children=[plots[i] for i in range(num_columns)]))
        
        self.children=children
        self.layout=dict(width='100%')
        
        
    def update_plots(self, change):
        owner = change['owner']
        for plot in self.plots:
            if plot == owner:
                continue
            else:
                plot.filter_indices = owner.selected_indices
                plot.update_plot()

In [459]:
x = Plot(data=df)
x

Plot(children=(Dropdown(options=('s6', 's1', 's5', 's3', 'bp', 'sex', 'age', 's4', 'bmi', 's2', 'target'), val…

In [456]:
y = ScatterPlotMatrix(data=df, num_rows=6, num_columns=6)

In [457]:
y

ScatterPlotMatrix(children=(HBox(children=(Plot(children=(Dropdown(options=('s6', 's1', 's5', 's3', 'bp', 'sex…