In [6]:
import numpy as np
import site
from sklearn import datasets, metrics, model_selection as modsel, linear_model
import matplotlib.pyplot as plt 
import bokeh 
# Record the bokeh version so the rendered plots can be tied to a library release.
print("bokeh " + str(bokeh.__version__))

bokeh 1.0.4


In [7]:
from bokeh.plotting import figure, output_file, show
from bokeh.io import output_notebook
from bokeh.models import Label

def _show_prediction_series(model_title, y_test, y_pred, train_r2, train_mse, test_r2, test_mse):
    """Plot held-out targets and predictions against sample index, annotated with the scores."""
    x = list(range(len(y_test)))
    # Label positions are hard-coded; they are not derived from the data range.
    test_label = Label(text="test R\u00B2={:.3} MSE={:.3}".format(test_r2, test_mse), x=30, y=10)
    train_label = Label(text="train R\u00B2={:.3} MSE={:.3}".format(train_r2, train_mse), x=20, y=35)
    p = figure(plot_width=800, plot_height=400,
               title='{} Regression, ground truth and prediction R\u00B2={:.3}, MSE={:.3}'.format(
                   model_title, test_r2, test_mse),
               x_axis_label='x', y_axis_label='y')
    p.add_layout(test_label)
    p.add_layout(train_label)
    # Markers and connecting lines share a legend string per series.
    p.circle(x, y_test, line_color="firebrick", legend="ground truth", alpha=1.0, fill_color=None)
    p.circle(x, y_pred, line_color="navy", legend="prediction", alpha=1.0, fill_color=None)
    p.line(x, y_test, line_color="firebrick", legend="ground truth", alpha=.5, line_width=2)
    p.line(x, y_pred, line_color="navy", legend="prediction", alpha=0.5, line_width=2)
    show(p)


def _show_prediction_scatter(model_title, y_test, y_pred, test_r2, test_mse):
    """Scatter predicted against true targets, with a dashed identity line for reference."""
    p = figure(plot_width=800, plot_height=400,
               title='{} R\u00B2={:.3}, MSE={:.3}'.format(model_title, test_r2, test_mse),
               x_axis_label='ground truth', y_axis_label='predicted')
    p.circle(y_test, y_pred, line_color="green", fill_color=None)
    # Plotting y_test against itself draws the "perfect prediction" diagonal.
    p.line(y_test, y_test, color="gray", line_width=1, alpha=0.3, line_dash=[2, 2])
    show(p)


def regress_boston(reg_model, model_title, test_size=0.1, random_state=42):
    """Fit a regressor on the Boston housing data, report its scores and plot its predictions.

    The data is split into train and test parts, ``reg_model`` is fitted on the
    train part, and MSE plus ``reg_model.score`` are printed for both parts.
    Two bokeh figures are then shown in the notebook: targets and predictions
    against sample index, and predictions against ground truth.

    Parameters
    ----------
    reg_model : estimator
        Object providing ``fit(X, y)``, ``predict(X)`` and ``score(X, y)``.
        It is fitted in place.
    model_title : str
        Name of the model, used in the figure titles.
    test_size : float or int, optional
        Forwarded to ``train_test_split``. Defaults to 0.1.
    random_state : int, optional
        Forwarded to ``train_test_split`` so the split is repeatable.
        Defaults to 42.

    Returns
    -------
    None
        Results are printed and plotted, not returned.
    """
    # NOTE(review): load_boston is deprecated/removed in newer scikit-learn
    # releases -- confirm against the installed version before re-running.
    boston = datasets.load_boston()
    print("Boston {} {} {}".format(dir(boston), boston.data.shape, boston.target.shape))
    X_train, X_test, y_train, y_test = modsel.train_test_split(
        boston.data, boston.target, test_size=test_size, random_state=random_state)

    reg_model.fit(X_train, y_train)
    train_mse = metrics.mean_squared_error(y_train, reg_model.predict(X_train))
    train_r2 = reg_model.score(X_train, y_train)
    print("train error {}, score {}".format(train_mse, train_r2))
    y_pred = reg_model.predict(X_test)
    test_mse = metrics.mean_squared_error(y_test, y_pred)
    test_r2 = reg_model.score(X_test, y_test)
    print("test error {}, score {}".format(test_mse, test_r2))

    # Route bokeh output to the notebook before any figure is shown.
    output_notebook()
    _show_prediction_series(model_title, y_test, y_pred, train_r2, train_mse, test_r2, test_mse)
    _show_prediction_scatter(model_title, y_test, y_pred, test_r2, test_mse)

In [8]:
# Baseline: ordinary least squares, no regularization.
regress_boston(
    reg_model=linear_model.LinearRegression(),
    model_title="Linear",
)

Boston ['DESCR', 'data', 'feature_names', 'filename', 'target'] (506, 13) (506,)
train error 22.7375901544866, score 0.7375152736886281
test error 14.995852876582541, score 0.7598135533532491


In [9]:
# Lasso (L1-penalized linear regression) with its default settings.
regress_boston(
    reg_model=linear_model.Lasso(),
    model_title="Lasso",
)

Boston ['DESCR', 'data', 'feature_names', 'filename', 'target'] (506, 13) (506,)
train error 27.60650506200869, score 0.6813080948165031
test error 18.645326946116242, score 0.7013604452769767


In [10]:
# Ridge (L2-penalized linear regression) with its default settings.
regress_boston(
    reg_model=linear_model.Ridge(),
    model_title="Ridge",
)

Boston ['DESCR', 'data', 'feature_names', 'filename', 'target'] (506, 13) (506,)
train error 22.923754350520948, score 0.7353661779527544
test error 14.77545251121533, score 0.7633436747163269
