In [1]:
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from mlxtend.evaluate import bias_variance_decomp

In [2]:
# Fetch the housing dataset from its public CSV location. header=None tells
# pandas that the first row is data rather than column names.
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/housing.csv'
dataframe = read_csv(
    url,
    header=None,
)

In [3]:
# Pull the raw array out of the frame, then slice it column-wise:
# every column except the last is an input feature, and the last column
# is the target passed to the model as y.
data = dataframe.values
X = data[:, :-1]
y = data[:, -1]

In [4]:
# Hold out 33% of the rows as a test set; pinning random_state keeps the
# partition identical from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=1,
)

In [5]:
# define the model: a linear regression estimator with default settings.
# It is left unfitted here and handed to bias_variance_decomp in a later cell.
model = LinearRegression()

In [6]:
# Estimate the model's average expected loss on the held-out test set and
# its split into bias and variance terms, using squared-error loss over
# 200 rounds. random_seed is fixed so the estimate is repeatable.
mse, bias, var = bias_variance_decomp(
    model,
    X_train,
    y_train,
    X_test,
    y_test,
    loss='mse',
    num_rounds=200,
    random_seed=1,
)

In [7]:
# Print each estimate on its own line, rounded to three decimal places.
for label, value in (('MSE', mse), ('Bias', bias), ('Variance', var)):
    print('%s: %.3f' % (label, value))

MSE: 22.418
Bias: 20.744
Variance: 1.674


In [8]:
# The model has a high bias with low variance, though this is expected from a
# linear regression model. Similarly, from these results we can see that the
# sum of the estimated bias and variance equals the estimated error of the model
# (20.744 + 1.674 = 22.418).