In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import ListedColormap
from sklearn import linear_model, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

import numpy as np
from ipywidgets import interactive
from IPython.display import display
%matplotlib widget

# Part 1 - Intuition

We have three datasets:
- The Diabetes dataset contains data from diabetic patients, with features such as BMI, age, blood pressure and glucose levels, which are useful for predicting disease progression.
- "Perfect regression" is the simplest regression case: synthetic data generated with `make_regression`.
- The noisy sine wave follows the shape of a sine curve with some added noise.

This interactive demo lets you explore the Linear Regression algorithm. 

We can visualize how the regressor fits the different datasets.

In [None]:
def fit_linear_regressor(kernel, X, y):
    """Fit the linear model selected by ``kernel`` on ``(X, y)``.

    ``"linear_model"`` selects ``LinearRegression`` and ``"lasso"`` selects
    ``Lasso``; every other value falls back to ``Ridge``. The estimator is
    created with its default hyper-parameters.

    Returns the fitted estimator.
    """
    # Choose the estimator class first, then build and fit it in one place.
    if kernel == "linear_model":
        estimator_cls = linear_model.LinearRegression
    elif kernel == "lasso":
        estimator_cls = linear_model.Lasso
    else:
        estimator_cls = linear_model.Ridge

    model = estimator_cls()
    model.fit(X, y)
    return model

In [None]:
# Real Dataset
# Kept in `dataset` because the "Train and Test" section reuses it.
dataset = datasets.load_diabetes() # try with another dataset

# Which data the interactive demo shows: "diabetes", "regression" or "sin".
# X and y used to be assigned three times in a row, so only the last
# assignment (the sine wave) was ever used; the choice is now explicit.
DEMO_DATASET = "sin"

# Fixed seed so the sine-wave samples and their noise are reproducible.
rng = np.random.RandomState(1)

if DEMO_DATASET == "diabetes":
    # A single feature column (index 2, `bmi` in dataset.feature_names):
    # the demo plot is one-dimensional, so a two-column X cannot be drawn.
    X = dataset.data[:, 2:3]
    y = dataset.target
elif DEMO_DATASET == "regression":
    # Perfect regression
    X, y = datasets.make_regression(n_samples=100, n_features=1, random_state=0, noise=100.0, bias=200.0)
elif DEMO_DATASET == "sin":
    # Sin wave
    X = np.sort(5 * rng.rand(80, 1), axis=0)
    y = np.sin(X).ravel()
    # Perturb every 5th target; the noise vector is sized to match that slice.
    y[::5] += 3 * (0.5 - rng.rand(len(y[::5])))
else:
    raise ValueError(
        f"Unknown DEMO_DATASET {DEMO_DATASET!r}; expected 'diabetes', 'regression' or 'sin'"
    )

In [None]:
# Interactive mode is switched off; the figure is rendered explicitly through
# display(fig) inside plot_boundary.
plt.ioff()
fig = plt.figure()
fig.canvas.header_visible = False
plt.scatter(X[:, 0], y, color="darkorange",s=30, alpha=0.6)

# Evenly spaced grid over the plotted feature, shaped (n, 1) so it can be
# passed straight to `predict`. Array min/max replace the builtin min()/max(),
# which iterate over rows and only work when X has exactly one column.
x = np.linspace(X[:, 0].min(), X[:, 0].max(), len(X)).reshape(-1, 1)

# Placeholder line: plot_boundary overwrites its data with model predictions.
# The label names what is drawn (it used to read "Regression Tree").
lines = plt.plot(x, y, label="Linear model fit")


def plot_boundary(kernel):
    """Refit the selected linear model and redraw its prediction line.

    Parameters
    ----------
    kernel : str
        Model selector forwarded to ``fit_linear_regressor``.

    Returns
    -------
    The fitted estimator returned by ``fit_linear_regressor``.
    """
    regr = fit_linear_regressor(kernel, X, y)

    # Predict on the evenly spaced grid and swap the values into the
    # existing line instead of adding a new artist on every call.
    y_preds = regr.predict(x)
    lines[0].set_data(x, y_preds)

    # Attach the legend to the axes that owns the line, and do it before the
    # redraw. plt.legend() acts on whichever axes is current, which is no
    # longer this figure once another figure has been created.
    lines[0].axes.legend()

    fig.canvas.draw()
    fig.canvas.flush_events()

    display(fig)
    return regr


# Wrap plot_boundary in a widget; the list supplies the selectable values
# that are passed to it as `kernel`.
inter = interactive(plot_boundary, kernel=["linear_model", "lasso", "ridge"])

# Render the control together with the function's output.
display(inter)

Use the interactive demo to gain intuition about how this regressor fits the different types of data.

# Train and Test

In [None]:
# `dataset` is a dict-like Bunch, so len(dataset) counts its keys rather than
# its rows; the length of the feature matrix is the number of samples.
len(dataset.data)

In [None]:
# 1. Split the dataset in training and testing. Use a test_size of 33%
# Hint: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html
# test_size now matches the 33% the task asks for (it was 0.3);
# random_state fixes the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.33, random_state=42)

In [None]:
# 2. Instantiate a Linear Regressor
# Hint: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html#sklearn.linear_model.LinearRegression
# The task and the hint both name LinearRegression, so the solution uses it
# (it previously instantiated Ridge).
regr = linear_model.LinearRegression()
regr.fit(X_train, y_train)

In [None]:
# 3. Use the regressor to predict the test set.
# Hint: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html#sklearn.linear_model.LinearRegression.predict
# Predictions for the held-out rows; compared against y_test in the next step.
y_preds = regr.predict(X=X_test)

In [None]:
# 4. Evaluate the regressor error on the test set (also known as hold-out evaluation)
# Hint: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html?highlight=rmse

# RMSE is taken as the square root of the MSE instead of passing
# `squared=False`: that keyword is deprecated in recent scikit-learn releases
# and no longer accepted by newer ones. The MSE is also computed only once.
mse = mean_squared_error(y_test, y_preds)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_preds)

# Displayed as (MSE, RMSE, MAE).
mse, rmse, mae

## Extra - Compare the predictions

In [None]:
def evaluate(y_test, prediction, ax=None):
    """Scatter predicted against actual values, with the identity line.

    Points on the grey diagonal are perfect predictions. Both axes share the
    same limits so the diagonal runs corner to corner.

    Parameters
    ----------
    y_test : array-like
        Actual target values (drawn on the y axis).
    prediction : array-like
        Predicted target values (drawn on the x axis).
    ax : axes object, optional
        Axes to draw on. When omitted, a new 5x5 figure is created and shown.
        When supplied, the figure is NOT shown here, so the caller can finish
        composing it (e.g. several panels) before displaying it.

    Returns
    -------
    None
    """
    lowest = min(np.min(y_test), np.min(prediction))
    highest = max(np.max(y_test), np.max(prediction))

    # Pad outward by 10% of each bound's magnitude. For non-negative data this
    # equals the previous `*0.9` / `*1.1`; for negative bounds those factors
    # moved the limit inward and cropped points.
    min_value = lowest - 0.1 * abs(lowest)
    max_value = highest + 0.1 * abs(highest)

    owns_figure = ax is None
    if owns_figure:
        _, ax = plt.subplots(figsize=(5, 5))

    ax.plot([min_value, max_value], [min_value, max_value], color='grey')
    ax.scatter(prediction, y_test, facecolor='steelblue', s=30, alpha=0.6)
    ax.set_xlabel('Predicted', fontsize=12)
    ax.set_ylabel('Actual', fontsize=12)
    ax.set_xlim([min_value, max_value])
    ax.set_ylim([min_value, max_value])

    # Only show figures this function created itself.
    if owns_figure:
        plt.show()

# Predicted-vs-actual plot for the hold-out predictions.
evaluate(y_test, prediction=y_preds)

In [None]:
# Sanity check: one prediction per test sample, so both lengths should match.
tuple(len(values) for values in (y_test, y_preds))