# HLMA 408: Modèle linéaire

***
> __Auteur__: Joseph Salmon
> <joseph.salmon@umontpellier.fr>

In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn import linear_model, preprocessing
import seaborn as sns
from os import mkdir, path
from mpl_toolkits.mplot3d import Axes3D
from download import download

import ipywidgets as widgets
from ipywidgets import interactive
from matplotlib.ticker import MaxNLocator

In [2]:
# Original location of the `cars` CSV file:
url = 'https://forge.scilab.org/index.php/p/rdataset/source/file/master/csv/datasets/cars.csv'
# Alternative mirror (uncomment to use it instead):
# url = 'http://josephsalmon.eu/enseignement/TELECOM/MDI720/datasets/cars.csv'
# Local copy of the dataset. With replace=False an already-present file is
# kept as is and nothing is downloaded again.
path_target = "./cars.csv"
download(url, path_target, replace=False)

Replace is False and data exists, so doing nothing. Use replace=True to re-download the data.


'./cars.csv'

In [3]:
%matplotlib widget

In [4]:
# Read the local copy cached by `download` in the previous cell instead of
# fetching the remote URL a second time (works offline, no duplicate request).
# NOTE(review): 'Unnamed: 0' is presumably the row index exported with the
# CSV; it is dropped because it carries no information for the regression.
dat = pd.read_csv(path_target).drop(columns='Unnamed: 0')
# Give the two remaining columns explicit names with units (used as axis labels).
dat.columns = ['Vitesse (mph)', 'Distance (ft)']

# scikit-learn expects a 2-D feature matrix: selecting with a *list* of
# columns keeps X as an (n_samples, 1) DataFrame, whereas
# dat['Vitesse (mph)'] would be a 1-D Series.
X = dat[['Vitesse (mph)']]
y = dat['Distance (ft)']

# Set to True to centre and scale both X and y before fitting.
standardisation = False

if standardisation:
    scaler = preprocessing.StandardScaler().fit(X.values)
    # Build a fresh float DataFrame rather than writing the scaled values in
    # place with .loc: X is a selection of `dat`, and its column may not be
    # float-typed, so in-place assignment of floats is fragile.
    X = pd.DataFrame(scaler.transform(X.values),
                     columns=X.columns, index=X.index)
    y = (y - y.mean(axis=0)) / np.std(y)
# Flat 1-D view of the single feature, convenient for vectorised formulas.
Xval = X.values.squeeze()


# Ordinary least-squares fit of the response on the single feature
# (scikit-learn); `fit` returns the fitted estimator itself.
skl_linmod = linear_model.LinearRegression().fit(X, y)

# Spread of the observations along each axis.
delta_x = Xval.max() - Xval.min()
delta_y = y.max() - y.min()

# Display window: the data range widened by 20% of the spread on each side
# (50% below the smallest response).
xmin_normal, xmax_normal = Xval.min() - delta_x * 0.2, Xval.max() + delta_x * 0.2
ymin_normal, ymax_normal = y.min() - delta_y * 0.5, y.max() + delta_y * 0.2

# 50 evenly spaced abscissae covering the window, stored as a one-column
# DataFrame carrying the same column name as the training features.
X_to_predict = pd.DataFrame(
    np.linspace(xmin_normal, xmax_normal, num=50).reshape(50, 1),
    columns=['Vitesse (mph)'])


n_samples, _ = X.shape

# Per-observation ratios y_i / x_i, used to choose the range of slopes to
# explore; the range is widened by 20% of its spread on each side.
slopes = y / Xval
delta_slopes = slopes.max() - slopes.min()
slope_lo = slopes.min() - 0.2 * delta_slopes
slope_hi = slopes.max() + 0.2 * delta_slopes

# A complex "step" makes np.mgrid return that many points per axis.
n_grid_cplx = 50j
n_betas = int(n_grid_cplx.imag)
beta_0_grid, beta_1_grid = np.mgrid[ymin_normal:ymax_normal:n_grid_cplx,
                                    slope_lo:slope_hi:n_grid_cplx]

# 1-D grids of candidate slopes and intercepts. Note that betas_0 extends
# 0.2 * delta_y beyond the range covered by beta_0_grid on both sides.
betas_1 = np.linspace(slope_lo, slope_hi, n_betas)
betas_0 = np.linspace(ymin_normal - 0.2 * delta_y,
                      ymax_normal + 0.2 * delta_y, n_betas)

In [5]:
# Axis labels are taken from the (renamed) column names of the dataset.
xlabels, ylabels = dat.columns[0], dat.columns[1]

In [6]:
# def show_MCO_slope(beta_1=1.):
#     beta_0 = skl_linmod.intercept_
#     y_by_line = beta_0 + beta_1 * X_to_predict

#     fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
#     ax1.set_xlim(left=xmin_normal, right=xmax_normal)
#     ax1.set_ylim(bottom=ymin_normal, top=ymax_normal)

#     ax1.plot(X, y, 'o', label=r'Données',
#              markeredgecolor='k', markeredgewidth=1)
#     ax1.plot(X_to_predict, skl_linmod.predict(X_to_predict), "--",
#              linewidth=2, color='k', label=r"Moindres carrés")
#     ax1.plot(X_to_predict, y_by_line,
#              linewidth=2, color='k', label=r"$x \to \hat\beta_0 + \beta_1 x$")

#     ax1.legend(numpoints=1, loc=2)  # numpoints = 1 for nicer display
#     ax1.set_xlabel(xlabels)
#     ax1.set_ylabel(ylabels)
#     ax1.set_title(r"Données et moindres carrés:" + "\n" +
#                   r"$\beta_0 = {0:.2f}, \beta_1 = \hat\beta_1 = {1:.2f}$".format(beta_0, beta_1))

#     ax2.plot(betas_1, np.linalg.norm(np.tile(y, (n_betas, 1)) - betas_1.reshape(n_betas, 1)
#                                      * Xval.reshape(1, n_samples) - beta_0, axis=1)**2, label=r'$|| y-X\beta_1 -\hat\beta_0||^2$')
#     ax2.plot(beta_1, np.linalg.norm(y - Xval * beta_1 - beta_0)
#              ** 2, 'kv', label=r'$\beta_1$')
#     ax2.plot(skl_linmod.coef_, np.linalg.norm(
#         y - Xval * skl_linmod.coef_ - beta_0)**2, 'ro', label=r'$\hat\beta_1$')
#     ax2.set_xlabel("Pente")
#     ax2.set_ylabel("Objectif")
#     ax2.set_xlim(left=beta_1_grid.min(), right=beta_1_grid.max())

#     ax2.legend(numpoints=1, loc=2)  # numpoints = 1 for nicer display
#     ax2.set_title("Objectif des moindres carrés")
#     fig.suptitle("Moindres carrés: influence de la pente", size=20)
#     fig.tight_layout(rect=[0, 0.03, 1, 0.91])
# #     plt.show()

In [7]:
# interactive_plot = interactive(show_MCO_slope, beta_1=widgets.FloatSlider(
#     min=beta_1_grid.min(), max=beta_1_grid.max(), step=0.2, value=3))
# output = interactive_plot.children[-1]
# output.layout.height = '300px'
# interactive_plot

In [8]:
# def show_MCO_intercept(beta_0=1.1 * skl_linmod.intercept_):
#     beta_1 = skl_linmod.coef_
#     y_by_line = beta_0 + beta_1 * X_to_predict

#     fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
#     ax1.set_xlim(left=xmin_normal, right=xmax_normal)
#     ax1.set_ylim(bottom=ymin_normal, top=ymax_normal)

#     ax1.plot(X, y, 'o', label=r'Données',
#              markeredgecolor='k', markeredgewidth=1)
#     ax1.plot(X_to_predict, skl_linmod.predict(X_to_predict), "--",
#              linewidth=2, color='k', label=r"Moindres carrés")
#     ax1.plot(X_to_predict, y_by_line,
#              linewidth=2, color='k', label=r"$x \to \beta_0 + \hat\beta_1 x$")

#     ax1.legend(numpoints=1, loc=2)  # numpoints = 1 for nicer display
#     ax1.set_xlabel(xlabels), ax1.set_ylabel(ylabels)

#     ax1.set_title(r"Données et moindres carrés:" + "\n" +
#                   r"$\beta_0 = {0:.2f}, \beta_1 = \hat\beta_1 = {1:.2f}$".format(beta_0, beta_1[0]))
#     nb_betas = betas_0.shape[0]
#     ax2.plot(betas_0, np.linalg.norm(np.tile(y, (nb_betas, 1)) - beta_1 * Xval.reshape(1, n_samples) -
#                                      betas_0.reshape(nb_betas, 1), axis=1)**2, label=r'$|| y-X\hat\beta_1 -\beta_0||^2$')
#     ax2.set_xlim(left=betas_0.min(), right=betas_0.max())
#     ax2.plot(beta_0, np.linalg.norm(y - Xval * beta_1 - beta_0)
#              ** 2, 'kv', label=r'$\beta_0$')
#     ax2.plot(skl_linmod.intercept_, np.linalg.norm(
#         y - Xval * beta_1 - skl_linmod.intercept_)**2, 'ro', label=r'$\hat\beta_0$')
#     ax2.legend(numpoints=1, loc=2)  # numpoints = 1 for nicer display
#     ax2.set_xlabel("Ordonnée à l'origine")
#     ax2.set_ylabel("Objectif")

#     ax2.set_title("Objectif des moindres carrés:" + "\n" +
#                   r"$\beta_0 = {0:.2f}, \beta_1 = \hat\beta_1 = {1:.2f}$".format(beta_0, beta_1[0]))
#     fig.suptitle(
#         "Moindres carrés: influence de l'ordonnée à l'origine", size=20)
#     fig.tight_layout(rect=[0, 0.03, 1, 0.91])
#     plt.show()

In [9]:
# interactive_plot = interactive(show_MCO_intercept, beta_0=widgets.FloatSlider(
#     min=ymin_normal, max=ymax_normal, value=1.1 * skl_linmod.intercept_))
# output = interactive_plot.children[-1]
# output.layout.height = '300px'
# interactive_plot

In [10]:
# def show_MCO_bidim(beta_0=0, beta_1=1):
#     y_by_line = beta_0 + beta_1 * X_to_predict

#     fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
#     ax1.set_title('Sharing Y axis')
#     ax1.set_xlim(left=xmin_normal, right=xmax_normal)
#     ax1.set_ylim(bottom=ymin_normal, top=ymax_normal)

#     ax1.plot(X, y, 'o', label=r'Données',
#              markeredgecolor='k', markeredgewidth=1)
#     ax1.plot(X_to_predict, skl_linmod.predict(X_to_predict), "--",
#              linewidth=2, color='k', label=r"Moindres carrés")
#     ax1.plot(X_to_predict, y_by_line,
#              linewidth=2, color='k', label=r"$x \to \beta_0 + \beta_1 x$")

#     ax1.legend(numpoints=1, loc=2)  # numpoints = 1 for nicer display
#     ax1.set_xlabel(xlabels)
#     ax1.set_ylabel(ylabels)
#     ax1.set_title(r"Données et moindres carrés: $\beta_0 = {0:.2f}, \beta_1 = {1:.2f}$".format(
#         beta_0, beta_1[0]))

#     nb_betas = betas.shape[0]
#     ax2.plot(betas, np.linalg.norm(np.tile(y, (nb_betas, 1)) - beta_1 * Xval.reshape(1, n_samples) -
#                                    betas.reshape(nb_betas, 1), axis=1)**2, label=r'$|| y-X\hat\beta_1 -\beta_0||^2$')

#     ax2.plot(beta_0, np.linalg.norm(y - Xval * beta_1 - beta_0)
#              ** 2, 'kv', label=r'$\beta_0$')
#     ax2.plot(skl_linmod.intercept_, np.linalg.norm(
#         y - Xval * beta_1 - skl_linmod.intercept_)**2, 'ro', label=r'$\hat\beta_0$')
#     ax2.legend(numpoints=1, loc=2)  # numpoints = 1 for nicer display
#     ax2.set_title("Objectif des moindres carrés")
#     fig.suptitle("Moindres carrés: influence des deux paramètres", size=20)
#     fig.tight_layout(rect=[0, 0.03, 1, 0.91])
#     plt.show()

In [11]:
def funct_quad(beta_0, beta_1):
    """Least-squares objective ||y - beta_0 - beta_1 * Xval||^2, expanded.

    The squared norm is developed into its six monomials in (beta_0, beta_1),
    so the data (module-level ``y``, ``Xval``, ``n_samples``) only enter
    through a few sums and the function can be evaluated on whole grids of
    coefficients at once.

    Parameters
    ----------
    beta_0 : scalar or array
        Intercept value(s).
    beta_1 : scalar or array
        Slope value(s).

    Returns
    -------
    Value(s) of the objective for the given coefficients.
    """
    constant = np.linalg.norm(y)**2
    quad_intercept = n_samples * beta_0**2
    quad_slope = np.linalg.norm(Xval)**2 * beta_1**2
    lin_intercept = 2 * np.sum(y) * beta_0
    lin_slope = 2 * np.dot(y, Xval) * beta_1
    cross = 2 * np.sum(Xval) * beta_0 * beta_1
    return (constant + quad_intercept + quad_slope
            - lin_intercept - lin_slope + cross)


# Evaluate the objective once on the whole (beta_0, beta_1) grid; the surface
# and contour plots then reuse Z instead of recomputing it at every redraw.
Z = funct_quad(beta_0_grid, beta_1_grid)


def plotting_level_set(ax, fig, beta_0, beta_1, Z):
    """Draw the level sets of the objective on ``ax`` with a colorbar.

    Filled contours and black contour lines share the same levels; the
    coefficients fitted by the module-level ``skl_linmod`` are marked with a
    red dot.

    Parameters
    ----------
    ax : matplotlib axes
        Axes receiving the contours and the marker.
    fig : matplotlib figure
        Figure to which the colorbar is added.
    beta_0, beta_1 : array-like
        Coordinates (intercept, slope) at which ``Z`` is given, as accepted
        by ``contourf``.
    Z : array
        Objective values; must provide ``min()`` and ``max()``.

    Returns
    -------
    None
    """
    # About 30 "nice" levels spanning the range of the objective.
    levels = MaxNLocator(nbins=30).tick_values(Z.min(), Z.max())
    cs = ax.contourf(beta_0, beta_1, Z, alpha=.75,
                     cmap=plt.cm.hot, levels=levels)
    ax.plot(skl_linmod.intercept_, skl_linmod.coef_[0], 'or', ms=12)
    ax.contour(beta_0, beta_1, Z, colors='black', levels=levels)
    # Attach the colorbar explicitly to `ax` instead of relying on whichever
    # axes matplotlib would pick implicitly.
    fig.colorbar(cs, ax=ax)

In [13]:
# Inlined body of the former `show_MCO_3D(beta_0=-17, beta_1=4, azim=280)`
# function: the figure is built once with these initial values, and the
# handles kept below (ax1_pred, ax2_scatter_current, ax3_pts) are the artists
# that the `update` callback moves afterwards.
beta_0=-17
beta_1=4
azim=280
y_by_line = beta_0 + beta_1 * X_to_predict

fig = plt.figure(figsize=(9, 9))
fig.canvas.toolbar_visible = False

# Panel 1 (top left): data, fitted least-squares line (dashed) and the
# candidate line x -> beta_0 + beta_1 * x (solid).
ax1 = fig.add_subplot(2, 2, 1)
ax1.set_xlim(left=xmin_normal, right=xmax_normal)
ax1.set_ylim(bottom=ymin_normal, top=ymax_normal)

ax1.plot(X, y, 'o', label=r'Données',
         markeredgecolor='k', markeredgewidth=1)
ax1.plot(X_to_predict, skl_linmod.predict(X_to_predict), "--",
         linewidth=2, color='k', label=r"Moindres carrés")
ax1_pred, = ax1.plot(X_to_predict, y_by_line,
         linewidth=2, color='k', label=r"$x \to \beta_0 + \beta_1 x$")

ax1.legend(numpoints=1, loc=2)  # numpoints = 1 for nicer display
ax1.set_xlabel(xlabels)
ax1.set_ylabel(ylabels)
ax1.set_title(r"Données et moindres carrés:"
#                   + "\n" +
#                   r"$\beta_0 = {0:.2f}, \beta_1 = \hat\beta_1 = {1:.2f}$".format(beta_0, beta_1)
             )

# Panel 2 (top right): 3-D surface of the objective over the coefficient
# grid. Red circle: fitted coefficients with their objective value. Black
# triangle: current (beta_0, beta_1); its height is the objective times
# 1.001, presumably so that the marker is not hidden inside the surface.
ax2 = fig.add_subplot(2, 2, 2, projection='3d')
ax2.scatter(skl_linmod.intercept_, skl_linmod.coef_[0], np.linalg.norm(y - Xval * skl_linmod.coef_[0] - skl_linmod.intercept_)
            ** 2, marker="o", edgecolors='k', s=130, color="red", label=r"$\hat\beta = (\hat\beta_0,\hat\beta_1)$", alpha=1)
ax2_scatter_current = ax2.scatter(beta_0, beta_1, 1.001 * np.linalg.norm(y - Xval * beta_1 - beta_0)
            ** 2, marker="v", edgecolors='k', s=130, color="k", label=r"$\beta = (\beta_0,\beta_1)$", alpha=1)
ax2.set_title("Fonction à minimiser")
ax2.plot_surface(beta_0_grid, beta_1_grid, Z)
ax2.view_init(azim=azim, elev=20)
ax2.set_xlabel(r"$\beta_0$")
ax2.set_ylabel(r"$\beta_1$")

# Panel 3 (bottom left): level sets of the same objective, with the current
# (beta_0, beta_1) drawn as a black triangle.
ax3 = fig.add_subplot(2, 2, 3)
plotting_level_set(ax3, fig, beta_0_grid, beta_1_grid, Z)
ax3_pts, = ax3.plot(beta_0, beta_1, 'kv', ms=12)
ax3.set_title(r"Lignes de niveau de la fonction à minimiser" +
              "\n" + r'$(\beta_0,\beta_1) \to || y-X\beta_1 -\beta_0||^2$')
ax3.set_xlabel(r"Ordonnée à l'origine ($\beta_0$)")
ax3.set_ylabel(r"Pente ($\beta_1$)")
fig.tight_layout(pad=0.2, w_pad=0.3)

plt.show()

def update(beta_0=-17, beta_1=4, azim=280):
    """Move the artists of the figure to the candidate (beta_0, beta_1).

    Callback for the ipywidgets sliders. It relies on module-level objects
    created when the figure was built: ``ax1_pred`` (candidate line),
    ``ax2_scatter_current`` (marker on the 3-D surface), ``ax2`` (3-D axes),
    ``ax3_pts`` (marker on the level sets) and ``fig``.

    Parameters
    ----------
    beta_0 : float
        Candidate intercept.
    beta_1 : float
        Candidate slope.
    azim : float
        Azimuth given to ``ax2.view_init``; the elevation stays at 20.

    Returns
    -------
    None
    """
    # Candidate line evaluated on the prediction grid, flattened to 1-D
    # (X_to_predict has a single column).
    y_by_line = np.asarray(beta_0 + beta_1 * X_to_predict).ravel()
    ax1_pred.set_ydata(y_by_line)

    # Height of the marker: objective value, with the same 1.001 factor as
    # when the marker was first drawn.
    z_current = 1.001 * np.linalg.norm(y - Xval * beta_1 - beta_0) ** 2
    # A 3-D scatter is positioned from its 3-D offsets, so setting the 2-D
    # offsets alone does not move it: set (x, y), then supply z so the 3-D
    # position is rebuilt.
    ax2_scatter_current.set_offsets(np.c_[beta_0, beta_1])
    ax2_scatter_current.set_3d_properties([z_current], 'z')
    ax2.view_init(azim=azim, elev=20)

    # Line2D data must be sequences, even for a single point.
    ax3_pts.set_data([beta_0], [beta_1])
    fig.canvas.draw()

    
# Sliders driving `update`: beta_0 and beta_1 span the ranges of the
# coefficient grids, and start slightly off the fitted values so that the
# candidate line is visibly different from the least-squares line.
interactive_plot = interactive(
    update,
    beta_0=widgets.FloatSlider(min=beta_0_grid.min(), max=beta_0_grid.max(),
                               value=1.1 * skl_linmod.intercept_, description=r'$\beta_0$'),
    # coef_ is an array (one entry per feature): index it so that the slider
    # receives a scalar rather than a 1-element array.
    beta_1=widgets.FloatSlider(min=beta_1_grid.min(), max=beta_1_grid.max(),
                               value=0.9 * skl_linmod.coef_[0], description=r'$\beta_1$'),
    azim=widgets.IntSlider(min=0, max=360, step=1, value=180, description=r'Azimut'))
# output = interactive_plot.children[-1]
# output.layout.height = '500px'
interactive_plot

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

interactive(children=(FloatSlider(value=-19.33700437956205, description='$\\beta_0$', max=143.6, min=-57.0), F…