# Fit a Polynomial

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Load the sample points; the CSV is expected to provide the columns 'x' and 'y'
df = pd.read_csv('polyfit_data.csv')
x, y = df['x'].values, df['y'].values

# Scatter plot of the raw data
fig, ax = plt.subplots()
ax.plot(x, y, 'o')
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.show()

# Exercise:
1) Compute the design matrix $X$ (one row per data point, one column per power of $x$ from $0$ to $M$):
$X = \begin{bmatrix} 1 & x_{1} & x_{1}^2 & \cdots & x_{1}^M \\ 1 & x_{2} & x_{2}^2 & \cdots & x_{2}^M \\ \vdots & \vdots & \vdots & \ddots & \vdots \\ 1 & x_{N} & x_{N}^2 & \cdots & x_{N}^M \end{bmatrix}$

2) Compute the least-squares solution: $\mathbf{w} = (X^TX)^{-1}X^TY$, where $Y$ is the column vector of the targets $y_1, \dots, y_N$

3) Compute the MSE (mean squared error): $\frac{1}{N}\sum_{n=1}^{N} (y_n - \hat{y}_n)^2$ with $\hat{y} = X\mathbf{w}$

4) Make a prediction for `xplt = np.linspace(0, 5, 100)` and plot your polynomial over the data points

5) Try to find the "best" value for M

In [None]:
# Polynomial degree of the fit: the design matrix built below has M + 1 columns
# (powers 0..M). Choose any non-negative integer value for M.
M = 3


def design_matrix(x, m):
    """Build the polynomial design matrix of degree ``m`` for the samples ``x``.

    Column ``j`` of the result holds ``x ** j`` for ``j = 0, ..., m``, so the
    first column is the constant term and the matrix has ``m + 1`` columns.

    Parameters
    ----------
    x : array-like
        Sample positions. A 1-D array (or list) of ``N`` values; a scalar is
        treated as a single sample.
    m : int
        Highest power to include.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(N, m + 1)`` for 1-D input of length ``N``.
    """
    # atleast_1d lets plain lists and scalars through; an ndarray is used as is.
    x = np.atleast_1d(x)
    powers = np.arange(m + 1)
    # Broadcasting: (N, 1) ** (1, m + 1) -> (N, m + 1), no explicit loop needed.
    return x[:, None] ** powers[None, :]


# Build the design matrix of the training inputs x
X = design_matrix(x, M)
# Reshape y to a column vector so it has the same (N, 1) layout as X @ w
Y = y.reshape(-1, 1)

# Least-squares solution w = (X^T X)^{-1} X^T Y. For a full-column-rank X the
# pseudo-inverse np.linalg.pinv(X) equals (X^T X)^{-1} X^T, without forming the
# explicit inverse of X^T X.
w = np.linalg.pinv(X) @ Y
# Sum of the squared coefficients (squared L2 norm of w)
print((w**2).sum())
# Compute the MSE. The residual must be taken against the column vector Y:
# y has shape (N,) while y_hat has shape (N, 1), so `y - y_hat` would broadcast
# to an (N, N) matrix of all pairwise differences and report a wrong MSE.
y_hat = X @ w
mse = ((Y - y_hat) ** 2).mean()
print(f"MSE: {mse:.3}")

In [None]:
# Evaluate the fitted polynomial on a dense grid spanning the data range
xplt = np.linspace(x.min(), x.max(), 100)
yplt = design_matrix(xplt, M) @ w

# Draw the data points and the regression curve on the same axes
fig, ax = plt.subplots()
ax.plot(x, y, 'o')
ax.plot(xplt, yplt, '-', linewidth=3)
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.show()


# Bonus:

Reproduce your code with scikit-learn (`sklearn`)!

In [None]:
from sklearn import linear_model

# Create a regression model. The design matrix already contains the constant
# column x**0, so the model must not fit a separate intercept on top of it.
reg = linear_model.LinearRegression(fit_intercept=False)

# Fit the model on the training design matrix
reg.fit(X, y)

# Use the model to predict on the training points
yhat = reg.predict(X)

# Evaluate the sklearn model on the plotting grid: the grid has to go through
# the same design matrix as the training data before it is passed to predict().
Xplt = design_matrix(xplt, M)
yplt_sklearn = reg.predict(Xplt)

# Plot the same as above, this time with the sklearn prediction
plt.plot(x, y, 'o')
plt.plot(xplt, yplt_sklearn, '-', linewidth=3)
plt.xlabel('x')
plt.ylabel('y')
plt.show()

In [None]:
# Sweep the polynomial degree and record the training MSE of every fit.
# Loop-local names (m, X_m, ...) are used on purpose: reusing M / X / reg / yhat /
# mse here would overwrite the values of the single-degree fit above, and
# re-running one of those earlier cells afterwards would mix a degree-20 design
# matrix with the weights of the original fit.
Ms_test = range(1, 21)
mse_s = []

for m in Ms_test:
    X_m = design_matrix(x, m)

    # The design matrix carries its own constant column, hence no extra intercept
    reg_m = linear_model.LinearRegression(fit_intercept=False)
    reg_m.fit(X_m, y)
    yhat_m = reg_m.predict(X_m)

    # MSE on the training points only (no held-out data is used here)
    mse_m = ((yhat_m - y) ** 2).mean()
    mse_s.append(mse_m)
    print(f"{m: 4}:{mse_m: 10.8f}")


In [None]:
# Training MSE as a function of the polynomial degree, on a log scale
fig, ax = plt.subplots(figsize=(15, 10))
ax.plot(Ms_test, mse_s, '-x', markersize=10, linewidth=2)
ax.set_yscale('log')
ax.set_xlabel('M')
ax.set_ylabel('mse')
plt.show()
