### Self-learning exercises 

The figure below shows synthetic data points generated from a fourth-degree polynomial with added random noise drawn from a normal distribution (mean -0.1, standard deviation 0.1). Change the values of the model parameters $\beta_0$, $\beta_1$, $\beta_2$, $\beta_3$ and $\beta_4$ one at a time, and note the changes in the plot and in the values of $SSE$, $Rsqr$ and $DW$ printed below it. Do you think it is possible to fit a model to such data manually? Initial values have been given to help you with the process.

In [1]:
import numpy as np
from bqplot import pyplot as plt

In [2]:
import ipywidgets as widgets

In [3]:
# synthetic input data: 30 evenly spaced points on [-1, 1] and a noisy
# quartic response (seed fixed so the sample is reproducible)
np.random.seed(0)
x = np.linspace(-1, 1, num=30)
z = x - 2 * x**2 + 0.5 * x**3 + 0.2 * x**4 + np.random.normal(loc=-0.1, scale=0.1, size=30)

# function to update plot based on input model parameters
def update_plot(b4, b3, b2, b1, b0):
    """Plot a quartic model against the data and print its fit statistics.

    Evaluates ``b4*x**4 + b3*x**3 + b2*x**2 + b1*x + b0`` on the module-level
    ``x``, draws the curve over a scatter of the module-level observations
    ``z``, and prints SSE, R-squared and the Durbin-Watson statistic.

    Parameters
    ----------
    b4, b3, b2, b1, b0 : float
        Coefficients of the x**4, x**3, x**2, x and constant terms.
    """
    plt.figure(figsize=(8,4))
    y = b4*x**4 + b3*x**3 + b2*x**2 + b1*x + b0

    # residuals are shared by SSE and the Durbin-Watson statistic
    err = z - y
    SSE = np.sum(err**2)
    SST = np.sum((z - np.mean(z))**2)
    Rsqr = 1 - SSE/SST
    # Durbin-Watson: squared successive residual differences over SSE
    DW = np.sum(np.diff(err)**2) / SSE

    plt.plot(x,y)
    plt.scatter(x,z,colors=['orange'])
    plt.show()
    print(f'SSE= {SSE} \nRsq = {Rsqr} \nDW = {DW}')
# create one numeric input box per coefficient, pre-filled with a starting guess
b4, b3, b2, b1, b0 = (
    widgets.FloatText(value=start, description=label)
    for label, start in (
        (r'\(\beta_4\)', 0.5),
        (r'\(\beta_3\)', 0.5),
        (r'\(\beta_2\)', -1.5977),
        (r'\(\beta_1\)', 0.6048),
        (r'\(\beta_0\)', -0.0922),
    )
)

# re-run update_plot whenever any of the model parameter values changes
widgets.interactive(update_plot, b4=b4, b3=b3, b2=b2, b1=b1, b0=b0)

interactive(children=(FloatText(value=0.5, description='\\(\\beta_4\\)'), FloatText(value=0.5, description='\\…

In the figure below, the same data points are used as in the previous task. Change the polynomial degree and note the changes in the model parameters (coef) and the other evaluation values shown in the table printed below the plot.
   - Based on your understanding from the sessions, what is the best model equation that can fit this sample of data?
   - Compare the OLS parameter estimates with the values you tried in the last task.

In [4]:
from sklearn.linear_model import LinearRegression

In [5]:
from sklearn.preprocessing import PolynomialFeatures

In [6]:
import statsmodels.api as sm

In [7]:
# same synthetic data used in exercise 1 (noisy quartic, same seed);
# the 0.2 * x**4 term is required for the sample to match exercise 1
np.random.seed(0)
x = np.linspace(-1,1,30)
z = x - 2 * (x ** 2) + 0.5 * (x ** 3) + 0.2 * (x ** 4) + np.random.normal(-0.1, 0.1, 30)

# define function for plot update when changing the degree widget
def update_plot(deg):
    """Fit a polynomial of degree ``deg`` to the data, plot it and print the OLS summary.

    Uses the module-level ``x`` and ``z``. The fitted curve comes from
    scikit-learn's ``LinearRegression`` on the polynomial features; the printed
    table comes from a statsmodels ``OLS`` fit on the same feature matrix.

    Parameters
    ----------
    deg : int
        Polynomial degree. Must be 0 or larger; a negative value only prints
        a message and nothing is fitted or plotted.
    """
    if deg < 0:
        # degree 0 is accepted, so the message must not ask for "larger than 0"
        print("Polynomial Degree has to be 0 or larger")
        return

    plt.figure(title='Polynomial Degrees',figsize=(8,4))

    # use sklearn functions to fit the data given different model degrees
    x1 = x[:, np.newaxis]
    y1 = z[:, np.newaxis]
    polynomial_features = PolynomialFeatures(degree=deg)
    x_poly = polynomial_features.fit_transform(x1)
    model = LinearRegression()
    model.fit(x_poly, y1)
    y_poly_pred = model.predict(x_poly)

    # plot the updated model and data
    plt.plot(x,y_poly_pred)
    plt.scatter(x,z,colors=['orange'])
    plt.show()

    # print the updated OLS regression results, fitted on the same
    # polynomial feature matrix that was used for the plotted curve
    results = sm.OLS(z, x_poly).fit()
    print(results.summary())
    
# set the degree widget and call function update_plot
# BoundedIntText is used so that the min/max limits are actually enforced;
# a plain IntText does not apply them
Degree = widgets.BoundedIntText(value=0, min=0, max=28, description='Degree')

widgets.interactive(update_plot, deg = Degree)

interactive(children=(IntText(value=0, description='Degree'), Output()), _dom_classes=('widget-interact',))