In [1]:
### 

# A polynomial fit is evidently not a good model, but it is used because of its mathematical simplicity

In [2]:
#%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
from ipywidgets import interact,ToggleButton,IntSlider,Layout,VBox
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Highest polynomial degree that is fitted; also the upper bound of the degree slider.
MAX_DEGREE = 10
# Standard deviation of the gaussian noise added to the synthetic target values.
NOISE_LEVEL = 0.2
# Number of equally spaced sample points generated on the x domain.
N_POINTS = 30
# Fraction of the sample points handed to train_test_split as the test set.
TEST_SIZE = 0.4

# Single seed shared by numpy's global random state and by train_test_split.
SEED = 0
np.random.seed(SEED)



def plot_polynomial_fit(degree,test_visible):
    """Fit polynomials of degree 1..MAX_DEGREE to noisy synthetic data and plot them.

    The left panel shows the training points, the predictions of the polynomial
    with the selected ``degree`` and, optionally, the held-out test points.
    The right panel shows the mean squared error of every fitted degree on the
    train set (and optionally the test set), with the selected degree in red.

    Parameters
    ----------
    degree : int
        Polynomial degree to highlight. The left panel is only drawn when it
        is one of the fitted degrees (``1..MAX_DEGREE``).
    test_visible : bool
        When true, the test points and the test-error curve are drawn as well.

    Returns
    -------
    None
        The function only draws on a newly created matplotlib figure.
    """
    # reproducibility: reseed on every call so each redraw uses the same noisy sample
    np.random.seed(SEED)

    # domain:
    min_x, max_x = -1, 1
    x = np.linspace(min_x, max_x, N_POINTS)

    # "real" function plus gaussian noise:
    def fun(x):
        return 0.5*x**3 + np.cos(2*x) + np.random.normal(0, NOISE_LEVEL, x.shape)

    y = fun(x)

    # Train/test split on indices. To avoid extrapolation the first and last
    # points are forced into the train set (and removed from the test set);
    # this is just to keep the focus on the point intended in the class.
    indices = range(N_POINTS)
    train_idx, test_idx = train_test_split(indices, test_size=TEST_SIZE, random_state=SEED)
    # Building a set does not rely on train_test_split returning a plain list.
    train_members = set(train_idx) | {0, N_POINTS - 1}
    train_idx = sorted(train_members)
    test_idx = [v for v in test_idx if v not in train_members]

    # Use the indices to get the train and test sets
    x_train, x_test = x[train_idx], x[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    # Fit every degree once and record its train / test error.
    fits_dict = {}
    train_MSE_dict = {}
    test_MSE_dict = {}
    # interact of ipywidgets includes the last element but range does not, hence the +1
    for deg in range(1, MAX_DEGREE + 1):
        # full=True keeps numpy from emitting rank warnings for high degrees;
        # the extra return values are not needed.
        coeffs, _, _, _, _ = np.polyfit(x_train, y_train, deg, full=True, rcond=np.finfo(float).eps/10)
        poly_func = np.poly1d(coeffs)
        fits_dict[deg] = poly_func

        # Both errors are computed the same way so the two curves are comparable
        # and really are mean squared errors, as the axis label states.
        train_MSE_dict[deg] = mean_squared_error(y_true=y_train, y_pred=poly_func(x_train))
        test_MSE_dict[deg] = mean_squared_error(y_true=y_test, y_pred=poly_func(x_test))

    # Create a figure with 1 row and 2 columns
    fig = plt.figure(figsize=(16, 6))
    # The first subplot is three times as wide as the second (3/4 of the width)
    gs = gridspec.GridSpec(1, 2, width_ratios=[3, 1])
    axs = [plt.subplot(g) for g in gs]

    # ---- left panel: data and the selected model ----
    y_shown = [y]
    selected_fit = fits_dict.get(degree)
    if selected_fit is not None:
        # predictions of the *selected* model (not of the last fitted degree)
        y_pred_train = selected_fit(x_train)
        y_pred_test = selected_fit(x_test)
        y_shown += [y_pred_train, y_pred_test]

        axs[0].plot(x_train, y_train, 'x', color='black', markersize = 16, label='Train Data - Real')
        axs[0].plot(x_train, y_pred_train, 'o', color='gray', label='Train Data - Predicted')
        if test_visible:
            axs[0].plot(x_test, y_test, 'x', color='lightgreen', label='Data point (test)')
            axs[0].plot(x_test, y_pred_test, 'o', color='green', label='Predicted by model (test)')
        # plot the actual model including points in between the samples:
        x1 = np.linspace(min_x, max_x, 5*N_POINTS)
        axs[0].plot(x1, selected_fit(x1), 'k--', label='Model fit')

        axs[0].set_title(' A fit of some data points with some simplistic model')
        axs[0].set_xlabel('x : The data we have as predictor(s)')
        axs[0].set_ylabel('y : The Value we attempt to predict')

    # Limits cover the data and the selected model's predictions, padded by a
    # fraction of the span so the padding is correct whatever the sign of the values.
    y_min = min(np.min(values) for values in y_shown)
    y_max = max(np.max(values) for values in y_shown)
    pad = 0.05 * (y_max - y_min)
    axs[0].set_ylim(y_min - pad, y_max + pad)

    # ---- right panel: error against model complexity ----
    degrees = list(train_MSE_dict)
    train_errors = [train_MSE_dict[d] for d in degrees]
    test_errors = [test_MSE_dict[d] for d in degrees]
    train_colors = ['red' if d == degree else 'black' for d in degrees]
    test_colors = ['red' if d == degree else 'green' for d in degrees]

    axs[1].scatter(degrees, train_errors, color = train_colors, label='Error (entrenamiento)')
    axs[1].plot(degrees, train_errors, "k--")
    if test_visible:
        axs[1].scatter(degrees, test_errors, color = test_colors, label='Error (test)')
        axs[1].plot(degrees, test_errors, "g--")
    axs[1].axvline(x=degree, color='red', linestyle='--', label='error')
    axs[1].set_xlabel('<-- lower -- Model Complexity  -- higher-->')
    axs[1].set_ylabel('Mean Squared Error')
    axs[1].set_yscale("log")

    fig.tight_layout()
    axs[0].legend(loc='upper left',)

# Create interaction: a slider selects the polynomial degree and a checkbox
# (created by interact from the boolean default) toggles the test data.
interact(plot_polynomial_fit, 
         degree=IntSlider(description='Model Complexity', 
                          min=1, 
                          max=MAX_DEGREE, 
                          step=1, 
                          value=5, 
                          # Size belongs in Layout; IntSlider itself has no `height` trait.
                          layout=Layout(width='600px', height='60px'),
                          # The style key must be exactly 'description_width'; 'initial'
                          # sizes the label to its text so it is not truncated.
                          style={'description_width': 'initial'}),
         test_visible=False)

interactive(children=(IntSlider(value=5, description='Model Complexity', layout=Layout(width='600px'), max=10,…

<function __main__.plot_polynomial_fit(degree, test_visible)>

In [3]:
# from ipywidgets.embed import embed_minimal_html
# embed_minimal_html('export.html', views=[plot_polynomial_fit], title='Widgets export')

In [4]:
### What happens if I try to use that high-variance model for prediction?