### About
$\bullet$ Using the `GaussianProcessRegressor` class from scikit-learn for polynomial regression
<br> $\bullet$ Source: https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.GaussianProcessRegressor.html
<br> $\bullet$ Last worked on: August 13, 2021

### Import libraries

In [None]:
# Select the Qt backend so figures open in their own interactive windows
# (the legend pick handlers in this notebook rely on GUI pick events).
%matplotlib qt

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from warnings import catch_warnings
from warnings import simplefilter
from sklearn.gaussian_process import GaussianProcessRegressor
from pandas.plotting import register_matplotlib_converters

### $\downarrow$ Functions to enable interactive plotting

In [None]:
def onpick(event):
    """Toggle the plotted line that belongs to the clicked legend entry.

    The picked legend line is looked up in the module-level ``lined``
    mapping; the mapped line's visibility is flipped, the legend entry is
    dimmed (alpha 0.2) when the line is hidden or restored (alpha 1.0) when
    it is shown, and the canvas of the module-level ``fig1`` is redrawn.
    """
    legend_entry = event.artist
    plotted_line = lined[legend_entry]

    # Flip the current visibility of the plotted line
    show = not plotted_line.get_visible()
    plotted_line.set_visible(show)

    # A faded legend entry marks a line that is currently hidden
    legend_entry.set_alpha(1.0 if show else 0.2)

    fig1.canvas.draw()

In [None]:
def onpick2(event):
    """Toggle the plotted line that belongs to the clicked legend entry.

    Same behaviour as a legend-click toggle for the dual-axis figure: the
    picked legend line is looked up in the module-level ``lined2`` mapping,
    the mapped line's visibility is flipped, the legend entry's alpha is set
    to 1.0 (shown) or 0.2 (hidden), and the canvas of the module-level
    ``fig2`` is redrawn.
    """
    legend_entry = event.artist
    plotted_line = lined2[legend_entry]

    # Flip the current visibility of the plotted line
    show = not plotted_line.get_visible()
    plotted_line.set_visible(show)

    # A faded legend entry marks a line that is currently hidden
    legend_entry.set_alpha(1.0 if show else 0.2)

    fig2.canvas.draw()

### $\downarrow$ Plotting functions

In [None]:
def single_axis_plotting(input_df, sample_df):
    """Plot the sparse data and the surrogate fit on one axis with a clickable legend.

    Clicking a legend entry toggles the visibility of the corresponding
    plotted line. The pick handler is defined inside this function and closes
    over the figure and the legend-to-line mapping created here, so it works
    regardless of the names the caller binds the returned values to (it no
    longer depends on module-level ``fig1`` / ``lined`` variables).

    Parameters
    ----------
    input_df : mapping of column name -> values (e.g. a DataFrame)
        Must provide 'x' and 'y'; drawn as red dots labelled 'Sparse data'.
    sample_df : mapping of column name -> values (e.g. a DataFrame)
        Must provide 'x' and 'y'; drawn as a teal line labelled
        'Surrogate model fit'.

    Returns
    -------
    fig1 : the figure that was created
    lined : dict
        Maps each legend line to the plotted line it toggles.
    """
    register_matplotlib_converters()

    fig1, ax1 = plt.subplots(figsize=(15, 6))
    sparse_line, = ax1.plot(input_df['x'], input_df['y'], 'r.', markersize=15, label='Sparse data')
    surrogate_line, = ax1.plot(sample_df['x'], sample_df['y'], 'teal', linewidth=2.5, alpha=0.7,
                               label='Surrogate model fit')

    ax1.set_xlabel('X', fontsize=12)
    ax1.set_ylabel('Y', fontsize=12)
    ax1.grid(True)
    leg1 = ax1.legend(loc='best', fontsize=12, ncol=1, borderaxespad=0)

    # Map every legend line to the plotted line it controls and make it pickable
    lined = {}
    for legline1, origline1 in zip(leg1.get_lines(), [sparse_line, surrogate_line]):
        legline1.set_picker(5)  # 5 pts tolerance
        lined[legline1] = origline1

    def _toggle_line(event):
        # Only react to picks on legend lines registered for this figure
        origline = lined.get(event.artist)
        if origline is None:
            return
        vis = not origline.get_visible()
        origline.set_visible(vis)
        # Fade the legend entry while its line is hidden
        event.artist.set_alpha(1.0 if vis else 0.2)
        fig1.canvas.draw()

    fig1.canvas.mpl_connect('pick_event', _toggle_line)
    ax1.tick_params(labelsize=15)
    plt.show()

    return fig1, lined

In [None]:
def double_axis_plotting(input_df, sample_df):
    """Plot the fit (top axis) and its standard deviation (bottom axis) with clickable legends.

    The two vertically stacked axes share the x axis and are drawn with
    "broken axis" diagonal marks between them. Clicking a legend entry on
    either axis toggles the visibility of the corresponding plotted line.
    The pick handler is defined inside this function and closes over the
    figure and the legend-to-line mapping created here, so it works
    regardless of the names the caller binds the returned values to (it no
    longer depends on module-level ``fig2`` / ``lined2`` variables).

    Parameters
    ----------
    input_df : mapping of column name -> values (e.g. a DataFrame)
        Must provide 'x' and 'y'; drawn as red dots labelled 'Sparse data'.
    sample_df : mapping of column name -> values (e.g. a DataFrame)
        Must provide 'x', 'y' and 'y (std dev)'; 'y' is drawn on the top
        axis, 'y (std dev)' on the bottom axis.

    Returns
    -------
    fig2 : the figure that was created
    lined2 : dict
        Maps each legend line (from both axes) to the plotted line it toggles.
    """
    register_matplotlib_converters()

    fig2, (ax1, ax2) = plt.subplots(2, 1, sharex=True, facecolor='w', figsize=(15, 9))

    # Top axis: sparse data and surrogate fit
    sparse_line, = ax1.plot(input_df['x'], input_df['y'], 'r.', markersize=15, label='Sparse data')
    surrogate_line, = ax1.plot(sample_df['x'], sample_df['y'], 'teal', linewidth=2.5, alpha=0.7,
                               label='Surrogate model fit')
    ax1.set_ylabel('Y', fontsize=12)
    ax1.grid(True)

    # Bottom axis: standard deviation of the surrogate prediction
    deviation_line, = ax2.plot(sample_df['x'], sample_df['y (std dev)'], 'tomato', linewidth=2.5, alpha=0.7,
                               label='Surrogate model confidence')
    ax2.set_xlabel('X', fontsize=18)
    ax2.set_ylabel('Standard Deviation', fontsize=18)
    ax2.grid(True)

    # Hide the facing spines and draw short diagonal marks where the axes meet
    ax1.spines['bottom'].set_visible(False)
    ax2.spines['top'].set_visible(False)

    d = .012  # half-length of the diagonal marks, in axes coordinates
    kwargs = dict(transform=ax1.transAxes, color='k', clip_on=False)
    ax1.plot((-d, +d), (-d, +d), **kwargs)              # bottom-left diagonal
    ax1.plot((1 - d, 1 + d), (-d, +d), **kwargs)        # bottom-right diagonal

    kwargs.update(transform=ax2.transAxes)              # switch to the bottom axes
    ax2.plot((-d, +d), (1 - d, 1 + d), **kwargs)        # top-left diagonal
    ax2.plot((1 - d, 1 + d), (1 - d, 1 + d), **kwargs)  # top-right diagonal

    # Map every legend line (both axes) to the plotted line it controls
    lined2 = {}

    leg1 = ax1.legend(loc='best', fontsize=15, ncol=1, borderaxespad=0)
    for legline1, origline1 in zip(leg1.get_lines(), [sparse_line, surrogate_line]):
        legline1.set_picker(5)  # 5 pts tolerance
        lined2[legline1] = origline1

    leg2 = ax2.legend(loc='best', fontsize=15, ncol=1, borderaxespad=0)
    for legline2, origline2 in zip(leg2.get_lines(), [deviation_line]):
        legline2.set_picker(5)  # 5 pts tolerance
        lined2[legline2] = origline2

    def _toggle_line(event):
        # Only react to picks on legend lines registered for this figure
        origline = lined2.get(event.artist)
        if origline is None:
            return
        vis = not origline.get_visible()
        origline.set_visible(vis)
        # Fade the legend entry while its line is hidden
        event.artist.set_alpha(1.0 if vis else 0.2)
        fig2.canvas.draw()

    fig2.canvas.mpl_connect('pick_event', _toggle_line)

    ax1.tick_params(labelsize=15)
    ax2.tick_params(labelsize=15)

    plt.show()

    return fig2, lined2

### Read data

In [None]:
# Base name (without the .csv extension) of the file holding the sparse data
filename = 'Dataset 1'

# low_memory=False makes pandas read the file in one go instead of in chunks
input_df = pd.read_csv(
    filepath_or_buffer='{}.csv'.format(filename),
    low_memory=False,
)

In [None]:
# 'Unnamed: 0' is the name pandas assigns to a header-less first column
# (presumably an index that was saved with the CSV — confirm).
# errors='ignore' keeps this cell re-runnable: without it, a second run raises
# KeyError because the column has already been dropped.
input_df = input_df.drop(columns=['Unnamed: 0'], errors='ignore')
input_df

In [None]:
# Stack the 'x' and 'y' columns into single-column 2-D arrays, one row per sample
X, y = (np.vstack(input_df[column]) for column in ('x', 'y'))

### Regression

In [None]:
def surrogate(model, X):
    """Return ``model.predict(X, return_std=True)`` with all warnings silenced.

    Parameters
    ----------
    model : object exposing ``predict(X, return_std=...)``
    X : the query points handed straight to ``model.predict``

    Returns
    -------
    Whatever ``model.predict`` returns when called with ``return_std=True``.
    """
    # Warnings raised during prediction are suppressed only inside this context
    with catch_warnings():
        simplefilter('ignore')
        prediction = model.predict(X, return_std=True)
    return prediction

In [None]:
# Define model
model = GaussianProcessRegressor()

# Fit data
model.fit(X, y)

# Get the Surrogate model
Xsamples = np.asarray(np.arange(1, len(input_df)-1, 0.01))
Xsamples = Xsamples.reshape(len(Xsamples), 1)
ysamples,y_std = surrogate(model, Xsamples)

In [None]:
# Flatten each array to 1-D and collect them under the column names used by
# the plotting functions
dict_samples = {
    column: np.hstack(values)
    for column, values in (('x', Xsamples), ('y', ysamples), ('y (std dev)', y_std))
}
df_samples = pd.DataFrame(dict_samples, index=range(len(Xsamples)))
df_samples

### Single axis plotting

In [None]:
# Draw the single-axis figure. The results are bound to `fig1` and `lined`,
# the module-level names that the `onpick` handler looks up.
fig1, lined = single_axis_plotting(input_df=input_df, sample_df=df_samples)

### Dual axis plotting

In [None]:
# Draw the dual-axis figure. The results are bound to `fig2` and `lined2`,
# the module-level names that the `onpick2` handler looks up.
fig2, lined2 = double_axis_plotting(input_df=input_df, sample_df=df_samples)