# Import Library

In [None]:
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
from sklearn.metrics import mean_absolute_error

%matplotlib inline
rcParams['figure.figsize'] = 8, 5

# Generate Sin(x) Dataset

In [None]:
# Angles from 60deg up to (but not including) 300deg in 4deg steps,
# converted to radians.
x = np.arange(60, 300, 4) * np.pi / 180

# Fix the seed so the noise (and hence every fit below) is reproducible.
np.random.seed(100)
noise = np.random.normal(loc=0, scale=0.15, size=len(x))
y = np.sin(x) + noise  # sine curve with Gaussian noise added

# Collect inputs and noisy targets in one frame and show the raw points.
data = pd.DataFrame({'x': x, 'y': y})
plt.plot(data['x'], data['y'], '.')

# Generate New Features with Higher Powers
自行產生新的feature，x^2～x^15

In [None]:
# Add columns x_2 ... x_15 holding x raised to that power; the first power
# is already present as column 'x'.
for exponent in range(2, 16):
    data['x_{:d}'.format(exponent)] = data['x'] ** exponent
data.head()

# 0. Function definition

In [None]:
from sklearn.linear_model import LinearRegression, Lasso, Ridge

In [None]:
def example_regression(data, power, plot_dict, reg_type, alpha = 0):
    """Fit a polynomial regression on ``data`` and summarise the fit.

    Parameters
    ----------
    data : DataFrame
        Must contain the columns ``'x'`` and ``'y'`` and, for ``power >= 2``,
        the columns ``'x_2'`` ... ``'x_<power>'``.
    power : int
        Highest power of x used as a feature.
    plot_dict : dict
        Maps a power to the subplot spec handed to ``plt.subplot``. The fit
        is plotted only when ``power`` is one of its keys.
    reg_type : str
        One of ``'Linear'``, ``'Lasso'`` or ``'Ridge'``.
    alpha : float, optional
        Passed as ``alpha`` to ``Lasso`` / ``Ridge``; not used for
        ``'Linear'``.

    Returns
    -------
    list
        ``[rss, intercept, coef_1, ..., coef_<power>]``.

    Raises
    ------
    ValueError
        If ``reg_type`` is not one of the supported names.
    """
    # Factories instead of instances: only the requested estimator is built.
    estimator_factories = {
        'Linear': lambda: LinearRegression(),
        'Lasso': lambda: Lasso(alpha=alpha),
        'Ridge': lambda: Ridge(alpha=alpha),
    }
    # Fail early with a clear message rather than hitting an unbound `model`.
    if reg_type not in estimator_factories:
        raise ValueError(
            'Unknown reg_type %r; expected one of %s'
            % (reg_type, sorted(estimator_factories))
        )
    model = estimator_factories[reg_type]()

    # Feature columns in increasing power order: x, x_2, ..., x_<power>.
    features = ['x'] + ['x_%d' % i for i in range(2, power + 1)]

    # Fit and predict on the same data (training error only).
    model.fit(data[features], data['y'])
    y_pred = model.predict(data[features])
    # Score against the targets of the frame passed in, not a global `y`.
    mae = mean_absolute_error(data['y'], y_pred)

    # Draw the fit only for the powers the caller asked to see.
    if power in plot_dict:
        plt.subplot(plot_dict[power])
        plt.tight_layout()
        plt.plot(data['x'], data['y'], '.')
        plt.plot(data['x'], y_pred)
        plt.title('Plot for power: %d'%power + '\n' + 'mae:%.2f'%mae)

    # Residual sum of squares, followed by the fitted parameters.
    rss = sum((y_pred - data['y'])**2)
    return [rss, model.intercept_] + list(model.coef_)

In [None]:
# Result table: one row per polynomial degree, holding the RSS, the intercept
# and up to 15 coefficients (unused coefficient cells stay empty).
powers = range(1, 16)
col = ['rss', 'intercept'] + ['coef_x_%d' % p for p in powers]
ind = ['pow_%d' % p for p in powers]

perf_Linear = pd.DataFrame(index=ind, columns=col)

# Powers for which a plot is drawn, mapped to their subplot position.
plot_dict = {1:231, 3:232, 6:233, 9:234, 12:235, 15:236}

# Fit every degree and write its results into the leading cells of its row.
for row, power in enumerate(powers):
    fit_summary = example_regression(
        data, power=power, plot_dict=plot_dict, reg_type='Linear'
    )
    perf_Linear.iloc[row, 0:power+2] = fit_summary

In [None]:
# Show floats with two significant digits and thousands separators.
pd.set_option('display.float_format', '{:,.2g}'.format)
perf_Linear