# Concept Session- Multiple Linear Regression

### Import the standard libraries numpy and (parts of) sklearn

In [1]:
# the following code snippet are taken from https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html

In [2]:
import numpy as np
from sklearn.linear_model import LinearRegression

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

### Generating a toy data set

In [3]:
# in this example we use a very simple toy dataset

In [4]:
# we set some x-values

In [5]:
X = np.array([[1, 10], [1, 20], [2, 20], [2, 30], [5, 40], [-2, 30]])

In [6]:
# and construct corresponding y-values

In [7]:
# y = 4 * x_0 + 2 * x_1 + 13

y = np.dot(X, np.array([4, 2])) + 13

# add some random noise to the y-values

y = y + 50*np.random.randn(len(y))
y

array([-15.87138486,  30.39442769,  76.22198967, 103.06917331,
       111.01926289,  41.52606423])

In [8]:
# i.e. we are mapping 2D input values onto 1D target values

### Training the model

In [9]:
# we use the class sklearn.linear_model.LinearRegression in the following. 
# Before we actually fit a linear model to data, we have to 'configure' the linear model.

In [10]:
# We determine whether to use an intercept: "fit_intercept = True / False", default value is "True"

In [11]:
# and we determine whether to use normalized attributes: "normalize = True / False", default value is "False"

In [1]:
# Now we create a "LinearRegression" object and fit it into the toy dataset

In [13]:
reg = LinearRegression().fit(X, y)

### Evaluating the model

In [14]:
# various scores for evaluation: root-mean-square error (RMSE), mean absolute error (MAE), R^2 score

In [15]:
# first, we evaluate the R^2 score (on training data, to check the quality of the fit)

In [16]:
reg.score(X, y)

0.7766081110537782

In [17]:
# We reach a value of approx. 0.5. We do not reach an ideal value of 1.0 as we added random noise that cannot be predicted.

In [18]:
mean_squared_error(y, reg.predict(X))

434.28517626887486

In [19]:
mean_absolute_error(y, reg.predict(X))

18.921294591882305

In [20]:
# Let's have a look at the learned model parameters:

In [21]:
reg.coef_

array([7.07410156, 3.2271259 ])

In [2]:
# Above we decided that the linear model should have a learnable intercept. 
# Let's have a look at which value it takes on:

In [23]:
reg.intercept_

-33.5627111058762

### Inference

In [24]:
# Having optimized the model, we can now predict y-values for previously unseen input values x.

In [25]:
# Let's check the following two datapoints:

In [26]:
xnew1 = [3,5]

In [27]:
xnew2 = [-23,9]

In [28]:
# They can evaluate several input values at a time by passing them as array elements.

In [30]:
reg.predict(np.array([xnew1, xnew2]))

array([   3.79522309, -167.2229138 ])