# Linear regression (aka ordinary least squares)

In [2]:
# Imports: scikit-learn's linear model and train/test split helper, plus the mglearn helper package
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import mglearn 

In [3]:
# Load the synthetic "wave" regression dataset (60 samples).
X, y = mglearn.datasets.make_wave(n_samples=60)

# Hold out a test set; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Fit ordinary least squares on the training portion only.
# The trailing semicolon stops the notebook from echoing the estimator repr.
lr = LinearRegression()
lr.fit(X_train, y_train);

In [6]:
# Inspect the fitted parameters. The coefficient is the slope of the line
# (w[0] in the book) and the intercept is the offset on the y-axis (b).
slope, offset = lr.coef_, lr.intercept_
print("lr.coef_: {}".format(slope))
print("lr.intercept_: {}".format(offset))

# Score on the data the model was fit on versus the held-out data
# (for a regressor, score() reports R^2).
train_score = lr.score(X_train, y_train)
test_score = lr.score(X_test, y_test)
print("Training set score: {:.2f}".format(train_score))
print("Test set score: {:.2f}".format(test_score))

lr.coef_: [0.39390555]
lr.intercept_: -0.031804343026759746
Training set score: 0.67
Test set score: 0.66


An $R^2$ of about 0.66 is not very good, but the scores on the training and test sets are very close together. This means we are likely underfitting, not overfitting.

In [7]:
# Repeat the experiment on the extended Boston Housing dataset from mglearn.
# NOTE: this rebinds X, y, the train/test splits, and lr from the wave example.
X, y = mglearn.datasets.load_extended_boston()
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Fit a fresh linear regression model on the new training split.
lr = LinearRegression()
lr.fit(X_train, y_train)

# Compare the score on the training split with the score on the held-out split.
boston_train_score = lr.score(X_train, y_train)
boston_test_score = lr.score(X_test, y_test)
print("Training set score: {:.2f}".format(boston_train_score))
print("Test set score: {:.2f}".format(boston_test_score))