In [1]:
#Test-Train Data Splits 1-Applications
# https://realpython.com/train-test-split-python-data/#regression-example

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [2]:
# Minimal linear-regression example: a toy dataset of 20 samples.

# Feature matrix: the integers 0..19 arranged as a single column, shape (20, 1).
x = np.arange(20)[:, np.newaxis]

# Target vector: one response value per row of x.
y = np.array([
    5, 12, 11, 19, 30, 29, 23, 40, 51, 54,
    74, 62, 68, 73, 89, 84, 89, 101, 99, 106,
])

# Echo the feature matrix as the cell's output.
x

array([[ 0],
       [ 1],
       [ 2],
       [ 3],
       [ 4],
       [ 5],
       [ 6],
       [ 7],
       [ 8],
       [ 9],
       [10],
       [11],
       [12],
       [13],
       [14],
       [15],
       [16],
       [17],
       [18],
       [19]])

In [3]:
# Echo the target vector so it can be compared row-by-row with x.
y

array([  5,  12,  11,  19,  30,  29,  23,  40,  51,  54,  74,  62,  68,
        73,  89,  84,  89, 101,  99, 106])

In [4]:
# An integer test_size is an absolute count: 8 samples are held out for testing
# and the rest are used for training. random_state=0 fixes the shuffle.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=8, random_state=0)

In [5]:
# Fit a linear regression on the training split only.
# .fit() trains the estimator in place and also returns it, so constructing and
# fitting on separate lines is equivalent to chaining the two calls.
model = LinearRegression()
model.fit(x_train, y_train)

# NOTE(review): a cell echoes only its final expression, so this bare attribute
# access displays nothing; the recorded output is model.coef_ alone.
model.intercept_

# Fitted slope: one coefficient per feature column.
model.coef_

array([5.53121801])

In [6]:
# .score() returns the coefficient of determination (R²) for the data passed.
# Here that is the training split, so the figure is optimistic; an unbiased
# estimate of predictive performance has to come from the held-out test split.
model.score(X=x_train, y=y_train)

0.9868175024574795

In [7]:
# R² on the held-out test split: the estimate of how the model generalizes.
model.score(X=x_test, y=y_test)

0.9465896927715023

In [8]:
# Three Regression Examples:

from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

In [9]:
# load the data
# return_X_y=True makes the loader return a (data, target) tuple, unpacked here.
# This rebinds x and y, replacing the 20-sample toy arrays defined earlier.
# NOTE(review): load_boston appears to have been removed in scikit-learn 1.2 —
# confirm the pinned version, or plan a replacement dataset.
x, y = load_boston(return_X_y=True)

In [10]:
# Split the data. A float test_size is a proportion: 40% of the rows are held out
# for testing. random_state=0 fixes the shuffle so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.4, random_state=0)

In [11]:
# 1 Linear Regression
from sklearn.linear_model import LinearRegression

# Fit on the training split; `model` is rebound for each regressor in turn.
model = LinearRegression()
model.fit(x_train, y_train)

# R² on the training split.
model.score(X=x_train, y=y_train)

0.7668160223286261

In [12]:
# R² of the current model on the held-out test split.
model.score(X=x_test, y=y_test)

0.688260714253803

In [13]:
# 2 Gradient Boosting Regressor
from sklearn.ensemble import GradientBoostingRegressor

# random_state=0 pins the estimator's internal randomness so scores are repeatable.
model = GradientBoostingRegressor(random_state=0)
model.fit(x_train, y_train)

# R² on the training split.
model.score(X=x_train, y=y_train)

0.9859065238883613

In [14]:
# R² of the current model on the held-out test split.
model.score(X=x_test, y=y_test)

0.8530127436482149

In [15]:
# 3 Random Forest Regressor
from sklearn.ensemble import RandomForestRegressor

# random_state=0 pins the estimator's internal randomness so scores are repeatable.
model = RandomForestRegressor(random_state=0)
model.fit(x_train, y_train)

# R² on the training split.
model.score(X=x_train, y=y_train)

0.9811695664860354

In [16]:
# R² of the current model on the held-out test split.
model.score(X=x_test, y=y_test)

0.8325867908704008