# TPOT for Regression
In this section, we will use TPOT to discover a model for the auto insurance dataset.

In [1]:
# import libraries
from pandas import read_csv
from sklearn.model_selection import RepeatedKFold
from tpot import TPOTRegressor



In [6]:
# download the auto insurance CSV; header=None makes pandas treat the first line as data
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/auto-insurance.csv'
dataframe = read_csv(url, header=None)
# convert to a float32 array, then separate inputs (all but the last column)
# from the target (the last column)
data = dataframe.values.astype('float32')
X = data[:, :-1]
y = data[:, -1]
# show the shapes of the input matrix and the target vector
print(X.shape, y.shape)

(63, 1) (63,)


In [7]:
# evaluation procedure: 10-fold cross-validation repeated 3 times,
# with a fixed seed so the splits are the same on every run
cv = RepeatedKFold(
    n_splits=10,
    n_repeats=3,
    random_state=1,
)

In [8]:
# define the TPOT search: 5 generations with a population of 50,
# scored by negative mean absolute error using the `cv` procedure defined above
model = TPOTRegressor(
    generations=5,
    population_size=50,
    scoring='neg_mean_absolute_error',
    cv=cv,
    verbosity=2,
    random_state=1,
    n_jobs=-1,
)

In [9]:
# run the search on the full dataset
model.fit(X, y)
# save the best pipeline found as a Python script
export_path = 'tpot_insurance_best_model.py'
model.export(export_path)

HBox(children=(FloatProgress(value=0.0, description='Optimization Progress', max=300.0, style=ProgressStyle(de…


Generation 1 - Current best internal CV score: -29.147625969129034
Generation 2 - Current best internal CV score: -29.147625969129034
Generation 3 - Current best internal CV score: -29.128768841518855
Generation 4 - Current best internal CV score: -28.938324906842755
Generation 5 - Current best internal CV score: -28.938324906842755
Best pipeline: LinearSVR(CombineDFs(ZeroCount(input_matrix), input_matrix), C=1.0, dual=False, epsilon=0.0001, loss=squared_epsilon_insensitive, tol=0.001)


Now run the whole example again using the best model that TPOT found (LinearSVR).

In [13]:
# fit the pipeline selected by TPOT on the insurance dataset and predict one new row
from copy import copy

from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline, make_union
from sklearn.preprocessing import FunctionTransformer
from sklearn.svm import LinearSVR
from tpot.builtins import StackingEstimator, ZeroCount
from tpot.export_utils import set_param_recursive

# load the dataset; header=None makes pandas treat the first line as data
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/auto-insurance.csv'
dataframe = read_csv(url, header=None)

# inputs are every column except the last; the target is the last column
data = dataframe.values.astype('float32')
X = data[:, :-1]
y = data[:, -1]

# Average CV score on the training set was: -28.938324906842755
# feature step: ZeroCount output joined with an unchanged copy of the input
combined_features = make_union(ZeroCount(), FunctionTransformer(copy))
# final estimator with the hyperparameters reported in the search output
regressor = LinearSVR(
    C=1.0,
    dual=False,
    epsilon=0.0001,
    loss="squared_epsilon_insensitive",
    tol=0.001,
)
exported_pipeline = make_pipeline(combined_features, regressor)

# set random_state=1 on the pipeline steps so the fit is repeatable
set_param_recursive(exported_pipeline.steps, 'random_state', 1)

# train on all available rows
exported_pipeline.fit(X, y)

# predict the target for a single new input row
row = [108]
yhat = exported_pipeline.predict([row])
print('Predicted: %.3f' % yhat[0])

Predicted: 389.642


Read the article for more details: https://machinelearningmastery.com/tpot-for-automated-machine-learning-in-python/