# Auto ML using TPOT

This time, I am using AutoML (via TPOT).
Let's see which model it chooses within a 10-minute time frame.

In [1]:
# import required libraries
import sys, os
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, make_scorer
from tpot import TPOTRegressor

# Make the `preprocess` directory that sits next to the current working
# directory importable, so the local `pscript` module can be found.
local_module_path = os.path.abspath(os.path.join(os.path.dirname(os.getcwd()),'preprocess'))
# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if local_module_path not in sys.path:
    sys.path.append(local_module_path)
from pscript import preprocess

In [2]:
# read the raw train and test CSV files (in that order) into DataFrames
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ("../../../dataset/train.csv", "../../../dataset/test.csv")
)

In [3]:
# run the shared `preprocess` step over the train frame first, then the test frame
ptrain, ptest = map(preprocess, (train, test))

In [4]:
# split the preprocessed frames into model inputs and the regression target
target = ptrain['accident_risk']

# features: everything except the row identifier (and, for train, the target)
rtrain = ptrain.drop(columns=['id', 'accident_risk'])
rtest = ptest.drop(columns='id')

In [5]:
# optimization methods
def rmse_func(y_true, y_pred):
    """Return the root mean squared error between `y_true` and `y_pred`.

    Both arguments are forwarded unchanged to scikit-learn's
    `mean_squared_error`; the square root of that value is returned.
    """
    mse = mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)

# Wrap rmse_func as a scikit-learn scorer. RMSE is an error (lower is better),
# so greater_is_better=False is passed; per the scikit-learn docs, make_scorer
# then sign-flips the value so that a larger score still means a better model.
rmse_scorer = make_scorer(rmse_func, greater_is_better=False)

In [6]:
# initialize the model
# NOTE(review): the notebook introduction mentions a 10-minute budget, but the
# search below is capped at 360 minutes overall (30 minutes per pipeline
# evaluation) -- confirm which one is intended.
model = TPOTRegressor(
    verbose=3,
    max_eval_time_mins=30,
    max_time_mins=360,
    n_jobs=1,
    # TPOT documents `scorers` as a list of scorers, so the single custom
    # scorer is wrapped in a list rather than passed bare.
    scorers=[rmse_scorer],
    # One weight per scorer. rmse_scorer already returns a "higher is better"
    # (negated) value, so a positive weight is used.
    scorers_weights=[1],
    # The evolutionary search is stochastic; fix the seed so a re-run is reproducible.
    random_state=42,
)

In [None]:
# train the model
# Fits `model` on the training features (rtrain) against the target column.
# The time limits that bound this call are the ones given to TPOTRegressor
# when `model` was constructed, so this cell can run for a long time.
model.fit(rtrain, target)

In [None]:
# show the pipeline stored on the model after fitting (requires the fit cell to have run)
print(model.fitted_pipeline_)

In [9]:
# predict the values
# Uses the fitted model on the test features (rtest, i.e. ptest without 'id').
preds = model.predict(rtest)

In [10]:
# round every predicted value to three decimal places
prediction = [round(value, 3) for value in preds]

In [11]:
# assemble the submission table: ids from the preprocessed test set next to
# the rounded predictions, in that column order
result = pd.DataFrame(
    data=dict(id=ptest['id'], accident_risk=prediction)
)

In [12]:
# Write the submission file without the index column. The output directory is
# created first (no error if it already exists) so that a missing folder cannot
# make the notebook fail at its very last step, after the long model search.
os.makedirs("../submissions", exist_ok=True)
result.to_csv("../submissions/submission04.csv", index=False)