# auto-sklearn

scikit-learnベースの機械学習モデル(前処理・アルゴリズム選択・ハイパーパラメータ調整・アンサンブル)を自動構築するAutoML用Pythonライブラリ

## データセットの読み込み(ボストン住宅価格)

In [1]:
from sklearn.datasets import load_boston
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the Boston housing data and wrap features / target in DataFrames.
# NOTE(review): load_boston is reportedly deprecated in scikit-learn 1.0 and
# removed in 1.2 -- confirm the pinned scikit-learn version still ships it.
boston = load_boston()
df_x_boston = pd.DataFrame(data=boston.data, columns=boston.feature_names)
df_y_boston = pd.DataFrame({'MEDV': boston.target})
df_boston = pd.concat((df_x_boston, df_y_boston), axis='columns')

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    df_x_boston,
    df_y_boston,
    test_size=0.3,
    random_state=0,
)

# Re-attach the target column so each split is also available as one frame.
df_train = pd.concat((x_train, y_train), axis='columns')
df_test = pd.concat((x_test, y_test), axis='columns')

In [2]:
# Display the training feature frame returned by train_test_split.
x_train

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
141,1.62864,0.0,21.89,0.0,0.624,5.019,100.0,1.4394,4.0,437.0,21.2,396.90,34.41
272,0.11460,20.0,6.96,0.0,0.464,6.538,58.7,3.9175,3.0,223.0,18.6,394.96,7.73
135,0.55778,0.0,21.89,0.0,0.624,6.335,98.2,2.1107,4.0,437.0,21.2,394.67,16.96
298,0.06466,70.0,2.24,0.0,0.400,6.345,20.1,7.8278,5.0,358.0,14.8,368.24,4.97
122,0.09299,0.0,25.65,0.0,0.581,5.961,92.9,2.0869,2.0,188.0,19.1,378.09,17.93
...,...,...,...,...,...,...,...,...,...,...,...,...,...
323,0.28392,0.0,7.38,0.0,0.493,5.708,74.3,4.7211,5.0,287.0,19.6,391.13,11.74
192,0.08664,45.0,3.44,0.0,0.437,7.178,26.3,6.4798,5.0,398.0,15.2,390.49,2.87
117,0.15098,0.0,10.01,0.0,0.547,6.021,82.6,2.7474,6.0,432.0,17.8,394.51,10.30
47,0.22927,0.0,6.91,0.0,0.448,6.030,85.5,5.6894,3.0,233.0,17.9,392.74,18.80


In [3]:
# Display the training target frame (single MEDV column).
y_train

Unnamed: 0,MEDV
141,14.4
272,24.4
135,18.1
298,22.5
122,20.5
...,...
323,18.5
192,36.4
117,19.2
47,16.6


In [4]:
from pprint import pprint
import sklearn.metrics

import autosklearn.regression

In [5]:
# Scratch directory handed to auto-sklearn via its `tmp_folder` argument.
# It is removed before every run so a previous run's leftovers cannot
# interfere with the new one.
import shutil

tmp_folder = '/tmp/sample/autosklearn'
# Pure-Python replacement for the `!rm -rf {tmp_folder}` shell magic: works
# outside IPython and on platforms without `rm`; ignore_errors=True mirrors
# the "-f" behaviour of not failing when the directory does not exist.
shutil.rmtree(tmp_folder, ignore_errors=True)

In [6]:
# Settings for the AutoML search, collected in one place.
# NOTE(review): per the auto-sklearn API docs the two time limits are in
# seconds and memory_limit=None disables the per-run memory cap -- confirm
# against the installed version.
automl_settings = dict(
    time_left_for_this_task=1000,
    per_run_time_limit=100,
    tmp_folder=tmp_folder,
    memory_limit=None,
)
automl = autosklearn.regression.AutoSklearnRegressor(**automl_settings)

# Run the search on the training split; as the last expression of the cell,
# the fitted estimator's repr is shown as the cell output.
automl.fit(X=x_train, y=y_train, dataset_name='boston')



  self.metafeatures = self.metafeatures.append(metafeatures)
  self.algorithm_runs[metric].append(runs)


AutoSklearnRegressor(memory_limit=None, per_run_time_limit=100,
                     time_left_for_this_task=1000,
                     tmp_folder='/tmp/sample/autosklearn')

In [7]:
# Print the leaderboard of the fitted run: one row per model with its rank,
# ensemble weight, model type, cost and duration.
print(automl.leaderboard())

          rank  ensemble_weight               type      cost  duration
model_id                                                              
11           1             0.44        extra_trees  0.056418  1.909549
140          2             0.16  gradient_boosting  0.064094  0.651229
9            3             0.02  gradient_boosting  0.066737  0.816513
165          4             0.14           adaboost  0.070369  1.149160
163          5             0.10           adaboost  0.071310  3.732050
71           6             0.14  gradient_boosting  0.071793  2.073982


In [8]:
# Predict on the training split first, then on the held-out test split.
train_pred, test_pred = (
    automl.predict(features) for features in (x_train, x_test)
)

In [9]:
# Mean absolute error on the training split and on the held-out test split.
train_mae = sklearn.metrics.mean_absolute_error(y_true=y_train, y_pred=train_pred)
print("Train MAE score:", train_mae)
test_mae = sklearn.metrics.mean_absolute_error(y_true=y_test, y_pred=test_pred)
print("Test MAE score:", test_mae)

Train MAE score: 0.8214889575322133
Test MAE score: 2.5812213965348505


In [10]:
# R2 (coefficient of determination) on the training and held-out test splits.
train_r2 = sklearn.metrics.r2_score(y_true=y_train, y_pred=train_pred)
print("Train R2 score:", train_r2)
test_r2 = sklearn.metrics.r2_score(y_true=y_test, y_pred=test_pred)
print("Test R2 score:", test_r2)

Train R2 score: 0.9796309028754003
Test R2 score: 0.7821445958907044
