In [None]:
%pip install auto-sklearn

Collecting auto-sklearn
  Downloading auto-sklearn-0.14.7.tar.gz (6.4 MB)
[K     |████████████████████████████████| 6.4 MB 7.1 MB/s 
Collecting distro
  Downloading distro-1.7.0-py3-none-any.whl (20 kB)
Collecting scipy>=1.7.0
  Downloading scipy-1.7.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (38.1 MB)
[K     |████████████████████████████████| 38.1 MB 10.5 MB/s 
Collecting scikit-learn<0.25.0,>=0.24.0
  Downloading scikit_learn-0.24.2-cp37-cp37m-manylinux2010_x86_64.whl (22.3 MB)
[K     |████████████████████████████████| 22.3 MB 11.8 MB/s 
[?25hCollecting dask>=2021.12
  Downloading dask-2022.2.0-py3-none-any.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 36.1 MB/s 
[?25hCollecting distributed>=2012.12
  Downloading distributed-2022.2.0-py3-none-any.whl (837 kB)
[K     |████████████████████████████████| 837 kB 45.0 MB/s 
Collecting liac-arff
  Downloading liac-arff-2.5.0.tar.gz (13 kB)
Collecting ConfigSpace<0.5,>=0.4.21
  Downloading ConfigSpace-

In [None]:
import sklearn.datasets
import sklearn.model_selection
import autosklearn.regression
import autosklearn.metrics
import pandas as pd

In [None]:
def dataSetup(name):
  X,y = sklearn.datasets.fetch_openml(data_id = name, as_frame=True, return_X_y=True)
  train = pd.concat([X, y], axis=1, join='inner')
  train.dropna()
  train = train.apply(lambda x: pd.factorize(x)[0])
  X,y = train.iloc[:,:-1], train.iloc[:, -1]
  # X = X.apply(lambda x: pd.factorize(x)[0])
  X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X,y,random_state=42)
  return (X_train, X_test, y_train, y_test)

In [None]:
X_train, X_test, y_train, y_test = dataSetup(225)
X_train.head()

Unnamed: 0,theta1,theta2,theta3,thetad1,thetad2,thetad3,tau1,tau2
6936,6931,6927,6929,6930,6929,6929,6918,6915
380,380,380,379,380,380,380,380,380
4098,4098,4093,4091,4096,4096,4095,4093,4093
2684,2684,2682,2681,2684,2684,2684,2682,2683
1912,1912,1910,1910,1912,1912,1912,1911,1912


In [None]:
estimator_askl = autosklearn.regression.AutoSklearnRegressor(time_left_for_this_task=3600,seed=42,resampling_strategy='cv',)
estimator_askl.fit(X_train, y_train, dataset_name='boston')
prediction = estimator_askl.predict(X_test)



In [None]:
(prediction == y_test).sum()/len(prediction)

0.0

In [None]:
print(estimator_askl.sprint_statistics())

auto-sklearn results:
  Dataset name: boston
  Metric: r2
  Best validation score: 0.999999
  Number of target algorithm runs: 146
  Number of successful target algorithm runs: 132
  Number of crashed target algorithm runs: 1
  Number of target algorithms that exceeded the time limit: 1
  Number of target algorithms that exceeded the memory limit: 12



In [None]:
from pprint import pprint
pprint(estimator_askl.show_models(), indent=4)

{   97: {   'cost': 6.732467163277137e-07,
            'ensemble_weight': 0.48,
            'estimators': [   {   'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f66e6ad9c10>,
                                  'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f66e6050cd0>,
                                  'regressor': <autosklearn.pipeline.components.regression.RegressorChoice object at 0x7f66e6057110>,
                                  'sklearn_regressor': KNeighborsRegressor(n_neighbors=47, p=1, weights='distance')},
                              {   'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f66e5f97d50>,
                                  'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f66e5faded0>,
                      

In [None]:
for run_key in estimator_askl.automl_.runhistory_.data:
    print('#########')
    print(run_key)
    print(estimator_askl.automl_.runhistory_.data[run_key])

#########
RunKey(config_id=1, instance_id='{"task_id": "boston"}', seed=0, budget=0.0)
RunValue(cost=1.0, time=60.214600801467896, status=<StatusType.MEMOUT: 5>, starttime=1651697611.5728996, endtime=1651697672.8889544, additional_info={'error': 'Memout (used more than 3072 MB).', 'configuration_origin': 'Initial design'})
#########
RunKey(config_id=2, instance_id='{"task_id": "boston"}', seed=0, budget=0.0)
RunValue(cost=0.00013958603189357947, time=22.182713985443115, status=<StatusType.SUCCESS: 1>, starttime=1651697672.9394155, endtime=1651697695.146645, additional_info={'duration': 21.932364225387573, 'num_run': 3, 'train_loss': 0.00011236727788201174, 'configuration_origin': 'Initial design'})
#########
RunKey(config_id=3, instance_id='{"task_id": "boston"}', seed=0, budget=0.0)
RunValue(cost=1.0, time=2.553816318511963, status=<StatusType.MEMOUT: 5>, starttime=1651697695.2015517, endtime=1651697698.78454, additional_info={'error': 'Memout (used more than 3072 MB).', 'configuratio

In [None]:
print("Accuracy score", sklearn.metrics.r2_score(y_test, prediction))

Accuracy score 0.999999666478939


In [None]:
print(estimator_askl.leaderboard())

          rank  ensemble_weight                 type          cost  duration
model_id                                                                    
140          1             0.02  k_nearest_neighbors  6.354001e-07  3.231696
146          2             0.48  k_nearest_neighbors  6.521273e-07  3.969275
97           3             0.48  k_nearest_neighbors  6.732467e-07  2.783325
108          4             0.02  k_nearest_neighbors  1.149004e-06  3.365221


In [None]:
print("Accuracy score", sklearn.metrics.mean_squared_error(y_test, prediction))

Accuracy score 1.8877127483081704
