## Step 0: Install FLAML

In [1]:
!pip install flaml
from flaml import AutoML

Collecting flaml
  Downloading FLAML-2.1.2-py3-none-any.whl (296 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/296.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m112.6/296.7 kB[0m [31m3.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m296.7/296.7 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: flaml
Successfully installed flaml-2.1.2


In [2]:
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing

# Fetch the California housing dataset as a (features, target) pair.
X, y = fetch_california_housing(return_X_y=True)

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Step 1: FLAML (AutoML)

In [3]:
# Search settings: optimise R^2 on a regression task within a fixed time budget.
automl_settings = dict(
    time_budget=20,  # seconds
    metric="r2",
    task="regression",
)

# Create the AutoML searcher and run the search on the training split only;
# the test split is kept aside for the final evaluation.
automl = AutoML()
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)

[flaml.automl.logger: 04-20 21:44:33] {1680} INFO - task = regression
[flaml.automl.logger: 04-20 21:44:33] {1691} INFO - Evaluation method: holdout
[flaml.automl.logger: 04-20 21:44:33] {1789} INFO - Minimizing error metric: 1-r2
[flaml.automl.logger: 04-20 21:44:33] {1901} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'xgboost', 'extra_tree', 'xgb_limitdepth']
[flaml.automl.logger: 04-20 21:44:33] {2219} INFO - iteration 0, current learner lgbm
[flaml.automl.logger: 04-20 21:44:33] {2345} INFO - Estimated sufficient time budget=786s. Estimated necessary time budget=6s.
[flaml.automl.logger: 04-20 21:44:33] {2392} INFO -  at 0.1s,	estimator lgbm's best error=0.7234,	best estimator lgbm's best error=0.7234
[flaml.automl.logger: 04-20 21:44:33] {2219} INFO - iteration 1, current learner lgbm
[flaml.automl.logger: 04-20 21:44:34] {2392} INFO -  at 0.2s,	estimator lgbm's best error=0.7234,	best estimator lgbm's best error=0.7234
[flaml.automl.logger: 04-20 21:44:34] {2219} INFO

In [4]:
# Show the underlying estimator that the AutoML search settled on.
print("best model:")
print(automl.model.estimator)

# R^2 of the selected model on the held-out test split (displayed as the cell output).
r2_score(
    y_true=y_test,
    y_pred=automl.predict(X_test),
)

best model:
LGBMRegressor(colsample_bytree=0.5967846088487322,
              learning_rate=0.09348689572544734, max_bin=127,
              min_child_samples=4, n_estimators=1, n_jobs=-1, num_leaves=69,
              reg_alpha=0.006958608037974516, reg_lambda=0.001895876878997586,
              verbose=-1)


0.840920034588206

## Step 2: Random Forests

In [5]:
from sklearn.ensemble import RandomForestRegressor
# Baseline for comparison: a random forest with scikit-learn's default hyperparameters.
# random_state is fixed (same seed as the train/test split) because the forest's
# bootstrap sampling is random; without a seed the reported R^2 changes from run to run,
# which makes the comparison against the AutoML score unrepeatable.
regr = RandomForestRegressor(random_state=42)
regr.fit(X_train, y_train)

# R^2 of the baseline on the held-out test split (displayed as the cell output).
r2_score(y_true=y_test, y_pred=regr.predict(X_test))

0.8032544946756442