ray-project · inventormc · Apr 24, 2020 · Apr 17, 2020 · Apr 17, 2020 · Apr 17, 2020
diff --git a/.travis.yml b/.travis.yml
@@ -9,14 +9,14 @@ matrix:
     - os: osx
       osx_image: xcode9.4
       language: generic
-      env: PYTHON=3.6 PYTHONWARNINGS=ignore
+      env: PYTHON=3.6 PYTHONWARNINGS=ignore OS=MAC  # OS=MAC gates brew libomp
     - os: linux
       python: 3.7
       env: PYTHON=3.7
     - os: osx
       osx_image: xcode10.2
       language: generic
-      env: PYTHON=3.7 PYTHONWARNINGS=ignore
+      env: PYTHON=3.7 PYTHONWARNINGS=ignore OS=MAC  # OS=MAC gates brew libomp
     - os: linux
       env: LINT=1 PYTHONWARNINGS=ignore
       # before_install:
@@ -34,6 +34,7 @@ install:
+  - if [ "$OS" = "MAC" ]; then brew install libomp; fi  # macOS-only dependency
   - pip3 install -r requirements.txt
 
 script:
   - python3 setup.py install
   - cd tests
   - python3 -m unittest test_randomizedsearch.py
@@ -42,8 +43,10 @@ script:
   - python3 random_forest.py
   - python3 sgd.py
   - python3 torch_nn.py
+  - python3 lgbm.py  # LightGBM with TuneRandomizedSearchCV
   - python3 xgbclassifier.py
   - python3 keras_example.py
+  - python3 sklearn_pipeline.py  # sklearn Pipeline with TuneGridSearchCV
 
 cache:
   directories:

diff --git a/examples/lgbm.py b/examples/lgbm.py
@@ -0,0 +1,42 @@
+"""Example using LightGBM with TuneRandomizedSearchCV.
+
+Example taken from https://mlfromscratch.com/gridsearch-keras-sklearn/#/
+"""
+
+from tune_sklearn.tune_search import TuneRandomizedSearchCV
+import lightgbm as lgb
+from sklearn.datasets import load_breast_cancer
+from sklearn.metrics import accuracy_score
+from sklearn.model_selection import train_test_split
+
+# Load breast cancer dataset
+cancer = load_breast_cancer()
+X = cancer.data
+y = cancer.target
+
+# Hold out 20% of the samples for the final accuracy check.
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=0.2, random_state=42)
+
+model = lgb.LGBMClassifier()
+# Candidate values per LightGBM hyperparameter for the randomized search.
+param_dists = {
+    "n_estimators": [400, 700, 1000],
+    "colsample_bytree": [0.7, 0.8],
+    "max_depth": [15, 20, 25],
+    "num_leaves": [50, 100, 200],
+    "reg_alpha": [1.1, 1.2, 1.3],
+    "reg_lambda": [1.1, 1.2, 1.3],
+    "min_split_gain": [0.3, 0.4],
+    "subsample": [0.7, 0.8, 0.9],
+    "subsample_freq": [20]
+}
+
+# Tune on the training split, scoring candidates by accuracy.
+gs = TuneRandomizedSearchCV(model, param_dists, n_iter=5, scoring="accuracy")
+gs.fit(X_train, y_train)
+print(gs.cv_results_)
+
+# Report accuracy of the fitted search object on the held-out split.
+pred = gs.predict(X_test)
+print("Accuracy:", accuracy_score(y_test, pred))
diff --git a/examples/sklearn_pipeline.py b/examples/sklearn_pipeline.py
@@ -0,0 +1,35 @@
+"""Tune an sklearn Pipeline with TuneGridSearchCV.
+
+Adapted from the scikit-learn example at
+https://scikit-learn.org/stable/auto_examples/compose/
+plot_compare_reduction.html
+"""
+
+from tune_sklearn.tune_search import TuneGridSearchCV
+from sklearn.datasets import load_digits
+from sklearn.pipeline import Pipeline
+from sklearn.svm import LinearSVC
+from sklearn.decomposition import PCA, NMF
+
+N_FEATURES_OPTIONS = [2, 4, 8]
+C_OPTIONS = [1, 10]
+
+# "reduce_dim" is only a placeholder step here; param_grid supplies the
+# actual reducer for each candidate.
+svm = LinearSVC(dual=False, max_iter=10000)
+pipe = Pipeline(steps=[
+    ("reduce_dim", "passthrough"),
+    ("classify", svm),
+])
+
+# Search over the reducer, its number of components, and the SVM's C.
+param_grid = {
+    "reduce_dim": [PCA(iterated_power=7), NMF()],
+    "reduce_dim__n_components": N_FEATURES_OPTIONS,
+    "classify__C": C_OPTIONS,
+}
+
+X, y = load_digits(return_X_y=True)
+grid = TuneGridSearchCV(pipe, param_grid=param_grid)
+grid.fit(X, y)
+print(grid.cv_results_)
diff --git a/requirements.txt b/requirements.txt
@@ -10,5 +10,6 @@ xgboost
 torch
 torchvision
 skorch
+lightgbm
 keras
 tensorflow
diff --git a/tune_sklearn/tune_search.py b/tune_sklearn/tune_search.py
@@ -72,7 +72,6 @@ def _setup(self, config):
             "early_stopping_max_epochs")
         self.cv = config.pop("cv")
         self.return_train_score = config.pop("return_train_score")
-
         self.estimator_config = config
 
         if self.early_stopping:
@@ -584,7 +583,7 @@ def _clean_config_dict(self, config):
                 "early_stopping_max_epochs",
                 "return_train_score",
         ]:
-            config.pop(key)
+            config.pop(key, None)  # no KeyError if the key is already absent
         return config
 
     def _format_results(self, n_splits, out):
@@ -894,15 +893,19 @@ def _fill_config_hyperparam(self, config):
         for key, distribution in self.param_distributions.items():
             if isinstance(distribution, list):
                 import random
-                config[key] = tune.sample_from((lambda d: lambda spec:
-                                                d[random.randint(
-                                                    0, len(d) - 1)])
-                                               (distribution))
+
+                def get_sample(dist):  # factory: lambda closes over dist
+                    return lambda spec: dist[random.randint(0, len(dist) - 1)]
+
+                config[key] = tune.sample_from(get_sample(distribution))
                 samples *= len(distribution)
             else:
                 all_lists = False
-                config[key] = tune.sample_from(
-                    (lambda d: lambda spec: d.rvs(1)[0])(distribution))
+
+                def get_sample(dist):  # factory: lambda closes over dist
+                    return lambda spec: dist.rvs(1)[0]  # presumably one draw
+
+                config[key] = tune.sample_from(get_sample(distribution))
         if all_lists:
             self.num_samples = min(self.num_samples, samples)