In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import metrics
import numpy as np
from sklearn.metrics import zero_one_loss, accuracy_score
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
import wandb

idle_time_data = pd.read_csv('../data/final_df_points_18_21_class.csv')

TargetVariable = ['idle_time']
Predictors = ['bike_id', 'lat', 'lng', 'temp', 'rain', 'snow', 'wind_speed', 'humidity', 'dt_start'
    , 'hex_enc', 'start_min', 'year', 'month', 'day', 'on_station', 'in_zone', 'zone_name_enc']

X = idle_time_data[Predictors].values
y = idle_time_data[TargetVariable].values

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, shuffle=False)

In [2]:
sweep_configuration_rfr = {
    "project": "RandomForestRegressor",
    "name": "my-awesome-sweep",
    "metric": {"name": "accuracy", "goal": "maximize"},
    "method": "random",
    "parameters": {
        "n_estimators": {
            "values": [8, 16, 32, 64, 128, 256, 512]
        },
        "criterion": {
            "values": ['squared_error', 'absolute_error', 'poisson']
        },
        "max_depth": {
            "values": [2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 120, 140, 160, 180, 200, None]
        },
        "bootstrap": {
            "values": [True, False]
        },
        "max_features": {
            "values": ['auto', 'sqrt', 'log2']
        },
        "min_samples_leaf": {
            "values": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 18, 20, 25, 30]
        },
        "min_samples_split": {
            "values": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 18, 20, 25, 30]
        }
    }
}

In [3]:
def my_train_func():
    wandb.init()

    _n_estimators = wandb.config.n_estimators
    _criterion = wandb.config.criterion
    _max_depth = wandb.config.max_depth
    _bootstrap = wandb.config.bootstrap
    _max_features = wandb.config.max_features
    _min_samples_leaf = wandb.config.min_samples_leaf
    _min_samples_split = wandb.config.min_samples_split

    model = RandomForestRegressor(n_estimators=_n_estimators,
                                  criterion=_criterion,
                                  max_depth=_max_depth,
                                  bootstrap=_bootstrap,
                                  max_features=_max_features,
                                  min_samples_leaf=_min_samples_leaf,
                                  min_samples_split=_min_samples_split,
                                  n_jobs=-1)

    model.fit(X_train, y_train.ravel())
    y_pred = model.predict(X_test)

    score_training = model.score(X_train, y_train.ravel())
    score_validation = model.score(X_test, y_test.ravel())
    rmse = metrics.mean_squared_error(y_test.ravel(), y_pred.ravel())

    wandb.log({"score_training": score_training, "score_validation": score_validation, "rmse": rmse})

In [4]:
# INIT SWEEP
sweep_id_rfc = wandb.sweep(sweep_configuration_rfr, project="RandomForestRegressor")
# RUN SWEEP
wandb.agent(sweep_id_rfc, function=my_train_func)

Create sweep with ID: ojmoz3d0
Sweep URL: https://wandb.ai/jonathanweske/RandomForestRegressor/sweeps/ojmoz3d0


[34m[1mwandb[0m: Agent Starting Run: yp0c855k with config:
[34m[1mwandb[0m: 	bootstrap: True
[34m[1mwandb[0m: 	criterion: squared_error
[34m[1mwandb[0m: 	max_depth: 120
[34m[1mwandb[0m: 	max_features: auto
[34m[1mwandb[0m: 	min_samples_leaf: 18
[34m[1mwandb[0m: 	min_samples_split: 11
[34m[1mwandb[0m: 	n_estimators: 512
[34m[1mwandb[0m: Currently logged in as: [33mjonathanweske[0m (use `wandb login --relogin` to force relogin)





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run yp0c855k errored: ValueError("Classification metrics can't handle a mix of multiclass and continuous targets")
[34m[1mwandb[0m: [32m[41mERROR[0m Run yp0c855k errored: ValueError("Classification metrics can't handle a mix of multiclass and continuous targets")
[34m[1mwandb[0m: Agent Starting Run: 4nsdwa0j with config:
[34m[1mwandb[0m: 	bootstrap: True
[34m[1mwandb[0m: 	criterion: poisson
[34m[1mwandb[0m: 	max_depth: 5
[34m[1mwandb[0m: 	max_features: auto
[34m[1mwandb[0m: 	min_samples_leaf: 5
[34m[1mwandb[0m: 	min_samples_split: 30
[34m[1mwandb[0m: 	n_estimators: 512





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run 4nsdwa0j errored: ValueError("Classification metrics can't handle a mix of multiclass and continuous targets")
[34m[1mwandb[0m: [32m[41mERROR[0m Run 4nsdwa0j errored: ValueError("Classification metrics can't handle a mix of multiclass and continuous targets")
[34m[1mwandb[0m: Agent Starting Run: sizmy7qo with config:
[34m[1mwandb[0m: 	bootstrap: False
[34m[1mwandb[0m: 	criterion: poisson
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	max_features: auto
[34m[1mwandb[0m: 	min_samples_leaf: 11
[34m[1mwandb[0m: 	min_samples_split: 18
[34m[1mwandb[0m: 	n_estimators: 8





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run sizmy7qo errored: ValueError("Classification metrics can't handle a mix of multiclass and continuous targets")
[34m[1mwandb[0m: [32m[41mERROR[0m Run sizmy7qo errored: ValueError("Classification metrics can't handle a mix of multiclass and continuous targets")
[34m[1mwandb[0m: Agent Starting Run: v8h72z6z with config:
[34m[1mwandb[0m: 	bootstrap: True
[34m[1mwandb[0m: 	criterion: poisson
[34m[1mwandb[0m: 	max_depth: 3
[34m[1mwandb[0m: 	max_features: log2
[34m[1mwandb[0m: 	min_samples_leaf: 10
[34m[1mwandb[0m: 	min_samples_split: 11
[34m[1mwandb[0m: 	n_estimators: 256





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run v8h72z6z errored: ValueError("Classification metrics can't handle a mix of multiclass and continuous targets")
[34m[1mwandb[0m: [32m[41mERROR[0m Run v8h72z6z errored: ValueError("Classification metrics can't handle a mix of multiclass and continuous targets")
[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 2ubjpwoa with config:
[34m[1mwandb[0m: 	bootstrap: False
[34m[1mwandb[0m: 	criterion: squared_error
[34m[1mwandb[0m: 	max_depth: 20
[34m[1mwandb[0m: 	max_features: log2
[34m[1mwandb[0m: 	min_samples_leaf: 2
[34m[1mwandb[0m: 	min_samples_split: 20
[34m[1mwandb[0m: 	n_estimators: 128





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run 2ubjpwoa errored: ValueError("Classification metrics can't handle a mix of multiclass and continuous targets")
[34m[1mwandb[0m: [32m[41mERROR[0m Run 2ubjpwoa errored: ValueError("Classification metrics can't handle a mix of multiclass and continuous targets")
[34m[1mwandb[0m: Agent Starting Run: gtkja29h with config:
[34m[1mwandb[0m: 	bootstrap: False
[34m[1mwandb[0m: 	criterion: squared_error
[34m[1mwandb[0m: 	max_depth: None
[34m[1mwandb[0m: 	max_features: auto
[34m[1mwandb[0m: 	min_samples_leaf: 8
[34m[1mwandb[0m: 	min_samples_split: 30
[34m[1mwandb[0m: 	n_estimators: 64





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run gtkja29h errored: ValueError("Classification metrics can't handle a mix of multiclass and continuous targets")
[34m[1mwandb[0m: [32m[41mERROR[0m Run gtkja29h errored: ValueError("Classification metrics can't handle a mix of multiclass and continuous targets")
Detected 5 failed runs in a row at start, killing sweep.
[34m[1mwandb[0m: [32m[41mERROR[0m Detected 5 failed runs in a row at start, killing sweep.
[34m[1mwandb[0m: To change this value set WANDB_AGENT_MAX_INITIAL_FAILURES=val
