### Random Forest Classifier WandB Sweep

In [1]:
import pandas as pd

In [2]:
idle_time_data = pd.read_csv('../data/df_points/df_points_18_21_class.csv')

In [3]:
from sklearn.preprocessing import StandardScaler

TargetVariable = ['idle_time_class']
Predictors = ['bike_id', 'lat', 'lng', 'temp', 'rain', 'snow', 'dt_start', 'hex_enc', 'start_min', 'month', 'day']

X = idle_time_data[Predictors].values
y = idle_time_data[TargetVariable].values

PredictorScaler=StandardScaler()
PredictorScalerFit=PredictorScaler.fit(X)
X=PredictorScalerFit.transform(X)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.9, shuffle=False)
# , random_state=42

#### ggf irgendwann ts_cv zur cross validation

from sklearn.model_selection import TimeSeriesSplit

ts_cv = TimeSeriesSplit(n_splits=4,max_train_size=10000)

all_splits = list(ts_cv.split(X_train, y_train))
train_0, test_0 = all_splits[0]

In [4]:
from sklearn.metrics import zero_one_loss, accuracy_score
from sklearn.model_selection import cross_validate, cross_val_score, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
import wandb
import wandb

sweep_configuration_rfc = {
    "project": "RandomForestClass",
    "name": "my-awesome-sweep",
    "metric": {"name": "accuracy", "goal": "maximize"},
    "method": "random",
    "parameters": {
        "n_estimators": {
            "values": [8, 16, 32, 64, 128, 256]
        },
        "criterion": {
            "values": ['entropy','gini']
        },
        "max_depth": {
            "values": [2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 120, 140, 160, 180, 200, None]
        },
        "bootstrap": {
            "values": [True, False]
        },
        "max_features": {
            "values": ['auto', 'sqrt', 'log2']
        },
        "min_samples_leaf": {
            "values": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 25, 30]
        },
        "min_samples_split": {
            "values": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 18, 20, 25, 30]
        }
    }
}

def my_train_func():
    wandb.init()

    _n_estimators = wandb.config.n_estimators
    _criterion = wandb.config.criterion
    _max_depth = wandb.config.max_depth
    _bootstrap = wandb.config.bootstrap
    _max_features = wandb.config.max_features
    _min_samples_leaf = wandb.config.min_samples_leaf
    _min_samples_split = wandb.config.min_samples_split

    model = RandomForestClassifier(n_estimators=_n_estimators,
                                   criterion=_criterion,
                                   max_depth=_max_depth,
                                   bootstrap=_bootstrap,
                                   max_features=_max_features,
                                   min_samples_leaf=_min_samples_leaf,
                                   min_samples_split=_min_samples_split)

    model.fit(X_train,y_train.ravel())
    y_pred = model.predict(X_test)

    acc = zero_one_loss(y_test.ravel(), y_pred.ravel())
    loss = accuracy_score(y_test.ravel(), y_pred.ravel())

    wandb.sklearn.plot_feature_importances(model, Predictors)

    wandb.log({"accuracy": acc})
    wandb.log({"conf_matrix": wandb.plot.confusion_matrix(y_true=y_test.ravel(), preds=y_pred.ravel())})
    wandb.log({"feature_imp": wandb.sklearn.plot_feature_importances(model, Predictors)})
    wandb.log({"loss": loss})


# INIT SWEEP
sweep_id_rfc = wandb.sweep(sweep_configuration_rfc, project="RandomForestClass")
# RUN SWEEP
wandb.agent(sweep_id_rfc, function=my_train_func)


Create sweep with ID: 7aukrm52
Sweep URL: https://wandb.ai/jonathanweske/RandomForestClass/sweeps/7aukrm52


[34m[1mwandb[0m: Agent Starting Run: 7tirjsst with config:
[34m[1mwandb[0m: 	bootstrap: True
[34m[1mwandb[0m: 	criterion: entropy
[34m[1mwandb[0m: 	max_depth: 100
[34m[1mwandb[0m: 	max_features: log2
[34m[1mwandb[0m: 	min_samples_leaf: 25
[34m[1mwandb[0m: 	min_samples_split: 5
[34m[1mwandb[0m: 	n_estimators: 256
[34m[1mwandb[0m: Currently logged in as: [33mjonathanweske[0m (use `wandb login --relogin` to force relogin)





VBox(children=(Label(value='0.003 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.897954…

0,1
accuracy,▁
loss,▁

0,1
accuracy,0.42449
loss,0.57551


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: d93o6tx6 with config:
[34m[1mwandb[0m: 	bootstrap: False
[34m[1mwandb[0m: 	criterion: gini
[34m[1mwandb[0m: 	max_depth: 60
[34m[1mwandb[0m: 	max_features: auto
[34m[1mwandb[0m: 	min_samples_leaf: 6
[34m[1mwandb[0m: 	min_samples_split: 9
[34m[1mwandb[0m: 	n_estimators: 256





VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁
loss,▁

0,1
accuracy,0.41085
loss,0.58915


[34m[1mwandb[0m: Agent Starting Run: 4se9qin0 with config:
[34m[1mwandb[0m: 	bootstrap: False
[34m[1mwandb[0m: 	criterion: gini
[34m[1mwandb[0m: 	max_depth: 4
[34m[1mwandb[0m: 	max_features: log2
[34m[1mwandb[0m: 	min_samples_leaf: 14
[34m[1mwandb[0m: 	min_samples_split: 10
[34m[1mwandb[0m: 	n_estimators: 128





VBox(children=(Label(value='0.003 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.897322…

0,1
accuracy,▁
loss,▁

0,1
accuracy,0.51071
loss,0.48929


[34m[1mwandb[0m: Agent Starting Run: 6dzyh0l2 with config:
[34m[1mwandb[0m: 	bootstrap: False
[34m[1mwandb[0m: 	criterion: entropy
[34m[1mwandb[0m: 	max_depth: 120
[34m[1mwandb[0m: 	max_features: auto
[34m[1mwandb[0m: 	min_samples_leaf: 1
[34m[1mwandb[0m: 	min_samples_split: 11
[34m[1mwandb[0m: 	n_estimators: 128





VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁
loss,▁

0,1
accuracy,0.41107
loss,0.58893


[34m[1mwandb[0m: Agent Starting Run: i8f96khh with config:
[34m[1mwandb[0m: 	bootstrap: False
[34m[1mwandb[0m: 	criterion: entropy
[34m[1mwandb[0m: 	max_depth: 100
[34m[1mwandb[0m: 	max_features: sqrt
[34m[1mwandb[0m: 	min_samples_leaf: 16
[34m[1mwandb[0m: 	min_samples_split: 30
[34m[1mwandb[0m: 	n_estimators: 64





VBox(children=(Label(value='0.003 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.898296…

0,1
accuracy,▁
loss,▁

0,1
accuracy,0.41784
loss,0.58216


[34m[1mwandb[0m: Agent Starting Run: u9lu792p with config:
[34m[1mwandb[0m: 	bootstrap: True
[34m[1mwandb[0m: 	criterion: gini
[34m[1mwandb[0m: 	max_depth: 180
[34m[1mwandb[0m: 	max_features: log2
[34m[1mwandb[0m: 	min_samples_leaf: 2
[34m[1mwandb[0m: 	min_samples_split: 8
[34m[1mwandb[0m: 	n_estimators: 64





VBox(children=(Label(value='0.003 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.897783…

0,1
accuracy,▁
loss,▁

0,1
accuracy,0.41558
loss,0.58442


[34m[1mwandb[0m: Agent Starting Run: yxg4sbb2 with config:
[34m[1mwandb[0m: 	bootstrap: False
[34m[1mwandb[0m: 	criterion: gini
[34m[1mwandb[0m: 	max_depth: 60
[34m[1mwandb[0m: 	max_features: sqrt
[34m[1mwandb[0m: 	min_samples_leaf: 30
[34m[1mwandb[0m: 	min_samples_split: 2
[34m[1mwandb[0m: 	n_estimators: 8





VBox(children=(Label(value='0.003 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.897954…

0,1
accuracy,▁
loss,▁

0,1
accuracy,0.42728
loss,0.57272


[34m[1mwandb[0m: Agent Starting Run: trnbti9z with config:
[34m[1mwandb[0m: 	bootstrap: True
[34m[1mwandb[0m: 	criterion: gini
[34m[1mwandb[0m: 	max_depth: 180
[34m[1mwandb[0m: 	max_features: log2
[34m[1mwandb[0m: 	min_samples_leaf: 10
[34m[1mwandb[0m: 	min_samples_split: 6
[34m[1mwandb[0m: 	n_estimators: 8





VBox(children=(Label(value='0.003 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.898296…

0,1
accuracy,▁
loss,▁

0,1
accuracy,0.43329
loss,0.56671


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pj6k7g3r with config:
[34m[1mwandb[0m: 	bootstrap: True
[34m[1mwandb[0m: 	criterion: entropy
[34m[1mwandb[0m: 	max_depth: 4
[34m[1mwandb[0m: 	max_features: auto
[34m[1mwandb[0m: 	min_samples_leaf: 1
[34m[1mwandb[0m: 	min_samples_split: 11
[34m[1mwandb[0m: 	n_estimators: 16





VBox(children=(Label(value='0.003 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.897152…

0,1
accuracy,▁
loss,▁

0,1
accuracy,0.52524
loss,0.47476


[34m[1mwandb[0m: Agent Starting Run: dxk2vcv5 with config:
[34m[1mwandb[0m: 	bootstrap: False
[34m[1mwandb[0m: 	criterion: entropy
[34m[1mwandb[0m: 	max_depth: 50
[34m[1mwandb[0m: 	max_features: auto
[34m[1mwandb[0m: 	min_samples_leaf: 25
[34m[1mwandb[0m: 	min_samples_split: 4
[34m[1mwandb[0m: 	n_estimators: 128





VBox(children=(Label(value='0.003 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.898125…

0,1
accuracy,▁
loss,▁

0,1
accuracy,0.42047
loss,0.57953


[34m[1mwandb[0m: Agent Starting Run: m7n66xh2 with config:
[34m[1mwandb[0m: 	bootstrap: False
[34m[1mwandb[0m: 	criterion: gini
[34m[1mwandb[0m: 	max_depth: 90
[34m[1mwandb[0m: 	max_features: auto
[34m[1mwandb[0m: 	min_samples_leaf: 16
[34m[1mwandb[0m: 	min_samples_split: 11
[34m[1mwandb[0m: 	n_estimators: 8





VBox(children=(Label(value='0.003 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.898468…

0,1
accuracy,▁
loss,▁

0,1
accuracy,0.42639
loss,0.57361


[34m[1mwandb[0m: Agent Starting Run: iu6osvca with config:
[34m[1mwandb[0m: 	bootstrap: True
[34m[1mwandb[0m: 	criterion: entropy
[34m[1mwandb[0m: 	max_depth: 50
[34m[1mwandb[0m: 	max_features: log2
[34m[1mwandb[0m: 	min_samples_leaf: 30
[34m[1mwandb[0m: 	min_samples_split: 5
[34m[1mwandb[0m: 	n_estimators: 256





VBox(children=(Label(value='0.003 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.898296…

0,1
accuracy,▁
loss,▁

0,1
accuracy,0.42618
loss,0.57382


[34m[1mwandb[0m: Agent Starting Run: uno41jal with config:
[34m[1mwandb[0m: 	bootstrap: False
[34m[1mwandb[0m: 	criterion: gini
[34m[1mwandb[0m: 	max_depth: 8
[34m[1mwandb[0m: 	max_features: log2
[34m[1mwandb[0m: 	min_samples_leaf: 10
[34m[1mwandb[0m: 	min_samples_split: 13
[34m[1mwandb[0m: 	n_estimators: 32





VBox(children=(Label(value='0.003 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.897542…

0,1
accuracy,▁
loss,▁

0,1
accuracy,0.47891
loss,0.52109


[34m[1mwandb[0m: Agent Starting Run: ewp4vlwz with config:
[34m[1mwandb[0m: 	bootstrap: False
[34m[1mwandb[0m: 	criterion: gini
[34m[1mwandb[0m: 	max_depth: 160
[34m[1mwandb[0m: 	max_features: auto
[34m[1mwandb[0m: 	min_samples_leaf: 8
[34m[1mwandb[0m: 	min_samples_split: 4
[34m[1mwandb[0m: 	n_estimators: 128





VBox(children=(Label(value='0.003 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.898296…

0,1
accuracy,▁
loss,▁

0,1
accuracy,0.4124
loss,0.5876


[34m[1mwandb[0m: Agent Starting Run: pdtzrdkd with config:
[34m[1mwandb[0m: 	bootstrap: True
[34m[1mwandb[0m: 	criterion: entropy
[34m[1mwandb[0m: 	max_depth: 9
[34m[1mwandb[0m: 	max_features: sqrt
[34m[1mwandb[0m: 	min_samples_leaf: 25
[34m[1mwandb[0m: 	min_samples_split: 7
[34m[1mwandb[0m: 	n_estimators: 8





VBox(children=(Label(value='0.003 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.897322…

0,1
accuracy,▁
loss,▁

0,1
accuracy,0.47262
loss,0.52738


[34m[1mwandb[0m: Agent Starting Run: umm9dhyt with config:
[34m[1mwandb[0m: 	bootstrap: True
[34m[1mwandb[0m: 	criterion: entropy
[34m[1mwandb[0m: 	max_depth: 70
[34m[1mwandb[0m: 	max_features: sqrt
[34m[1mwandb[0m: 	min_samples_leaf: 18
[34m[1mwandb[0m: 	min_samples_split: 3
[34m[1mwandb[0m: 	n_estimators: 16





VBox(children=(Label(value='0.003 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.754563…

0,1
accuracy,▁
loss,▁

0,1
accuracy,0.42793
loss,0.57207


[34m[1mwandb[0m: Agent Starting Run: l4f9bjn4 with config:
[34m[1mwandb[0m: 	bootstrap: True
[34m[1mwandb[0m: 	criterion: gini
[34m[1mwandb[0m: 	max_depth: 2
[34m[1mwandb[0m: 	max_features: log2
[34m[1mwandb[0m: 	min_samples_leaf: 14
[34m[1mwandb[0m: 	min_samples_split: 20
[34m[1mwandb[0m: 	n_estimators: 16





VBox(children=(Label(value='0.003 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.910389…

0,1
accuracy,▁
loss,▁

0,1
accuracy,0.55239
loss,0.44761


[34m[1mwandb[0m: Agent Starting Run: k5m9zmg0 with config:
[34m[1mwandb[0m: 	bootstrap: True
[34m[1mwandb[0m: 	criterion: gini
[34m[1mwandb[0m: 	max_depth: 80
[34m[1mwandb[0m: 	max_features: log2
[34m[1mwandb[0m: 	min_samples_leaf: 25
[34m[1mwandb[0m: 	min_samples_split: 18
[34m[1mwandb[0m: 	n_estimators: 256





VBox(children=(Label(value='0.003 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.898125…

0,1
accuracy,▁
loss,▁

0,1
accuracy,0.42393
loss,0.57607


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pe2vrhzc with config:
[34m[1mwandb[0m: 	bootstrap: False
[34m[1mwandb[0m: 	criterion: gini
[34m[1mwandb[0m: 	max_depth: None
[34m[1mwandb[0m: 	max_features: sqrt
[34m[1mwandb[0m: 	min_samples_leaf: 6
[34m[1mwandb[0m: 	min_samples_split: 7
[34m[1mwandb[0m: 	n_estimators: 256





VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁
loss,▁

0,1
accuracy,0.41096
loss,0.58904


[34m[1mwandb[0m: Agent Starting Run: 0c2ks51x with config:
[34m[1mwandb[0m: 	bootstrap: True
[34m[1mwandb[0m: 	criterion: entropy
[34m[1mwandb[0m: 	max_depth: 10
[34m[1mwandb[0m: 	max_features: log2
[34m[1mwandb[0m: 	min_samples_leaf: 14
[34m[1mwandb[0m: 	min_samples_split: 9
[34m[1mwandb[0m: 	n_estimators: 128





VBox(children=(Label(value='0.003 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.897662…

0,1
accuracy,▁
loss,▁

0,1
accuracy,0.46581
loss,0.53419


[34m[1mwandb[0m: Agent Starting Run: 47nsppg5 with config:
[34m[1mwandb[0m: 	bootstrap: False
[34m[1mwandb[0m: 	criterion: entropy
[34m[1mwandb[0m: 	max_depth: 2
[34m[1mwandb[0m: 	max_features: auto
[34m[1mwandb[0m: 	min_samples_leaf: 14
[34m[1mwandb[0m: 	min_samples_split: 5
[34m[1mwandb[0m: 	n_estimators: 64





VBox(children=(Label(value='0.003 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.903608…

0,1
accuracy,▁
loss,▁

0,1
accuracy,0.55376
loss,0.44624


[34m[1mwandb[0m: Agent Starting Run: votifsvz with config:
[34m[1mwandb[0m: 	bootstrap: False
[34m[1mwandb[0m: 	criterion: gini
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	max_features: sqrt
[34m[1mwandb[0m: 	min_samples_leaf: 18
[34m[1mwandb[0m: 	min_samples_split: 20
[34m[1mwandb[0m: 	n_estimators: 64





VBox(children=(Label(value='0.003 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.897542…

0,1
accuracy,▁
loss,▁

0,1
accuracy,0.48658
loss,0.51342


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 53e9x23o with config:
[34m[1mwandb[0m: 	bootstrap: False
[34m[1mwandb[0m: 	criterion: entropy
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	max_features: log2
[34m[1mwandb[0m: 	min_samples_leaf: 25
[34m[1mwandb[0m: 	min_samples_split: 25
[34m[1mwandb[0m: 	n_estimators: 256





VBox(children=(Label(value='0.003 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.897612…

0,1
accuracy,▁
loss,▁

0,1
accuracy,0.48337
loss,0.51663


[34m[1mwandb[0m: Agent Starting Run: q4duqr85 with config:
[34m[1mwandb[0m: 	bootstrap: False
[34m[1mwandb[0m: 	criterion: gini
[34m[1mwandb[0m: 	max_depth: 2
[34m[1mwandb[0m: 	max_features: log2
[34m[1mwandb[0m: 	min_samples_leaf: 3
[34m[1mwandb[0m: 	min_samples_split: 10
[34m[1mwandb[0m: 	n_estimators: 64





VBox(children=(Label(value='0.003 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.903559…

0,1
accuracy,▁
loss,▁

0,1
accuracy,0.54595
loss,0.45405


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qvfsz5u1 with config:
[34m[1mwandb[0m: 	bootstrap: True
[34m[1mwandb[0m: 	criterion: entropy
[34m[1mwandb[0m: 	max_depth: 90
[34m[1mwandb[0m: 	max_features: auto
[34m[1mwandb[0m: 	min_samples_leaf: 10
[34m[1mwandb[0m: 	min_samples_split: 8
[34m[1mwandb[0m: 	n_estimators: 256


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


array([4, 2, 2, ..., 2, 1, 4])

#### CV cross_validate()
Mean Absolute Error:     0.672 +/- 0.019
Root Mean Squared Error: 1.057 +/- 0.016

#### cross_val_score
[0.49957698 0.53019349 0.54551058 0.49902538]
Accuracy of Model with Cross Validation is: 51.85766080195356