In [1]:
!pip install wandb
!pip install scikit-learn
!pip install pandas
!pip install numpy



In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import KFold, train_test_split
import wandb
import pandas as pd
from sklearn.metrics import f1_score, precision_score, recall_score
import numpy as np

# Loading in data

In [3]:
data = load_breast_cancer()

In [4]:
print(data.feature_names)

['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']


In [5]:
print(data.target_names)

['malignant' 'benign']


In [6]:
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target

In [14]:
X_train, X_test, y_train, y_test = train_test_split(np.array(X), np.array(y), test_size=0.2)

# Setup Code

In [15]:
sweep_config = {
    "method": "bayes",
    "metric": {"goal": "maximize", "name": "F1-Score"},
    "parameters": {
        "criterion": {"values": ["gini", "entropy", "log_loss"]},
        "max_depth": {"min": 5, "max": 10},
        "n_estimators": {"min": 100, "max": 500}
    }
}

In [16]:
def objective():
#     Call wandb.init() to start server
    with wandb.init(project="rf_hyperparam_tuning_tech_discussion_group_bayes_cross_val", config=sweep_config) as run:
        
        cv = KFold(n_splits=3, shuffle=True, random_state=42)
        
        f1_evals, recall_evals, precision_evals = [], [], []
        
        for train_index, val_index in cv.split(X_train):
            
            X_train_cv, X_val = X_train[train_index], X_train[val_index]
            y_train_cv, y_val = y_train[train_index], y_train[val_index]
        
        #     Instantiate model
            model = RandomForestClassifier(
                random_state=42, 
                n_estimators=wandb.config.n_estimators,
                criterion=wandb.config.criterion,
                max_depth=wandb.config.max_depth
            )
        #     Fit model with training data
            model.fit(X_train_cv, y_train_cv)
        #     Make predictions
            eval_pred = model.predict(X_val)

        #     Compute performance
            f1 = f1_score(y_val, eval_pred)
            precision = precision_score(y_val, eval_pred)
            recall = recall_score(y_val, eval_pred)
            
            f1_evals.append(f1)
            recall_evals.append(recall)
            precision_evals.append(precision)
            
        #     Log results
            wandb.log(
                {
                    "F1-Score": np.mean(f1_evals),
                    "Precision": np.mean(precision_evals),
                    "Recall": np.mean(recall_evals)
                }
            )

# Run Sweeps

In [18]:
sweep_id = wandb.sweep(sweep_config, project="rf_hyperparam_tuning_tech_discussion_group_bayes_cross_val")
wandb.agent(sweep_id, function=objective, count=5)

Create sweep with ID: g04t3vhm
Sweep URL: https://wandb.ai/scott-clare/rf_hyperparam_tuning_tech_discussion_group_bayes_cross_val/sweeps/g04t3vhm


[34m[1mwandb[0m: Agent Starting Run: kf6cvaml with config:
[34m[1mwandb[0m: 	criterion: log_loss
[34m[1mwandb[0m: 	max_depth: 8
[34m[1mwandb[0m: 	n_estimators: 116


VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.103357…

0,1
F1-Score,▁█▅
Precision,▁██
Recall,█▆▁

0,1
F1-Score,0.97092
Precision,0.96958
Recall,0.97286


[34m[1mwandb[0m: Agent Starting Run: 82lm9udy with config:
[34m[1mwandb[0m: 	criterion: log_loss
[34m[1mwandb[0m: 	max_depth: 10
[34m[1mwandb[0m: 	n_estimators: 363


0,1
F1-Score,▁█▅
Precision,▁▇█
Recall,█▄▁

0,1
F1-Score,0.96713
Precision,0.96575
Recall,0.96912


[34m[1mwandb[0m: Agent Starting Run: gyf5m6a7 with config:
[34m[1mwandb[0m: 	criterion: log_loss
[34m[1mwandb[0m: 	max_depth: 10
[34m[1mwandb[0m: 	n_estimators: 273


VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.103336…

0,1
F1-Score,▁█▅
Precision,▁▇█
Recall,█▆▁

0,1
F1-Score,0.96906
Precision,0.96579
Recall,0.97286


[34m[1mwandb[0m: Agent Starting Run: hfqq4qs4 with config:
[34m[1mwandb[0m: 	criterion: gini
[34m[1mwandb[0m: 	max_depth: 7
[34m[1mwandb[0m: 	n_estimators: 277


VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.103420…

0,1
F1-Score,▁█▅
Precision,▁▇█
Recall,█▄▁

0,1
F1-Score,0.96529
Precision,0.962
Recall,0.96912


[34m[1mwandb[0m: Agent Starting Run: w4hyu39s with config:
[34m[1mwandb[0m: 	criterion: entropy
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 214


VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.103357…

0,1
F1-Score,▁█▇
Precision,▁▇█
Recall,█▃▁

0,1
F1-Score,0.96881
Precision,0.96584
Recall,0.97232
