In [8]:
from sklearn.datasets import load_iris
from sklearn import tree
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score
from lightgbm.sklearn import LGBMClassifier
import numpy as np
import wandb
from wandb.integration.lightgbm import wandb_callback

In [2]:
data = load_iris()
X = data['data']
y = data['target']
feature_names = data['feature_names']
labels = data['target_names']

In [3]:
wandb.init(project='covid-cf', entity='rhacking')
wandb.config.max_depth = 2
clf = LGBMClassifier(max_depth=wandb.config.max_depth)
cv = KFold(shuffle=True)

wandb: Currently logged in as: rhacking (use `wandb login --relogin` to force relogin)


In [4]:
test_size = 0.2
val_size = 0.2
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, shuffle=True)
# X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=val_size/(1-test_size))

In [11]:
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [14]:
clf = LGBMClassifier(max_depth=wandb.config.max_depth, objective='multiclass', num_classes=len(np.unique(y)))
clf.fit(X_train, y_train, callbacks=[wandb_callback()])
clf.predict(X_test)

array([1, 1, 0, 2, 2, 0, 2, 2, 0, 0, 2, 1, 2, 2, 1, 0, 2, 0, 1, 2, 2, 1,
       1, 1, 0, 0, 0, 0, 0, 0])

In [15]:
clf = LGBMClassifier(max_depth=wandb.config.max_depth, objective='multiclass', num_classes=len(np.unique(y)))
clf.fit(X_train, y_train, callbacks=[wandb_callback()])
y_pred = clf.predict(X_test)
y_probas = clf.predict_proba(X_test)
wandb.sklearn.plot_classifier(clf, X_train, X_test, y_train, y_test, y_pred, y_probas, labels, model_name='lgbm', feature_names=feature_names)

wandb: 
wandb: Plotting lgbm.
wandb: Logged feature importances.
wandb: Logged learning curve.
wandb: Logged confusion matrix.
wandb: Logged summary metrics.
wandb: Logged class proportions.


ValueError: y contains previously unseen labels: [ 16222 114222]

In [3]:
for fold, (train, test) in enumerate(cv.split(X, y)):
    wandb.init(group=f"fold_{fold}", project='covid-cf', entity='rhacking')
    wandb.config.max_depth = 4
    clf = tree.DecisionTreeClassifier(max_depth=wandb.config.max_depth)
    X_train, y_train = X[train], y[train]
    X_test, y_test = X[test], y[test]
    
    clf.fit(X_train, y_train)
    score = accuracy_score(y_test, clf.predict(X_test))
    wandb.log({'accuracy': score})

wandb: Currently logged in as: rhacking (use `wandb login --relogin` to force relogin)


0,1
accuracy,1.0
_runtime,2.0
_timestamp,1617277217.0
_step,0.0


0,1
accuracy,▁
_runtime,▁
_timestamp,▁
_step,▁


0,1
accuracy,0.96667
_runtime,1.0
_timestamp,1617277222.0
_step,0.0


0,1
accuracy,▁
_runtime,▁
_timestamp,▁
_step,▁


0,1
accuracy,0.96667
_runtime,1.0
_timestamp,1617277227.0
_step,0.0


0,1
accuracy,▁
_runtime,▁
_timestamp,▁
_step,▁


0,1
accuracy,0.86667
_runtime,2.0
_timestamp,1617277233.0
_step,0.0


0,1
accuracy,▁
_runtime,▁
_timestamp,▁
_step,▁


In [19]:
import pandas as pd
pd.DataFrame({
    'dataset': {
        'name': 'ictcf', 
        'validation': '5 fold'
    }, 
    'preprocessing': {
        'sfm': 0.7
    }, 
    'model_parameters': {
        'n_estimators': 10, 
        'boosting_type': 'dart'
    }
})

Unnamed: 0,dataset,preprocessing,model_parameters
name,ictcf,,
validation,5 fold,,
sfm,,0.7,
n_estimators,,,10
boosting_type,,,dart


In [21]:
import pandas as pd
import yaml

In [22]:
with open('../configs/ictcf.yaml', 'r') as f:
    df = pd.io.json.json_normalize(yaml.load(f))

  


In [23]:
df

Unnamed: 0,dataset.name,dataset.target,dataset.validation.fraction,dataset.test.fraction,preprocessing.imputation.categorical,preprocessing.imputation.numeric,preprocessing.rfe,model_parameters.boosting_type,model_parameters.n_estimators,model_parameters.learning_rate
0,ictcf,pcr,0.1,0.1,0,mean,,dart,40,0.01


In [20]:
!pip install wandb

Collecting wandb
  Downloading wandb-0.10.24-py2.py3-none-any.whl (2.0 MB)
Collecting subprocess32>=3.5.3
  Downloading subprocess32-3.5.4.tar.gz (97 kB)
Collecting shortuuid>=0.5.0
  Downloading shortuuid-1.0.1-py3-none-any.whl (7.5 kB)
Collecting configparser>=3.8.1
  Downloading configparser-5.0.2-py3-none-any.whl (19 kB)
Collecting promise<3,>=2.0
  Using cached promise-2.3-py3-none-any.whl
Collecting GitPython>=1.0.0
  Downloading GitPython-3.1.14-py3-none-any.whl (159 kB)
Collecting docker-pycreds>=0.4.0
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting sentry-sdk>=0.4.0
  Downloading sentry_sdk-1.0.0-py2.py3-none-any.whl (131 kB)
Collecting protobuf>=3.12.0
  Downloading protobuf-3.15.6-cp37-cp37m-win_amd64.whl (904 kB)
Collecting pathtools
  Downloading pathtools-0.1.2.tar.gz (11 kB)
Collecting psutil>=5.0.0
  Downloading psutil-5.8.0-cp37-cp37m-win_amd64.whl (244 kB)
Collecting gitdb<5,>=4.0.1
  Downloading gitdb-4.0.7-py3-none-any.whl (63 kB)
Collect

In [26]:
!wandb login --help

Usage: wandb login [OPTIONS] [KEY]...

  Login to Weights & Biases

Options:
  --cloud        Login to the cloud instead of local
  --host TEXT    Login to a specific instance of W&B
  --relogin      Force relogin if already logged in.
  --anonymously  Log in anonymously
  --help         Show this message and exit.
