In [1]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeClassifier
from sklearn.cluster import KMeans
from sklearn import datasets, cluster

from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

from sklearn.exceptions import ConvergenceWarning
# Suppress scikit-learn ConvergenceWarning messages from this point on so they
# do not clutter the cell outputs.
warnings.filterwarnings("ignore", category=ConvergenceWarning)

In [2]:
import wandb

# Authenticate with Weights & Biases before any run is created.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mjonathanweske[0m (use `wandb login --relogin` to force relogin)


True

In [3]:
# Load the California housing data: features as a DataFrame, target as an array.
housing = datasets.fetch_california_housing()
X = pd.DataFrame(housing.data, columns=housing.feature_names)
y = housing.target
X, y = X[::2], y[::2]  # keep every second row: subsample for a faster demo

# Ignore warnings about charts being built from a subset of the data.
# NOTE: `_show_warnings` is a private wandb attribute and may change between releases.
wandb.errors.term._show_warnings = False

# A fixed random_state makes the 70/30 split, and therefore every chart logged
# from it, reproducible under Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Train model, get predictions
reg = Ridge()
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)

In [4]:
# Start a W&B run named "regression" in the 'my-scikit-test' project and keep
# the returned handle in `run`.
run = wandb.init(project='my-scikit-test', name="regression")

In [5]:
# Log a residuals chart for the fitted Ridge model, passing the training split.
wandb.sklearn.plot_residuals(reg, X_train, y_train)



In [6]:
# Log an outlier-candidates chart for the fitted Ridge model, passing the training split.
wandb.sklearn.plot_outlier_candidates(reg, X_train, y_train)



In [7]:
# All-in-one call: logs the regression charts for the Ridge model
# (the captured output lists summary metrics, learning curve,
# outlier candidates and residuals).
wandb.sklearn.plot_regressor(
    reg,
    X_train, X_test,
    y_train, y_test,
    model_name='Ridge',
)

# Mark the "regression" run as finished.
wandb.finish()

[34m[1mwandb[0m: 
[34m[1mwandb[0m: Plotting Ridge.
[34m[1mwandb[0m: Logged summary metrics.
[34m[1mwandb[0m: Logged learning curve.
[34m[1mwandb[0m: Logged outlier candidates.
[34m[1mwandb[0m: Logged residuals.





VBox(children=(Label(value='0.230 MB of 0.248 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.928944…

In [8]:
# Load the Wisconsin breast cancer dataset.
# Both `wbcd` and `wisconsin_breast_cancer_data` are bound to the same object.
wbcd = wisconsin_breast_cancer_data = datasets.load_breast_cancer()
feature_names = wbcd.feature_names
labels = wbcd.target_names

# A fixed random_state makes the 80/20 split reproducible under Restart & Run All.
# NOTE: this rebinds X_train/X_test/y_train/y_test from the regression section.
X_train, X_test, y_train, y_test = train_test_split(
    wbcd.data, wbcd.target, test_size=0.2, random_state=42
)


# Train model, get predictions
# The forest is seeded as well; otherwise its bootstrap sampling would make
# predictions and feature importances differ from run to run.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
y_probas = model.predict_proba(X_test)
importances = model.feature_importances_
indices = np.argsort(importances)[::-1]  # feature indices, most important first

In [9]:
# Start a second W&B run, named "classification", in the same 'my-scikit-test'
# project; `run` is rebound to the new run's handle.
run = wandb.init(project='my-scikit-test', name="classification")

In [10]:
# Log a class-proportions chart from the train and test targets, using `labels` as class names.
wandb.sklearn.plot_class_proportions(y_train, y_test, labels)

In [11]:
# Log a learning-curve chart for the random forest, passing the training split.
wandb.sklearn.plot_learning_curve(model, X_train, y_train)

In [12]:
# Log a ROC chart from the test targets and the predicted class probabilities.
wandb.sklearn.plot_roc(y_test, y_probas, labels)

In [13]:
# Log a precision-recall chart from the test targets and the predicted class probabilities.
wandb.sklearn.plot_precision_recall(y_test, y_probas, labels)

In [14]:
# Log the forest's feature importances. The model was fitted on a plain NumPy
# array, so the dataset's feature names are passed explicitly to label the chart.
# The trailing `;` suppresses the cell's return-value repr.
wandb.sklearn.plot_feature_importances(model, feature_names);

In [15]:
# All-in-one call: logs the classification charts for the random forest
# (the captured output lists feature importances, confusion matrix, summary
# metrics, class proportions, calibration curve, ROC and precision-recall).
# `feature_names` is forwarded so the feature-importance chart shows the real
# column names; the model itself was fitted on an unlabeled NumPy array.
wandb.sklearn.plot_classifier(model,
                              X_train, X_test,
                              y_train, y_test,
                              y_pred, y_probas,
                              labels,
                              is_binary=True,
                              model_name='RandomForest',
                              feature_names=feature_names)

# Mark the "classification" run as finished.
wandb.finish()

[34m[1mwandb[0m: 
[34m[1mwandb[0m: Plotting RandomForest.
[34m[1mwandb[0m: Logged feature importances.
[34m[1mwandb[0m: Logged confusion matrix.
[34m[1mwandb[0m: Logged summary metrics.
[34m[1mwandb[0m: Logged class proportions.
[34m[1mwandb[0m: Logged calibration curve.
[34m[1mwandb[0m: Logged roc curve.
[34m[1mwandb[0m: Logged precision-recall curve.





VBox(children=(Label(value='0.017 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.940427…

In [18]:
# Inspect how many feature names the dataset provides.
feature_names.shape

(30,)

In [19]:
# Display the class names that were passed as `labels` to the wandb plots above.
labels

array(['malignant', 'benign'], dtype='<U9')