diff --git a/docs/conf.py b/docs/conf.py index 097b758..a2caa35 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -44,7 +44,8 @@ 'optuna', 'scikitplot', 'scikitplot.metrics', - 'xgboost'] + 'xgboost', + 'yellowbrick'] # -- Project information ----------------------------------------------------- diff --git a/docs/index.rst b/docs/index.rst index 4697b2f..2472e74 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -55,7 +55,7 @@ And the best thing is you can extend it yourself or... tell us to do it for you api.chart api.explainers api.html - api.table + api.table api.utils api.video hpo.utils @@ -63,6 +63,7 @@ And the best thing is you can extend it yourself or... tell us to do it for you monitoring.keras monitoring.lightgbm monitoring.xgboost + monitoring.sklearn monitoring.fastai monitoring.metrics monitoring.fairness diff --git a/docs/user_guide/monitoring/sklearn.rst b/docs/user_guide/monitoring/sklearn.rst new file mode 100644 index 0000000..b91b13f --- /dev/null +++ b/docs/user_guide/monitoring/sklearn.rst @@ -0,0 +1,6 @@ +Scikit-learn +============ + +.. 
automodule:: neptunecontrib.monitoring.sklearn + :members: + :show-inheritance: diff --git a/neptunecontrib/api/__init__.py b/neptunecontrib/api/__init__.py index 148f849..e58ba6a 100644 --- a/neptunecontrib/api/__init__.py +++ b/neptunecontrib/api/__init__.py @@ -18,7 +18,7 @@ from neptunecontrib.api.chart import log_chart from neptunecontrib.api.explainers import log_explainer, log_local_explanations, log_global_explanations from neptunecontrib.api.html import log_html -from neptunecontrib.api.table import log_table +from neptunecontrib.api.table import log_table, log_csv from neptunecontrib.api.utils import ( concat_experiments_on_channel, extract_project_progress_info, @@ -39,6 +39,7 @@ 'log_audio', 'log_video', 'log_table', + 'log_csv', 'log_html', 'log_chart', 'log_pickle', diff --git a/neptunecontrib/api/html.py b/neptunecontrib/api/html.py index d550480..3addba0 100644 --- a/neptunecontrib/api/html.py +++ b/neptunecontrib/api/html.py @@ -29,11 +29,10 @@ def log_html(name, html, experiment=None): Args: name (:obj:`str`): | Name of the chart (without extension) that will be used as a part of artifact's destination. - html_body (:obj:`str`): + html (:obj:`str`): | HTML string that is logged and rendered as HTML. experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``): - | For advanced users only. Pass Neptune - `Experiment `_ + | For advanced users only. Pass Neptune ``Experiment`` object if you want to control to which experiment data is logged. | If ``None``, log to currently active, and most recent experiment. diff --git a/neptunecontrib/api/table.py b/neptunecontrib/api/table.py index 15cfe81..522645e 100644 --- a/neptunecontrib/api/table.py +++ b/neptunecontrib/api/table.py @@ -13,13 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# - import neptune __all__ = [ 'log_table', + 'log_csv', ] + def log_table(name, table, experiment=None): """Logs pandas dataframe to neptune. @@ -62,13 +63,53 @@ def log_table(name, table, experiment=None): """ _exp = experiment if experiment else neptune - _exp.log_artifact(export_pandas_dataframe(table), "tables/" + name + '.html') + _exp.log_artifact(export_pandas_dataframe(table, 'html'), 'tables/{}.html'.format(name)) + + +def log_csv(name, table, experiment=None): + """Logs pandas dataframe to neptune as csv file. + Pandas dataframe is converted to csv file and logged to Neptune as an artifact with path csv/{name}.csv + + Args: + name (:obj:`str`): + | Name of the file (without extension) that will be used as a part of csv's destination. + table (:obj:`pandas.DataFrame`): + | DataFrame table + experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``): + | Neptune Experiment object if you want to control to which experiment you log the data. + | If ``None``, log to currently active, and most recent experiment. + + Examples: + Create or load dataframe: -def export_pandas_dataframe(table): + .. code:: python3 + + import pandas as pd + iris_df = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv', nrows=100) + + Log it to Neptune: + + .. 
code:: python3 + + from neptunecontrib.api import log_csv + log_csv('pandas_df', iris_df) + """ + _exp = experiment if experiment else neptune + + _exp.log_artifact(export_pandas_dataframe(table, 'csv'), 'csv/{}.csv'.format(name)) + + +def export_pandas_dataframe(table, target_type): from io import StringIO - buffer = StringIO(table.to_html()) + if target_type == 'csv': + buffer = StringIO(table.to_csv()) + elif target_type == 'html': + buffer = StringIO(table.to_html()) + else: + raise ValueError('Unsupported format: {}'.format(target_type)) + buffer.seek(0) return buffer diff --git a/neptunecontrib/monitoring/sklearn.py b/neptunecontrib/monitoring/sklearn.py new file mode 100644 index 0000000..65a8b56 --- /dev/null +++ b/neptunecontrib/monitoring/sklearn.py @@ -0,0 +1,1214 @@ +# +# Copyright (c) 2020, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import matplotlib.pyplot as plt +import neptune +import pandas as pd +from scikitplot.estimators import plot_learning_curve +from scikitplot.metrics import plot_precision_recall +from sklearn.base import is_regressor, is_classifier +from sklearn.cluster import KMeans +from sklearn.metrics import explained_variance_score, max_error, mean_absolute_error, r2_score, \ + precision_recall_fscore_support +from yellowbrick.classifier import ClassificationReport, ConfusionMatrix, ROCAUC, ClassPredictionError +from yellowbrick.cluster import SilhouetteVisualizer, KElbowVisualizer +from yellowbrick.model_selection import FeatureImportances +from yellowbrick.regressor import ResidualsPlot, PredictionError, CooksDistance + +from neptunecontrib.api.table import log_csv +from neptunecontrib.api.utils import log_pickle + + +def log_regressor_summary(regressor, X_train, X_test, y_train, y_test, + model_name=None, nrows=1000, experiment=None): + """Log sklearn regressor summary. + + This method automatically logs all regressor parameters, pickled estimator (model), + test predictions as table, model performance visualizations and test metrics. + + Regressor should be fitted before calling this function. + + Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method. + + Tip: + Check `Neptune documentation `_ for the full example. + + Args: + regressor (:obj:`regressor`): + | Fitted sklearn regressor object + X_train (:obj:`ndarray`): + | Training data matrix + X_test (:obj:`ndarray`): + | Testing data matrix + y_train (:obj:`ndarray`): + | The regression target for training + y_test (:obj:`ndarray`): + | The regression target for testing + model_name (`str`, optional, default is ``None``): + | If logging pickled model, define a name of the file to be logged to `model/` + | If ``None`` - `model/estimator.skl` is used. + nrows (`int`, optional, default is 1000): + | Log first ``nrows`` rows of test predictions. 
+ experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``): + | Neptune ``Experiment`` object to control to which experiment you log the data. + | If ``None``, log to currently active, and most recent experiment. + + Returns: + ``None`` + + Examples: + Log random forest regressor summary + + .. code:: python3 + + rfr = RandomForestRegressor() + rfr.fit(X_train, y_train) + + neptune.init('my_workspace/my_project') + neptune.create_experiment() + + log_regressor_summary(rfr, X_train, X_test, y_train, y_test) + """ + assert is_regressor(regressor), 'regressor should be sklearn regressor.' + + exp = _validate_experiment(experiment) + + log_estimator_params(regressor, exp) + log_pickled_model(regressor, model_name, exp) + + y_pred = regressor.predict(X_test) + log_test_predictions(regressor, X_test, y_test, y_pred=y_pred, nrows=nrows, experiment=exp) + log_scores(regressor, X_test, y_test, y_pred=y_pred, name='test', experiment=exp) + + # visualizations + log_learning_curve_chart(regressor, X_train, y_train, experiment=exp) + log_feature_importance_chart(regressor, X_train, y_train, experiment=exp) + log_residuals_chart(regressor, X_train, X_test, y_train, y_test, experiment=exp) + log_prediction_error_chart(regressor, X_train, X_test, y_train, y_test, experiment=exp) + log_cooks_distance_chart(regressor, X_train, y_train, experiment=exp) + + +def log_classifier_summary(classifier, X_train, X_test, y_train, y_test, + model_name=None, nrows=1000, experiment=None): + """Log sklearn classifier summary. + + This method automatically logs all classifier parameters, pickled estimator (model), + test predictions, predictions probabilities as table, model performance visualizations and test metrics. + + Classifier should be fitted before calling this function. + + Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method. + + Tip: + Check `Neptune documentation `_ for the full example. 
+ + Args: + classifier (:obj:`classifier`): + | Fitted sklearn classifier object + X_train (:obj:`ndarray`): + | Training data matrix + X_test (:obj:`ndarray`): + | Testing data matrix + y_train (:obj:`ndarray`): + | The classification target for training + y_test (:obj:`ndarray`): + | The classification target for testing + model_name (`str`, optional, default is ``None``): + | If logging pickled model, define a name of the file to be logged to `model/` + | If ``None`` - `model/estimator.skl` is used. + nrows (`int`, optional, default is 1000): + | Log first ``nrows`` rows of test predictions and predictions probabilities. + experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``): + | Neptune ``Experiment`` object to control to which experiment you log the data. + | If ``None``, log to currently active, and most recent experiment. + + Returns: + ``None`` + + Examples: + Log random forest classifier summary + + .. code:: python3 + + rfc = RandomForestClassifier() + rfc.fit(X_train, y_train) + + neptune.init('my_workspace/my_project') + neptune.create_experiment() + + log_classifier_summary(rfc, X_train, X_test, y_train, y_test) + """ + assert is_classifier(classifier), 'classifier should be sklearn classifier.' 
+ + exp = _validate_experiment(experiment) + + log_estimator_params(classifier, exp) + log_pickled_model(classifier, model_name, exp) + log_test_preds_proba(classifier, X_test, nrows=nrows, experiment=exp) + + y_pred = classifier.predict(X_test) + log_test_predictions(classifier, X_test, y_test, y_pred=y_pred, nrows=nrows, experiment=exp) + log_scores(classifier, X_test, y_test, y_pred=y_pred, name='test', experiment=exp) + + # visualizations + log_classification_report_chart(classifier, X_train, X_test, y_train, y_test, experiment=exp) + log_confusion_matrix_chart(classifier, X_train, X_test, y_train, y_test, experiment=exp) + log_roc_auc_chart(classifier, X_train, X_test, y_train, y_test, experiment=exp) + log_precision_recall_chart(classifier, X_test, y_test, experiment=exp) + log_class_prediction_error_chart(classifier, X_train, X_test, y_train, y_test, experiment=exp) + + +def log_estimator_params(estimator, experiment=None): + """Log estimator parameters. + + Log all estimator parameters as experiment properties. + + Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method. + + Tip: + Check `Neptune documentation `_ for the full example. + + Args: + estimator (:obj:`estimator`): + | Scikit-learn estimator from which to log parameters. + experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``): + | Neptune ``Experiment`` object to control to which experiment you log the data. + | If ``None``, log to currently active, and most recent experiment. + + Returns: + ``None`` + + Examples: + .. code:: python3 + + rfr = RandomForestRegressor() + + neptune.init('my_workspace/my_project') + neptune.create_experiment() + + log_estimator_params(rfr) + """ + assert is_regressor(estimator) or is_classifier(estimator) or isinstance(estimator, KMeans),\ + 'Estimator should be sklearn regressor, classifier or kmeans clusterer.' 
+ + exp = _validate_experiment(experiment) + + for param, value in estimator.get_params().items(): + exp.set_property(param, value) + + +def log_pickled_model(estimator, model_name=None, experiment=None): + """Log pickled estimator. + + Log estimator as pickled file to Neptune artifacts. + + Estimator should be fitted before calling this function. + + Path to file in the Neptune artifacts is ``model/``. + + Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method. + + Tip: + Check `Neptune documentation `_ for the full example. + + Args: + estimator (:obj:`estimator`): + | Scikit-learn estimator to log. + model_name (`str`, optional, default is ``None``): + | Name of the file. + | If ``None`` - ``estimator.skl`` is used. + experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``): + | Neptune ``Experiment`` object to control to which experiment you log the data. + | If ``None``, log to currently active, and most recent experiment. + + Returns: + ``None`` + + Examples: + .. code:: python3 + + rfr = RandomForestRegressor() + rfr.fit(X_train, y_train) + + neptune.init('my_workspace/my_project') + neptune.create_experiment() + + log_pickled_model(rfr, 'my_model') + """ + assert is_regressor(estimator) or is_classifier(estimator),\ + 'Estimator should be sklearn regressor or classifier.' + assert isinstance(model_name, str) or model_name is None, 'model_name should be str,' \ + ' {} was passed instead.'.format(type(model_name)) + + exp = _validate_experiment(experiment) + + if model_name: + model_name = 'model/{}'.format(model_name) + else: + model_name = 'model/estimator.skl' + + log_pickle(model_name, estimator, exp) + + +def log_test_predictions(estimator, X_test, y_test, y_pred=None, nrows=1000, experiment=None): + """Log test predictions. + + Calculate and log test predictions and have them as csv file in the Neptune artifacts. 
+ + If you pass ``y_pred``, then predictions are logged without computing from ``X_test`` data. + + Estimator should be fitted before calling this function. + + Path to predictions in the Neptune artifacts is 'csv/test_predictions.csv'. + + Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method. + + Tip: + Check `Neptune documentation `_ for the full example. + + Args: + estimator (:obj:`estimator`): + | Scikit-learn estimator to compute predictions. + X_test (:obj:`ndarray`): + | Testing data matrix. + y_test (:obj:`ndarray`): + | Target for testing. + y_pred (:obj:`ndarray`, optional, default is ``None``): + | Estimator predictions on test data. + nrows (`int`, optional, default is 1000): + | Number of rows to log. + experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``): + | Neptune ``Experiment`` object to control to which experiment you log the data. + | If ``None``, log to currently active, and most recent experiment. + + Returns: + ``None`` + + Examples: + .. code:: python3 + + rfr = RandomForestRegressor() + rfr.fit(X_train, y_train) + + neptune.init('my_workspace/my_project') + neptune.create_experiment() + + log_test_predictions(rfr, X_test, y_test) + """ + assert is_regressor(estimator) or is_classifier(estimator),\ + 'Estimator should be sklearn regressor or classifier.' 
+ assert isinstance(nrows, int), 'nrows should be integer, {} was passed'.format(type(nrows)) + + exp = _validate_experiment(experiment) + + if y_pred is None: + y_pred = estimator.predict(X_test) + + # single output + if len(y_pred.shape) == 1: + df = pd.DataFrame(data={'y_true': y_test, 'y_pred': y_pred}) + log_csv('test_predictions', df.head(nrows), exp) + + # multi output + if len(y_pred.shape) == 2: + df = pd.DataFrame() + for j in range(y_pred.shape[1]): + df['y_test_output_{}'.format(j)] = y_test[:, j] + df['y_pred_output_{}'.format(j)] = y_pred[:, j] + log_csv('test_predictions', df.head(nrows), exp) + + +def log_test_preds_proba(classifier, X_test, y_pred_proba=None, nrows=1000, experiment=None): + """Log test predictions probabilities. + + Calculate and log test predictions probabilities and have them as csv file in the Neptune artifacts. + + If you pass ``y_pred_proba``, then predictions probabilities are logged without computing from ``X_test`` data. + + Estimator should be fitted before calling this function. + + Path to predictions probabilities in the Neptune artifacts is 'csv/test_preds_proba.csv'. + + Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method. + + Tip: + Check `Neptune documentation `_ for the full example. + + Args: + classifier (:obj:`classifier`): + | Scikit-learn classifier to compute predictions probabilities. + X_test (:obj:`ndarray`): + | Testing data matrix. + y_pred_proba (:obj:`ndarray`, optional, default is ``None``): + | Classifier predictions probabilities on test data. + nrows (`int`, optional, default is 1000): + | Number of rows to log. + experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``): + | Neptune ``Experiment`` object to control to which experiment you log the data. + | If ``None``, log to currently active, and most recent experiment. + + Returns: + ``None`` + + Examples: + .. 
code:: python3 + + rfc = RandomForestClassifier() + rfc.fit(X_train, y_train) + + neptune.init('my_workspace/my_project') + neptune.create_experiment() + + log_test_preds_proba(rfc, X_test) + """ + assert is_classifier(classifier), 'Classifier should be sklearn classifier.' + assert isinstance(nrows, int), 'nrows should be integer, {} was passed'.format(type(nrows)) + + exp = _validate_experiment(experiment) + + if y_pred_proba is None: + try: + y_pred_proba = classifier.predict_proba(X_test) + except Exception as e: + print('This classifier does not provide predictions probabilities. Error: {}'.format(e)) + return + + df = pd.DataFrame(data=y_pred_proba, columns=classifier.classes_) + log_csv('test_preds_proba', df.head(nrows), exp) + + +def log_scores(estimator, X, y, y_pred=None, name=None, experiment=None): + """Log estimator scores on ``X``. + + Calculate and log scores on data and have them as metrics in Neptune. + If you pass ``y_pred``, then predictions are not computed from ``X`` data. + + Estimator should be fitted before calling this function. + + Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method. + + **Regressor** + + For regressors that output a single value, the following scores are logged: + + * explained variance + * max error + * mean absolute error + * r2 + + For multi-output regressor: + + * r2 + + **Classifier** + + For classifiers, the following scores are logged: + + * precision + * recall + * f beta score + * support + + Tip: + Check `Neptune documentation `_ for the full example. + + Args: + estimator (:obj:`estimator`): + | Scikit-learn estimator to compute scores. + X (:obj:`ndarray`): + | Data matrix. + y (:obj:`ndarray`): + | Target for testing. + y_pred (:obj:`ndarray`, optional, default is ``None``): + | Estimator predictions on data. + name (`str`, optional, default is ``None``): + | Use 'train', 'valid', 'test' to better define on what data scores are logged. + | Must be a string in practice: the function asserts ``isinstance(name, str)``, so the ``None`` default is rejected. 
+ experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``): + | Neptune ``Experiment`` object to control to which experiment you log the data. + | If ``None``, log to currently active, and most recent experiment. + + Returns: + ``None`` + + Examples: + .. code:: python3 + + rfc = RandomForestClassifier() + rfc.fit(X_train, y_train) + + neptune.init('my_workspace/my_project') + exp = neptune.create_experiment() + + log_scores(rfc, X, y, name='test', experiment=exp) + """ + assert is_regressor(estimator) or is_classifier(estimator),\ + 'Estimator should be sklearn regressor or classifier.' + assert isinstance(name, str), 'name should be str. {} was passed.'.format(type(name)) + + exp = _validate_experiment(experiment) + + if y_pred is None: + y_pred = estimator.predict(X) + + if is_regressor(estimator): + # single output + if len(y_pred.shape) == 1: + evs = explained_variance_score(y, y_pred) + me = max_error(y, y_pred) + mae = mean_absolute_error(y, y_pred) + r2 = r2_score(y, y_pred) + + exp.log_metric('evs_{}_sklearn'.format(name), evs) + exp.log_metric('me_{}_sklearn'.format(name), me) + exp.log_metric('mae_{}_sklearn'.format(name), mae) + exp.log_metric('r2_{}_sklearn'.format(name), r2) + + # multi output + if len(y_pred.shape) == 2: + r2 = estimator.score(X, y) + exp.log_metric('r2_{}_sklearn'.format(name), r2) + elif is_classifier(estimator): + for metric_name, values in zip(['precision', 'recall', 'fbeta_score', 'support'], + precision_recall_fscore_support(y, y_pred)): + for i, value in enumerate(values): + exp.log_metric('{}_class_{}_{}_sklearn'.format(metric_name, i, name), value) + + +def log_learning_curve_chart(regressor, X_train, y_train, experiment=None): + """Log learning curve chart. + + Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method. + + Tip: + Check `Neptune documentation `_ for the full example. 
+ + Args: + regressor (:obj:`regressor`): + | Fitted sklearn regressor object + X_train (:obj:`ndarray`): + | Training data matrix + y_train (:obj:`ndarray`): + | The regression target for training + experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``): + | Neptune ``Experiment`` object to control to which experiment you log the data. + | If ``None``, log to currently active, and most recent experiment. + + Returns: + ``None`` + + Examples: + .. code:: python3 + + rfr = RandomForestRegressor() + rfr.fit(X_train, y_train) + + neptune.init('my_workspace/my_project') + neptune.create_experiment() + + log_learning_curve_chart(rfr, X_train, y_train) + """ + assert is_regressor(regressor), 'regressor should be sklearn regressor.' + exp = _validate_experiment(experiment) + + try: + fig, ax = plt.subplots() + plot_learning_curve(regressor, X_train, y_train, ax=ax) + exp.log_image('charts_sklearn', fig, image_name='Learning Curve') + plt.close(fig) + except Exception as e: + print('Did not log learning curve chart. Error: {}'.format(e)) + + +def log_feature_importance_chart(regressor, X_train, y_train, experiment=None): + """Log feature importance chart. + + Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method. + + Tip: + Check `Neptune documentation `_ for the full example. + + Args: + regressor (:obj:`regressor`): + | Fitted sklearn regressor object + X_train (:obj:`ndarray`): + | Training data matrix + y_train (:obj:`ndarray`): + | The regression target for training + experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``): + | Neptune ``Experiment`` object to control to which experiment you log the data. + | If ``None``, log to currently active, and most recent experiment. + + Returns: + ``None`` + + Examples: + .. 
code:: python3 + + rfr = RandomForestRegressor() + rfr.fit(X_train, y_train) + + neptune.init('my_workspace/my_project') + neptune.create_experiment() + + log_feature_importance_chart(rfr, X_train, y_train) + """ + assert is_regressor(regressor), 'regressor should be sklearn regressor.' + exp = _validate_experiment(experiment) + + try: + fig, ax = plt.subplots() + visualizer = FeatureImportances(regressor, is_fitted=True, ax=ax) + visualizer.fit(X_train, y_train) + visualizer.finalize() + exp.log_image('charts_sklearn', fig, image_name='Feature Importance') + plt.close(fig) + except Exception as e: + print('Did not log feature importance chart. Error: {}'.format(e)) + + +def log_residuals_chart(regressor, X_train, X_test, y_train, y_test, experiment=None): + """Log residuals chart. + + Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method. + + Tip: + Check `Neptune documentation `_ for the full example. + + Args: + regressor (:obj:`regressor`): + | Fitted sklearn regressor object + X_train (:obj:`ndarray`): + | Training data matrix + X_test (:obj:`ndarray`): + | Testing data matrix + y_train (:obj:`ndarray`): + | The regression target for training + y_test (:obj:`ndarray`): + | The regression target for testing + experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``): + | Neptune ``Experiment`` object to control to which experiment you log the data. + | If ``None``, log to currently active, and most recent experiment. + + Returns: + ``None`` + + Examples: + .. code:: python3 + + rfr = RandomForestRegressor() + rfr.fit(X_train, y_train) + + neptune.init('my_workspace/my_project') + exp = neptune.create_experiment() + + log_residuals_chart(rfr, X_train, X_test, y_train, y_test, experiment=exp) + """ + assert is_regressor(regressor), 'regressor should be sklearn regressor.' 
+ exp = _validate_experiment(experiment) + + try: + fig, ax = plt.subplots() + visualizer = ResidualsPlot(regressor, is_fitted=True, ax=ax) + visualizer.fit(X_train, y_train) + visualizer.score(X_test, y_test) + visualizer.finalize() + exp.log_image('charts_sklearn', fig, image_name='Residuals Plot') + plt.close(fig) + except Exception as e: + print('Did not log residuals chart. Error: {}'.format(e)) + + +def log_prediction_error_chart(regressor, X_train, X_test, y_train, y_test, experiment=None): + """Log prediction error chart. + + Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method. + + Tip: + Check `Neptune documentation `_ for the full example. + + Args: + regressor (:obj:`regressor`): + | Fitted sklearn regressor object + X_train (:obj:`ndarray`): + | Training data matrix + X_test (:obj:`ndarray`): + | Testing data matrix + y_train (:obj:`ndarray`): + | The regression target for training + y_test (:obj:`ndarray`): + | The regression target for testing + experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``): + | Neptune ``Experiment`` object to control to which experiment you log the data. + | If ``None``, log to currently active, and most recent experiment. + + Returns: + ``None`` + + Examples: + .. code:: python3 + + rfr = RandomForestRegressor() + rfr.fit(X_train, y_train) + + neptune.init('my_workspace/my_project') + neptune.create_experiment() + + log_prediction_error_chart(rfr, X_train, X_test, y_train, y_test) + """ + assert is_regressor(regressor), 'regressor should be sklearn regressor.' + exp = _validate_experiment(experiment) + + try: + fig, ax = plt.subplots() + visualizer = PredictionError(regressor, is_fitted=True, ax=ax) + visualizer.fit(X_train, y_train) + visualizer.score(X_test, y_test) + visualizer.finalize() + exp.log_image('charts_sklearn', fig, image_name='Prediction Error') + plt.close(fig) + except Exception as e: + print('Did not log prediction error chart. 
Error: {}'.format(e)) + + +def log_cooks_distance_chart(regressor, X_train, y_train, experiment=None): + """Log Cook's distance chart. + + Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method. + + Tip: + Check `Neptune documentation `_ for the full example. + + Args: + regressor (:obj:`regressor`): + | Fitted sklearn regressor object + X_train (:obj:`ndarray`): + | Training data matrix + y_train (:obj:`ndarray`): + | The regression target for training + experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``): + | Neptune ``Experiment`` object to control to which experiment you log the data. + | If ``None``, log to currently active, and most recent experiment. + + Returns: + ``None`` + + Examples: + .. code:: python3 + + rfr = RandomForestRegressor() + rfr.fit(X_train, y_train) + + neptune.init('my_workspace/my_project') + neptune.create_experiment() + + log_cooks_distance_chart(rfr, X_train, y_train) + """ + assert is_regressor(regressor), 'regressor should be sklearn regressor.' + exp = _validate_experiment(experiment) + + try: + fig, ax = plt.subplots() + visualizer = CooksDistance(ax=ax) + visualizer.fit(X_train, y_train) + visualizer.finalize() + exp.log_image('charts_sklearn', fig, image_name='Cooks Distance') + plt.close(fig) + except Exception as e: + print('Did not log cooks distance chart. Error: {}'.format(e)) + + +def log_classification_report_chart(classifier, X_train, X_test, y_train, y_test, experiment=None): + """Log classification report chart. + + Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method. + + Tip: + Check `Neptune documentation `_ for the full example. 
def log_confusion_matrix_chart(classifier, X_train, X_test, y_train, y_test, experiment=None):
    """Log confusion matrix.

    The chart is logged as an image named 'Confusion Matrix' to the 'charts_sklearn' log.
    If the chart cannot be created or logged, the error is printed and no exception is raised.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check the Neptune documentation for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            log_confusion_matrix_chart(rfc, X_train, X_test, y_train, y_test)
    """
    assert is_classifier(classifier), 'classifier should be sklearn classifier.'
    exp = _validate_experiment(experiment)

    def draw(ax):
        visualizer = ConfusionMatrix(classifier, is_fitted=True, ax=ax)
        _fit_score_finalize(visualizer, X_train, X_test, y_train, y_test)

    _log_sklearn_figure(exp, 'Confusion Matrix', draw,
                        'Did not log Confusion Matrix chart. Error: {}')


def log_roc_auc_chart(classifier, X_train, X_test, y_train, y_test, experiment=None):
    """Log ROC-AUC chart.

    The chart is logged as an image named 'ROC-AUC' to the 'charts_sklearn' log.
    If the chart cannot be created or logged, the error is printed and no exception is raised.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check the Neptune documentation for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            exp = neptune.create_experiment()

            log_roc_auc_chart(rfc, X_train, X_test, y_train, y_test, experiment=exp)
    """
    assert is_classifier(classifier), 'classifier should be sklearn classifier.'
    exp = _validate_experiment(experiment)

    def draw(ax):
        visualizer = ROCAUC(classifier, is_fitted=True, ax=ax)
        _fit_score_finalize(visualizer, X_train, X_test, y_train, y_test)

    _log_sklearn_figure(exp, 'ROC-AUC', draw,
                        'Did not log ROC-AUC chart. Error {}')


def log_precision_recall_chart(classifier, X_test, y_test, y_pred_proba=None, experiment=None):
    """Log precision recall chart.

    The chart is logged as an image named 'Precision Recall Curve' to the 'charts_sklearn' log.
    If probabilities cannot be computed, or the chart cannot be created or logged,
    the error is printed and no exception is raised.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check the Neptune documentation for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_test (:obj:`ndarray`):
            | The classification target for testing
        y_pred_proba (:obj:`ndarray`, optional, default is ``None``):
            | Classifier predictions probabilities on test data.
            | If ``None``, they are computed with ``classifier.predict_proba(X_test)``.
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            log_precision_recall_chart(rfc, X_test, y_test)
    """
    assert is_classifier(classifier), 'classifier should be sklearn classifier.'
    exp = _validate_experiment(experiment)

    if y_pred_proba is None:
        try:
            y_pred_proba = classifier.predict_proba(X_test)
        except Exception as e:
            # Trailing space keeps the two message parts from running together.
            print('Did not log Precision-Recall chart: this classifier does not provide predictions probabilities. '
                  'Error {}'.format(e))
            return

    _log_sklearn_figure(exp, 'Precision Recall Curve',
                        lambda ax: plot_precision_recall(y_test, y_pred_proba, ax=ax),
                        'Did not log Precision-Recall chart. Error {}')


def log_class_prediction_error_chart(classifier, X_train, X_test, y_train, y_test, experiment=None):
    """Log class prediction error chart.

    The chart is logged as an image named 'Class Prediction Error' to the 'charts_sklearn' log.
    If the chart cannot be created or logged, the error is printed and no exception is raised.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check the Neptune documentation for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The classification target for training
        y_test (:obj:`ndarray`):
            | The classification target for testing
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            exp = neptune.create_experiment()

            log_class_prediction_error_chart(rfc, X_train, X_test, y_train, y_test, experiment=exp)
    """
    assert is_classifier(classifier), 'classifier should be sklearn classifier.'
    exp = _validate_experiment(experiment)

    def draw(ax):
        visualizer = ClassPredictionError(classifier, is_fitted=True, ax=ax)
        _fit_score_finalize(visualizer, X_train, X_test, y_train, y_test)

    _log_sklearn_figure(exp, 'Class Prediction Error', draw,
                        'Did not log Class Prediction Error chart. Error {}')


def log_kmeans_clustering_summary(model, X,
                                  nrows=1000, experiment=None, **kwargs):
    """Log sklearn kmeans summary.

    This method fits the KMeans model to data and logs cluster labels, all kmeans parameters
    and clustering visualizations: KMeans elbow chart and silhouette coefficients chart.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check the Neptune documentation for the full example.

    Args:
        model (:obj:`KMeans`):
            | KMeans object.
        X (:obj:`ndarray`):
            | Training instances to cluster.
        nrows (`int`, optional, default is 1000):
            | Number of rows to log in the cluster labels
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.
        kwargs:
            KMeans parameters. They are set on ``model`` and forwarded to every logging step.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            km = KMeans(n_init=11, max_iter=270)
            X, y = make_blobs(n_samples=579, n_features=17, centers=7, random_state=28743)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            log_kmeans_clustering_summary(km, X=X)
    """
    assert isinstance(model, KMeans), 'model should be sklearn KMeans instance'

    exp = _validate_experiment(experiment)

    model.set_params(**kwargs)
    log_estimator_params(model, exp)
    log_cluster_labels(model, X, nrows=nrows, experiment=exp, **kwargs)

    # visualizations (kwargs are forwarded because the elbow chart reads ``n_clusters`` from them)
    log_kelbow_chart(model, X, experiment=exp, **kwargs)
    log_silhouette_chart(model, X, experiment=exp, **kwargs)


def log_cluster_labels(model, X, nrows=1000, experiment=None, **kwargs):
    """Log index of the cluster label each sample belongs to.

    The model is fitted on ``X`` with ``fit_predict`` and the first ``nrows`` labels are logged
    as a csv named 'cluster_labels' with a single 'cluster_labels' column.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check the Neptune documentation for the full example.

    Args:
        model (:obj:`KMeans`):
            | KMeans object.
        X (:obj:`ndarray`):
            | Training instances to cluster.
        nrows (`int`, optional, default is 1000):
            | Number of rows to log.
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.
        kwargs:
            KMeans parameters.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            km = KMeans(n_init=11, max_iter=270)
            X, y = make_blobs(n_samples=579, n_features=17, centers=7, random_state=28743)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            log_cluster_labels(km, X=X)
    """
    assert isinstance(model, KMeans), 'Model should be sklearn KMeans instance.'
    assert isinstance(nrows, int), 'nrows should be integer, {} was passed'.format(type(nrows))
    exp = _validate_experiment(experiment)

    model.set_params(**kwargs)
    labels = model.fit_predict(X)
    df = pd.DataFrame(data={'cluster_labels': labels})
    log_csv('cluster_labels', df.head(nrows), exp)


def log_kelbow_chart(model, X, experiment=None, **kwargs):
    """Log K-elbow chart for KMeans clusterer.

    The ``k`` passed to the elbow visualizer is ``n_clusters`` from ``kwargs`` when given, otherwise 10.
    The chart is logged as an image named 'KMeans elbow chart' to the 'charts_sklearn' log.
    If the chart cannot be created or logged, the error is printed and no exception is raised.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check the Neptune documentation for the full example.

    Args:
        model (:obj:`KMeans`):
            | KMeans object.
        X (:obj:`ndarray`):
            | Training instances to cluster.
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.
        kwargs:
            KMeans parameters.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            km = KMeans(n_init=11, max_iter=270)
            X, y = make_blobs(n_samples=579, n_features=17, centers=7, random_state=28743)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            log_kelbow_chart(km, X=X)
    """
    assert isinstance(model, KMeans), 'Model should be sklearn KMeans instance.'
    exp = _validate_experiment(experiment)

    model.set_params(**kwargs)
    k = kwargs.get('n_clusters', 10)

    def draw(ax):
        visualizer = KElbowVisualizer(model, k=k, ax=ax)
        visualizer.fit(X)
        visualizer.finalize()

    _log_sklearn_figure(exp, 'KMeans elbow chart', draw,
                        'Did not log KMeans elbow chart. Error {}')


def log_silhouette_chart(model, X, experiment=None, **kwargs):
    """Log Silhouette Coefficients charts for KMeans clusterer.

    Charts are computed for j = 2, 3, ..., n_clusters. For every j the model's ``n_clusters``
    is set to j and the model is refitted on ``X``; each chart is logged as an image named
    'Silhouette Coefficients for k=<j>' to the 'charts_sklearn' log.
    If a chart cannot be created or logged, the error is printed and the loop continues.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check the Neptune documentation for the full example.

    Args:
        model (:obj:`KMeans`):
            | KMeans object.
        X (:obj:`ndarray`):
            | Training instances to cluster.
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.
        kwargs:
            KMeans parameters.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            km = KMeans(n_init=11, max_iter=270)
            X, y = make_blobs(n_samples=579, n_features=17, centers=7, random_state=28743)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            log_silhouette_chart(km, X=X, n_clusters=12)
    """
    assert isinstance(model, KMeans), 'Model should be sklearn KMeans instance.'
    exp = _validate_experiment(experiment)

    model.set_params(**kwargs)
    max_clusters = model.get_params()['n_clusters']

    def draw(ax):
        visualizer = SilhouetteVisualizer(model, is_fitted=True, ax=ax)
        visualizer.fit(X)
        visualizer.finalize()

    for j in range(2, max_clusters + 1):
        # Refitting happens outside the best-effort section, so fitting errors still propagate.
        model.set_params(n_clusters=j)
        model.fit(X)
        _log_sklearn_figure(exp, 'Silhouette Coefficients for k={}'.format(j), draw,
                            'Did not log Silhouette Coefficients chart. Error {}')


def _fit_score_finalize(visualizer, X_train, X_test, y_train, y_test):
    """Run the fit / score / finalize sequence on a classification visualizer."""
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    visualizer.finalize()


def _log_sklearn_figure(exp, image_name, draw, error_template):
    """Draw a chart on a fresh matplotlib figure and log it to the 'charts_sklearn' log.

    The figure is closed in every case, also when drawing or logging fails,
    so failed attempts do not leave open figures behind.

    Args:
        exp: Experiment object whose ``log_image`` method receives the figure.
        image_name (:obj:`str`): Name under which the image is logged.
        draw (:obj:`callable`): Called with the new axes; draws the chart on them.
        error_template (:obj:`str`): Message with one ``{}`` placeholder; it is printed,
            filled with the exception, when the chart cannot be created or logged.

    Returns:
        ``None``
    """
    fig = None
    try:
        fig, ax = plt.subplots()
        draw(ax)
        exp.log_image('charts_sklearn', fig, image_name=image_name)
    except Exception as e:
        # Best-effort logging: report the problem instead of interrupting the caller.
        print(error_template.format(e))
    finally:
        if fig is not None:
            plt.close(fig)


def _validate_experiment(experiment):
    """Return the experiment to log to.

    Args:
        experiment: Neptune ``Experiment`` object, or ``None`` to use the current experiment.

    Returns:
        The passed ``experiment`` when it is a ``neptune.experiments.Experiment``;
        the result of ``neptune.get_experiment()`` when ``experiment`` is ``None``.

    Raises:
        ValueError: If ``experiment`` is passed but is not a ``neptune.experiments.Experiment``.
        neptune.exceptions.NeptuneNoExperimentContextException: Expected from
            ``neptune.get_experiment()`` when ``experiment`` is ``None`` and no experiment exists.
    """
    if experiment is None:
        # Errors from get_experiment() propagate unchanged to the caller.
        return neptune.get_experiment()

    if not isinstance(experiment, neptune.experiments.Experiment):
        raise ValueError('Passed experiment is not Neptune experiment. Create one by using "create_experiment()"')

    return experiment
+ results = skopt.forest_minimize(objective, space, + base_estimator='ET', n_calls=100, n_random_starts=10) - Initialize Neptune:: + Initialize Neptune:: import neptune @@ -103,15 +103,15 @@ def log_results(results, experiment=None, log_plots=True, log_pickle=True): project_qualified_name='shared/showroom') neptune.create_experiment(name='optuna sweep') - Send best parameters to Neptune:: + Send best parameters to Neptune:: - import neptunecontrib.monitoring.skopt as sk_utils + import neptunecontrib.monitoring.skopt as sk_utils - sk_utils.log_results(results) + sk_utils.log_results(results) - You can explore an example experiment in Neptune: - https://ui.neptune.ai/o/shared/org/showroom/e/SHOW-1065/logs - """ + You can explore an example experiment in Neptune: + https://ui.neptune.ai/o/shared/org/showroom/e/SHOW-1065/logs + """ _exp = experiment if experiment else neptune _log_best_score(results, _exp) diff --git a/neptunecontrib/monitoring/xgboost.py b/neptunecontrib/monitoring/xgboost.py index d79ae01..b5f5fe1 100644 --- a/neptunecontrib/monitoring/xgboost.py +++ b/neptunecontrib/monitoring/xgboost.py @@ -69,8 +69,7 @@ def neptune_callback(log_model=True, | If you run xgb.cv, log specified trees for each folds' booster. | Default is ``None`` - do not log any tree. experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``): - | For advanced users only. Pass Neptune - `Experiment `_ + | For advanced users only. Pass Neptune ``Experiment`` object if you want to control to which experiment data is logged. | If ``None``, log to currently active, and most recent experiment. kwargs: @@ -159,12 +158,7 @@ def neptune_callback(log_model=True, neptune.get_experiment() _exp = neptune except neptune.exceptions.NeptuneNoExperimentContextException: - msg = 'No currently running Neptune experiment. \n'\ - 'To start logging to Neptune create experiment by using: `neptune.create_experiment()`. \n'\ - 'More info in the documentation: '\ - '.' 
- raise neptune.exceptions.NeptuneException(msg) + raise neptune.exceptions.NeptuneNoExperimentContextException() assert isinstance(log_model, bool),\ 'log_model must be bool, got {} instead. Check log_model parameter.'.format(type(log_model)) diff --git a/pylintrc b/pylintrc index 08b1179..aca73e5 100644 --- a/pylintrc +++ b/pylintrc @@ -42,7 +42,7 @@ load-plugins=pylintfileheader # W0703 Allow too broad except clause (Exception). # I0011 Do not show Locally disabled warnings in report # W0702 Allow No exception type(s) specified. -disable=R,C0103,C0111,W0401,W0511,W0614,W0703,I0011,W0613,E0401,C0411,E0611,W0221,C0415,W0702 +disable=R,C0103,C0111,C0302,W0401,W0511,W0614,W0703,I0011,W0613,E0401,C0411,E0611,W0221,C0415,W0702 # Enable the message, report, category or checker with the given id(s). You can # either give multiple identifier separated by comma (,) or put this option diff --git a/setup.py b/setup.py index 900568b..2603d97 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,8 @@ def main(): 'bots': ['python-telegram-bot'], 'hpo': ['scikit-optimize>=0.5.2', 'scipy'], 'monitoring': ['scikit-optimize>=0.7.4', 'sacred>=0.7.5', 'scikit-learn>=0.21.3', - 'scikit-plot>=0.3.7', 'seaborn>=0.8.1', 'aif360>=0.2.1', 'xgboost>=0.82'], + 'scikit-plot>=0.3.7', 'seaborn>=0.8.1', 'aif360>=0.2.1', 'xgboost>=0.82', + 'yellowbrick>=1.2'], 'versioning': ['boto3', 'numpy'], 'viz': ['altair>=2.3.0', 'hiplot>=0.1.5'], } @@ -20,7 +21,7 @@ def main(): all_deps += extras[group_name] extras['all'] = all_deps - base_libs = ['attrdict>=2.0.0', 'neptune-client>=0.4.110', 'joblib>=0.13', 'pandas', 'matplotlib', + base_libs = ['attrdict>=2.0.0', 'neptune-client>=0.4.126', 'joblib>=0.13', 'pandas', 'matplotlib', 'Pillow>=6.2.0'] setup(