add residual plots (#387)

mljar · May 4, 2021 · 62aed76
1 parent 3ab1c44
commit 62aed76
Show file tree

Hide file tree

Showing 6 changed files with 44 additions and 12 deletions.
diff --git a/supervised/algorithms/catboost.py b/supervised/algorithms/catboost.py
@@ -72,7 +72,7 @@ def catboost_objective(ml_task, eval_metric):
             "R2",
             "spearman",
             "pearson",
-            "user_defined_metric"
+            "user_defined_metric",
         ]:  # cant optimize them directly
             objective = "RMSE"
     return objective

diff --git a/supervised/algorithms/lightgbm.py b/supervised/algorithms/lightgbm.py
@@ -21,7 +21,7 @@
     lightgbm_eval_metric_f1,
     lightgbm_eval_metric_average_precision,
     lightgbm_eval_metric_accuracy,
-    lightgbm_eval_metric_user_defined
+    lightgbm_eval_metric_user_defined,
 )
 from supervised.utils.config import LOG_LEVEL
 
@@ -155,7 +155,6 @@ def __init__(self, params):
             elif self.params["custom_eval_metric_name"] == "user_defined_metric":
                 self.custom_eval_metric = lightgbm_eval_metric_user_defined
 
-
         logger.debug("LightgbmLearner __init__")
 
     def file_extension(self):

diff --git a/supervised/tuner/mljar_tuner.py b/supervised/tuner/mljar_tuner.py
@@ -263,6 +263,7 @@ def generate_params(
             return []
         except Exception as e:
             import traceback
+
             print(str(e), traceback.format_exc())
             return []
 

diff --git a/supervised/tuner/optuna/lightgbm.py b/supervised/tuner/optuna/lightgbm.py
@@ -11,7 +11,7 @@
     lightgbm_eval_metric_f1,
     lightgbm_eval_metric_average_precision,
     lightgbm_eval_metric_accuracy,
-    lightgbm_eval_metric_user_defined
+    lightgbm_eval_metric_user_defined,
 )
 from supervised.algorithms.registry import BINARY_CLASSIFICATION
 from supervised.algorithms.registry import MULTICLASS_CLASSIFICATION

diff --git a/supervised/utils/additional_plots.py b/supervised/utils/additional_plots.py
@@ -153,6 +153,7 @@ def plots_multiclass(target, predicted_labels, predicted_probas):
                     "figure": fig,
                 }
             ]
+            plt.close("all")
         except Exception as e:
             print(str(e))
 
@@ -162,7 +163,7 @@ def plots_multiclass(target, predicted_labels, predicted_probas):
     def plots_regression(target, predictions):
         figures = []
         try:
-            MAX_SAMPLES = 1000
+            MAX_SAMPLES = 5000
             fig = plt.figure(figsize=(10, 7))
             ax1 = fig.add_subplot(1, 1, 1)
             samples = target.shape[0]
@@ -174,13 +175,41 @@ def plots_regression(target, predictions):
             plt.xlabel("True values")
             plt.ylabel("Predicted values")
             plt.title(f"Target values vs Predicted values (samples={samples})")
+            plt.tight_layout(pad=5.0)
             figures += [
                 {
                     "title": "True vs Predicted",
                     "fname": "true_vs_predicted.png",
                     "figure": fig,
                 }
             ]
+
+            # residual plot
+            fig = plt.figure(figsize=(10, 7))
+            ax1 = fig.add_subplot(1, 1, 1)
+            residuals = target[:samples].values - predictions[:samples].values
+            ax1.scatter(predictions[:samples], residuals, c="tab:blue", alpha=0.2)
+            plt.xlabel("Predicted values")
+            plt.ylabel("Residuals")
+            plt.title(f"Predicted values vs Residuals (samples={samples})")
+            plt.tight_layout(pad=5.0)
+            bb = ax1.get_position()
+
+            ax2 = fig.add_axes((bb.x0 + bb.size[0], bb.y0, 0.05, bb.size[1]))
+            ax2.set_xticklabels([])
+            ax2.set_yticklabels([])
+            ax2.hist(residuals, 50, orientation="horizontal", alpha=0.5)
+            ax2.axis("off")
+
+            figures += [
+                {
+                    "title": "Predicted vs Residuals",
+                    "fname": "predicted_vs_residuals.png",
+                    "figure": fig,
+                }
+            ]
+            plt.close("all")
+
         except Exception as e:
             print(str(e))
         return figures

diff --git a/supervised/utils/automl_plots.py b/supervised/utils/automl_plots.py
@@ -3,6 +3,7 @@
 import numpy as np
 import pandas as pd
 import scipy as sp
+
 logger = logging.getLogger(__name__)
 from supervised.utils.metric import Metric
 from supervised.utils.config import LOG_LEVEL
@@ -45,7 +46,6 @@ def add(results_path, models, fout):
                 f"![models spearman correlation]({AutoMLPlots.correlation_heatmap_fname})\n\n"
             )
 
-
     @staticmethod
     def models_feature_importance(results_path, models):
         try:
@@ -109,7 +109,6 @@ def models_feature_importance(results_path, models):
         except Exception as e:
             pass
 
-
     @staticmethod
     def correlation(oof1, oof2):
         cols = [c for c in oof1.columns if "prediction" in c]
@@ -135,9 +134,11 @@ def models_correlation(results_path, models):
 
             corrs = np.ones((len(names), len(names)))
             for i in range(len(names)):
-                for j in range(i+1, len(names)):
-                    corrs[i,j] = corrs[j,i] = AutoMLPlots.correlation(oofs[i], oofs[j])
-
+                for j in range(i + 1, len(names)):
+                    corrs[i, j] = corrs[j, i] = AutoMLPlots.correlation(
+                        oofs[i], oofs[j]
+                    )
+
             fig, ax = plt.subplots(1, 1, figsize=(10, 9))
 
             image = ax.imshow(
@@ -157,8 +158,10 @@ def models_correlation(results_path, models):
             ax.set_title("Spearman Correlation of Models")
 
             plt.tight_layout(pad=2.0)
-            plot_path = os.path.join(results_path, AutoMLPlots.correlation_heatmap_fname)
+            plot_path = os.path.join(
+                results_path, AutoMLPlots.correlation_heatmap_fname
+            )
             plt.savefig(plot_path)
             plt.close("all")
         except Exception as e:
-            pass
+            pass