This repository was archived by the owner on Jan 13, 2024. It is now read-only.

Commit e4cbcf9

Fixes #70, partially checks every example in scikit-learn
1 parent df58cb4 commit e4cbcf9
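
The commit message describes the goal: run scikit-learn's documentation examples and check that they still execute against mlprodict's tooling. As a rough, hypothetical sketch only (the actual harness lives in the test files below and may work differently), a folder of example scripts can be smoke-tested by discovering them with glob and executing each one with runpy; the examples_sklearn directory name used here is an assumption, not the repository's real layout.

import glob
import os
import runpy
import unittest

import matplotlib
matplotlib.use('Agg')  # non-interactive backend so plt.show() does not block


class TestRunSklearnExamples(unittest.TestCase):
    # Hypothetical folder holding copies of scikit-learn example scripts.
    examples_dir = os.path.join(os.path.dirname(__file__), 'examples_sklearn')

    def test_examples_execute(self):
        scripts = sorted(glob.glob(os.path.join(self.examples_dir, '*.py')))
        for script in scripts:
            with self.subTest(script=os.path.basename(script)):
                # Run each example in its own namespace so one failure
                # does not mask the others.
                runpy.run_path(script, run_name='__main__')


if __name__ == '__main__':
    unittest.main()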

File tree

14 files changed: +950 additions, -10 deletions


_unittests/ut_cli/test_cli_validate.py

Lines changed: 0 additions & 2 deletions
@@ -30,8 +30,6 @@ def test_cli_validate_model(self):
                 '--out_graph', gr],
             fLOG=st.fprint)
         res = str(st)
-        print('-----------------')
-        print(res)
         self.assertIn('Linear', res)
         self.assertExists(out1)
         self.assertExists(out2)

_unittests/ut_onnx_conv/test_scorers.py

Lines changed: 4 additions & 1 deletion
@@ -9,7 +9,10 @@
 # unittest_require_at_least
 from pyquickhelper.pycode import ExtTestCase, get_temp_folder
 from sklearn.metrics import make_scorer
-from sklearn.metrics.scorer import _PredictScorer
+try:
+    from sklearn.metrics._scorer import _PredictScorer
+except ImportError:
+    from sklearn.metrics.scorer import _PredictScorer
 from mlprodict.onnx_conv import to_onnx, register_scorers
 from mlprodict.onnxrt import OnnxInference
 from mlprodict.onnx_conv.scorers.cdist_score import score_cdist_sum
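
The try/except above exists because scikit-learn moved _PredictScorer from sklearn.metrics.scorer to the private module sklearn.metrics._scorer (around release 0.22). When several symbols need the same treatment, the pattern can be factored into a small helper; this is a generic sketch, not a utility that exists in mlprodict or pyquickhelper.

from importlib import import_module


def import_first(name, *module_paths):
    # Return attribute `name` from the first listed module that provides it.
    # Handy when a library renames an internal module between releases.
    last_error = None
    for path in module_paths:
        try:
            return getattr(import_module(path), name)
        except (ImportError, AttributeError) as e:
            last_error = e
    raise ImportError("cannot import %r from any of %r" % (
        name, module_paths)) from last_error


# Equivalent to the try/except added in the diff above.
_PredictScorer = import_first(
    '_PredictScorer', 'sklearn.metrics._scorer', 'sklearn.metrics.scorer')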

_unittests/ut_onnxrt/test_onnxrt_validate_2.py

Lines changed: 5 additions & 1 deletion
@@ -40,8 +40,12 @@ def test_n_features_int(self):
         self.assertEqualArray(X, X2)
         X2 = _modify_dimension(X, 6)
         self.assertEqualArray(X[:, 2:4], X2[:, 2:4])
-        self.assertNotEqualArray(X[:, :2], X2[:, :2])
         self.assertNotEqualArray(X[:, :2], X2[:, 4:6])
+        try:
+            self.assertNotEqualArray(X[:, :2], X2[:, :2])
+        except AssertionError as e:
+            raise AssertionError("Should be different\n{}\n{}".format(
+                X, X2)) from e
 
 
 if __name__ == "__main__":
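
The new try/except in this test does not change what is asserted; it only re-raises the AssertionError with the two arrays attached so a failure is easier to diagnose. The same idea can be written once as a context manager; this is a generic sketch, not a helper from mlprodict or pyquickhelper.

from contextlib import contextmanager


@contextmanager
def assertion_context(*values):
    # Re-raise an AssertionError with extra data appended to its message.
    try:
        yield
    except AssertionError as e:
        raise AssertionError("{}\n{}".format(
            e, "\n".join(map(str, values)))) from e


# Usage inside the test, equivalent to the added try/except:
#     with assertion_context(X, X2):
#         self.assertNotEqualArray(X[:, :2], X2[:, :2])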
Lines changed: 92 additions & 0 deletions
@@ -0,0 +1,92 @@
import time

import numpy as np
import matplotlib
import matplotlib.pyplot as plt

from sklearn import svm
from sklearn.datasets import make_moons, make_blobs
from sklearn.covariance import EllipticEnvelope
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor

matplotlib.rcParams['contour.negative_linestyle'] = 'solid'

# Example settings
n_samples = 300
outliers_fraction = 0.15
n_outliers = int(outliers_fraction * n_samples)
n_inliers = n_samples - n_outliers

# define outlier/anomaly detection methods to be compared
anomaly_algorithms = [
    ("Robust covariance", EllipticEnvelope(contamination=outliers_fraction)),
    ("One-Class SVM", svm.OneClassSVM(nu=outliers_fraction, kernel="rbf",
                                      gamma=0.1)),
    ("Isolation Forest", IsolationForest(contamination=outliers_fraction,
                                         random_state=42)),
    ("Local Outlier Factor", LocalOutlierFactor(
        n_neighbors=35, contamination=outliers_fraction))]

# Define datasets
blobs_params = dict(random_state=0, n_samples=n_inliers, n_features=2)
datasets = [
    make_blobs(centers=[[0, 0], [0, 0]], cluster_std=0.5,
               **blobs_params)[0],
    make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5],
               **blobs_params)[0],
    make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[1.5, .3],
               **blobs_params)[0],
    4. * (make_moons(n_samples=n_samples, noise=.05, random_state=0)[0] -
          np.array([0.5, 0.25])),
    14. * (np.random.RandomState(42).rand(n_samples, 2) - 0.5)]

# Compare given classifiers under given settings
xx, yy = np.meshgrid(np.linspace(-7, 7, 150),
                     np.linspace(-7, 7, 150))

plt.figure(figsize=(len(anomaly_algorithms) * 2 + 3, 12.5))
plt.subplots_adjust(left=.02, right=.98, bottom=.001, top=.96, wspace=.05,
                    hspace=.01)

plot_num = 1
rng = np.random.RandomState(42)

for i_dataset, X in enumerate(datasets):
    # Add outliers
    X = np.concatenate([X, rng.uniform(low=-6, high=6,
                                       size=(n_outliers, 2))], axis=0)

    for name, algorithm in anomaly_algorithms:
        t0 = time.time()
        algorithm.fit(X)
        t1 = time.time()
        plt.subplot(len(datasets), len(anomaly_algorithms), plot_num)
        if i_dataset == 0:
            plt.title(name, size=18)

        # fit the data and tag outliers
        if name == "Local Outlier Factor":
            y_pred = algorithm.fit_predict(X)
        else:
            y_pred = algorithm.fit(X).predict(X)

        # plot the levels lines and the points
        if name != "Local Outlier Factor":  # LOF does not implement predict
            Z = algorithm.predict(np.c_[xx.ravel(), yy.ravel()])
            Z = Z.reshape(xx.shape)
            plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors='black')

        colors = np.array(['#377eb8', '#ff7f00'])
        plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[(y_pred + 1) // 2])

        plt.xlim(-7, 7)
        plt.ylim(-7, 7)
        plt.xticks(())
        plt.yticks(())
        plt.text(.99, .01, ('%.2fs' % (t1 - t0)).lstrip('0'),
                 transform=plt.gca().transAxes, size=15,
                 horizontalalignment='right')
        plot_num += 1

plt.show()
Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn import datasets

# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features.
Y = iris.target

logreg = LogisticRegression(C=1e5)

# Create an instance of Logistic Regression Classifier and fit the data.
logreg.fit(X, Y)

# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max]x[y_min, y_max].
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
h = .02  # step size in the mesh
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()])

# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.figure(1, figsize=(4, 3))
plt.pcolormesh(xx, yy, Z)

# Plot also the training points
plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors='k')
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')

plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.xticks(())
plt.yticks(())

plt.show()
Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection

from sklearn.linear_model import LinearRegression
from sklearn.isotonic import IsotonicRegression
from sklearn.utils import check_random_state

n = 100
x = np.arange(n)
rs = check_random_state(0)
y = rs.randint(-50, 50, size=(n,)) + 50. * np.log1p(np.arange(n))
print(y)

# #############################################################################
# Fit IsotonicRegression and LinearRegression models

ir = IsotonicRegression()

y_ = ir.fit_transform(x, y)

lr = LinearRegression()
lr.fit(x[:, np.newaxis], y)  # x needs to be 2d for LinearRegression

# #############################################################################
# Plot result

print(y)
segments = [[[i, y[i]], [i, y_[i]]] for i in range(n)]
lc = LineCollection(segments, zorder=0)
lc.set_array(np.ones(len(y)))

fig = plt.figure()
plt.plot(x, y, 'r.', markersize=12)
plt.plot(x, y_, 'b.-', markersize=12)
plt.plot(x, lr.predict(x[:, np.newaxis]), 'b-')
plt.gca().add_collection(lc)
plt.legend(('Data', 'Isotonic Fit', 'Linear Fit'), loc='lower right')
plt.title('Isotonic regression')
plt.show()
Lines changed: 132 additions & 0 deletions
@@ -0,0 +1,132 @@
import time

import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import learning_curve
from sklearn.kernel_ridge import KernelRidge

rng = np.random.RandomState(0)

# #############################################################################
# Generate sample data
X = 5 * rng.rand(10000, 1)
y = np.sin(X).ravel()

# Add noise to targets
y[::5] += 3 * (0.5 - rng.rand(X.shape[0] // 5))

X_plot = np.linspace(0, 5, 100000)[:, None]

# #############################################################################
# Fit regression model
train_size = 100
svr = GridSearchCV(SVR(kernel='rbf', gamma=0.1),
                   param_grid={"C": [1e0, 1e1, 1e2, 1e3],
                               "gamma": np.logspace(-2, 2, 5)})

kr = GridSearchCV(KernelRidge(kernel='rbf', gamma=0.1),
                  param_grid={"alpha": [1e0, 0.1, 1e-2, 1e-3],
                              "gamma": np.logspace(-2, 2, 5)})

t0 = time.time()
svr.fit(X[:train_size], y[:train_size])
svr_fit = time.time() - t0
print("SVR complexity and bandwidth selected and model fitted in %.3f s"
      % svr_fit)

t0 = time.time()
kr.fit(X[:train_size], y[:train_size])
kr_fit = time.time() - t0
print("KRR complexity and bandwidth selected and model fitted in %.3f s"
      % kr_fit)

sv_ratio = svr.best_estimator_.support_.shape[0] / train_size
print("Support vector ratio: %.3f" % sv_ratio)

t0 = time.time()
y_svr = svr.predict(X_plot)
svr_predict = time.time() - t0
print("SVR prediction for %d inputs in %.3f s"
      % (X_plot.shape[0], svr_predict))

t0 = time.time()
y_kr = kr.predict(X_plot)
kr_predict = time.time() - t0
print("KRR prediction for %d inputs in %.3f s"
      % (X_plot.shape[0], kr_predict))


# #############################################################################
# Look at the results
sv_ind = svr.best_estimator_.support_
plt.scatter(X[sv_ind], y[sv_ind], c='r', s=50, label='SVR support vectors',
            zorder=2, edgecolors=(0, 0, 0))
plt.scatter(X[:100], y[:100], c='k', label='data', zorder=1,
            edgecolors=(0, 0, 0))
plt.plot(X_plot, y_svr, c='r',
         label='SVR (fit: %.3fs, predict: %.3fs)' % (svr_fit, svr_predict))
plt.plot(X_plot, y_kr, c='g',
         label='KRR (fit: %.3fs, predict: %.3fs)' % (kr_fit, kr_predict))
plt.xlabel('data')
plt.ylabel('target')
plt.title('SVR versus Kernel Ridge')
plt.legend()

# Visualize training and prediction time
plt.figure()

# Generate sample data
X = 5 * rng.rand(10000, 1)
y = np.sin(X).ravel()
y[::5] += 3 * (0.5 - rng.rand(X.shape[0] // 5))
sizes = np.logspace(1, 4, 7).astype(np.int)
for name, estimator in {"KRR": KernelRidge(kernel='rbf', alpha=0.1,
                                            gamma=10),
                        "SVR": SVR(kernel='rbf', C=1e1, gamma=10)}.items():
    train_time = []
    test_time = []
    for train_test_size in sizes:
        t0 = time.time()
        estimator.fit(X[:train_test_size], y[:train_test_size])
        train_time.append(time.time() - t0)

        t0 = time.time()
        estimator.predict(X_plot[:1000])
        test_time.append(time.time() - t0)

    plt.plot(sizes, train_time, 'o-', color="r" if name == "SVR" else "g",
             label="%s (train)" % name)
    plt.plot(sizes, test_time, 'o--', color="r" if name == "SVR" else "g",
             label="%s (test)" % name)

plt.xscale("log")
plt.yscale("log")
plt.xlabel("Train size")
plt.ylabel("Time (seconds)")
plt.title('Execution Time')
plt.legend(loc="best")

# Visualize learning curves
plt.figure()

svr = SVR(kernel='rbf', C=1e1, gamma=0.1)
kr = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
train_sizes, train_scores_svr, test_scores_svr = \
    learning_curve(svr, X[:100], y[:100], train_sizes=np.linspace(0.1, 1, 10),
                   scoring="neg_mean_squared_error", cv=10)
train_sizes_abs, train_scores_kr, test_scores_kr = \
    learning_curve(kr, X[:100], y[:100], train_sizes=np.linspace(0.1, 1, 10),
                   scoring="neg_mean_squared_error", cv=10)

plt.plot(train_sizes, -test_scores_svr.mean(1), 'o-', color="r",
         label="SVR")
plt.plot(train_sizes, -test_scores_kr.mean(1), 'o-', color="g",
         label="KRR")
plt.xlabel("Train size")
plt.ylabel("Mean Squared Error")
plt.title('Learning curves')
plt.legend(loc="best")

plt.show()
