This repository was archived by the owner on Jan 13, 2024. It is now read-only.

Commit e4cbcf9

Fixes #70, partially checks every example in scikit-learn
1 parent df58cb4 commit e4cbcf9
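
The commit message describes the goal: run scikit-learn's documentation examples and check that they still execute against mlprodict's tooling. As a rough, hypothetical sketch only (the actual harness lives in the test files below and may work differently), a folder of example scripts can be smoke-tested by discovering them with glob and executing each one with runpy; the examples_sklearn directory name used here is an assumption, not the repository's real layout.

import glob
import os
import runpy
import unittest

import matplotlib
matplotlib.use('Agg')  # non-interactive backend so plt.show() does not block


class TestRunSklearnExamples(unittest.TestCase):
    # Hypothetical folder holding copies of scikit-learn example scripts.
    examples_dir = os.path.join(os.path.dirname(__file__), 'examples_sklearn')

    def test_examples_execute(self):
        scripts = sorted(glob.glob(os.path.join(self.examples_dir, '*.py')))
        for script in scripts:
            with self.subTest(script=os.path.basename(script)):
                # Run each example in its own namespace so one failure
                # does not mask the others.
                runpy.run_path(script, run_name='__main__')


if __name__ == '__main__':
    unittest.main()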

File tree

14 files changed: +950 additions, -10 deletions


_unittests/ut_cli/test_cli_validate.py

Lines changed: 0 additions & 2 deletions
@@ -30,8 +30,6 @@ def test_cli_validate_model(self):
                 '--out_graph', gr],
             fLOG=st.fprint)
         res = str(st)
-        print('-----------------')
-        print(res)
         self.assertIn('Linear', res)
         self.assertExists(out1)
         self.assertExists(out2)

_unittests/ut_onnx_conv/test_scorers.py

Lines changed: 4 additions & 1 deletion
@@ -9,7 +9,10 @@
 # unittest_require_at_least
 from pyquickhelper.pycode import ExtTestCase, get_temp_folder
 from sklearn.metrics import make_scorer
-from sklearn.metrics.scorer import _PredictScorer
+try:
+    from sklearn.metrics._scorer import _PredictScorer
+except ImportError:
+    from sklearn.metrics.scorer import _PredictScorer
 from mlprodict.onnx_conv import to_onnx, register_scorers
 from mlprodict.onnxrt import OnnxInference
 from mlprodict.onnx_conv.scorers.cdist_score import score_cdist_sum
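
The try/except above exists because scikit-learn moved _PredictScorer from sklearn.metrics.scorer to the private module sklearn.metrics._scorer (around release 0.22). When several symbols need the same treatment, the pattern can be factored into a small helper; this is a generic sketch, not a utility that exists in mlprodict or pyquickhelper.

from importlib import import_module


def import_first(name, *module_paths):
    # Return attribute `name` from the first listed module that provides it.
    # Handy when a library renames an internal module between releases.
    last_error = None
    for path in module_paths:
        try:
            return getattr(import_module(path), name)
        except (ImportError, AttributeError) as e:
            last_error = e
    raise ImportError("cannot import %r from any of %r" % (
        name, module_paths)) from last_error


# Equivalent to the try/except added in the diff above.
_PredictScorer = import_first(
    '_PredictScorer', 'sklearn.metrics._scorer', 'sklearn.metrics.scorer')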

_unittests/ut_onnxrt/test_onnxrt_validate_2.py

Lines changed: 5 additions & 1 deletion
@@ -40,8 +40,12 @@ def test_n_features_int(self):
         self.assertEqualArray(X, X2)
         X2 = _modify_dimension(X, 6)
         self.assertEqualArray(X[:, 2:4], X2[:, 2:4])
-        self.assertNotEqualArray(X[:, :2], X2[:, :2])
         self.assertNotEqualArray(X[:, :2], X2[:, 4:6])
+        try:
+            self.assertNotEqualArray(X[:, :2], X2[:, :2])
+        except AssertionError as e:
+            raise AssertionError("Should be different\n{}\n{}".format(
+                X, X2)) from e
 
 
 if __name__ == "__main__":
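
The new try/except in this test does not change what is asserted; it only re-raises the AssertionError with the two arrays attached so a failure is easier to diagnose. The same idea can be written once as a context manager; this is a generic sketch, not a helper from mlprodict or pyquickhelper.

from contextlib import contextmanager


@contextmanager
def assertion_context(*values):
    # Re-raise an AssertionError with extra data appended to its message.
    try:
        yield
    except AssertionError as e:
        raise AssertionError("{}\n{}".format(
            e, "\n".join(map(str, values)))) from e


# Usage inside the test, equivalent to the added try/except:
#     with assertion_context(X, X2):
#         self.assertNotEqualArray(X[:, :2], X2[:, :2])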
Lines changed: 92 additions & 0 deletions
@@ -0,0 +1,92 @@
import time

import numpy as np
import matplotlib
import matplotlib.pyplot as plt

from sklearn import svm
from sklearn.datasets import make_moons, make_blobs
from sklearn.covariance import EllipticEnvelope
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor

matplotlib.rcParams['contour.negative_linestyle'] = 'solid'

# Example settings
n_samples = 300
outliers_fraction = 0.15
n_outliers = int(outliers_fraction * n_samples)
n_inliers = n_samples - n_outliers

# define outlier/anomaly detection methods to be compared
anomaly_algorithms = [
    ("Robust covariance", EllipticEnvelope(contamination=outliers_fraction)),
    ("One-Class SVM", svm.OneClassSVM(nu=outliers_fraction, kernel="rbf",
                                      gamma=0.1)),
    ("Isolation Forest", IsolationForest(contamination=outliers_fraction,
                                         random_state=42)),
    ("Local Outlier Factor", LocalOutlierFactor(
        n_neighbors=35, contamination=outliers_fraction))]

# Define datasets
blobs_params = dict(random_state=0, n_samples=n_inliers, n_features=2)
datasets = [
    make_blobs(centers=[[0, 0], [0, 0]], cluster_std=0.5,
               **blobs_params)[0],
    make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5],
               **blobs_params)[0],
    make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[1.5, .3],
               **blobs_params)[0],
    4. * (make_moons(n_samples=n_samples, noise=.05, random_state=0)[0] -
          np.array([0.5, 0.25])),
    14. * (np.random.RandomState(42).rand(n_samples, 2) - 0.5)]

# Compare given classifiers under given settings
xx, yy = np.meshgrid(np.linspace(-7, 7, 150),
                     np.linspace(-7, 7, 150))

plt.figure(figsize=(len(anomaly_algorithms) * 2 + 3, 12.5))
plt.subplots_adjust(left=.02, right=.98, bottom=.001, top=.96, wspace=.05,
                    hspace=.01)

plot_num = 1
rng = np.random.RandomState(42)

for i_dataset, X in enumerate(datasets):
    # Add outliers
    X = np.concatenate([X, rng.uniform(low=-6, high=6,
                                       size=(n_outliers, 2))], axis=0)

    for name, algorithm in anomaly_algorithms:
        t0 = time.time()
        algorithm.fit(X)
        t1 = time.time()
        plt.subplot(len(datasets), len(anomaly_algorithms), plot_num)
        if i_dataset == 0:
            plt.title(name, size=18)

        # fit the data and tag outliers
        if name == "Local Outlier Factor":
            y_pred = algorithm.fit_predict(X)
        else:
            y_pred = algorithm.fit(X).predict(X)

        # plot the levels lines and the points
        if name != "Local Outlier Factor":  # LOF does not implement predict
            Z = algorithm.predict(np.c_[xx.ravel(), yy.ravel()])
            Z = Z.reshape(xx.shape)
            plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors='black')

        colors = np.array(['#377eb8', '#ff7f00'])
        plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[(y_pred + 1) // 2])

        plt.xlim(-7, 7)
        plt.ylim(-7, 7)
        plt.xticks(())
        plt.yticks(())
        plt.text(.99, .01, ('%.2fs' % (t1 - t0)).lstrip('0'),
                 transform=plt.gca().transAxes, size=15,
                 horizontalalignment='right')
        plot_num += 1

plt.show()
Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn import datasets

# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features.
Y = iris.target

logreg = LogisticRegression(C=1e5)

# Create an instance of Logistic Regression Classifier and fit the data.
logreg.fit(X, Y)

# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max]x[y_min, y_max].
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
h = .02  # step size in the mesh
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()])

# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.figure(1, figsize=(4, 3))
plt.pcolormesh(xx, yy, Z)

# Plot also the training points
plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors='k')
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')

plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.xticks(())
plt.yticks(())

plt.show()
Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection

from sklearn.linear_model import LinearRegression
from sklearn.isotonic import IsotonicRegression
from sklearn.utils import check_random_state

n = 100
x = np.arange(n)
rs = check_random_state(0)
y = rs.randint(-50, 50, size=(n,)) + 50. * np.log1p(np.arange(n))
print(y)

# #############################################################################
# Fit IsotonicRegression and LinearRegression models

ir = IsotonicRegression()

y_ = ir.fit_transform(x, y)

lr = LinearRegression()
lr.fit(x[:, np.newaxis], y)  # x needs to be 2d for LinearRegression

# #############################################################################
# Plot result

print(y)
segments = [[[i, y[i]], [i, y_[i]]] for i in range(n)]
lc = LineCollection(segments, zorder=0)
lc.set_array(np.ones(len(y)))

fig = plt.figure()
plt.plot(x, y, 'r.', markersize=12)
plt.plot(x, y_, 'b.-', markersize=12)
plt.plot(x, lr.predict(x[:, np.newaxis]), 'b-')
plt.gca().add_collection(lc)
plt.legend(('Data', 'Isotonic Fit', 'Linear Fit'), loc='lower right')
plt.title('Isotonic regression')
plt.show()
Lines changed: 132 additions & 0 deletions
@@ -0,0 +1,132 @@
import time

import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import learning_curve
from sklearn.kernel_ridge import KernelRidge

rng = np.random.RandomState(0)

# #############################################################################
# Generate sample data
X = 5 * rng.rand(10000, 1)
y = np.sin(X).ravel()

# Add noise to targets
y[::5] += 3 * (0.5 - rng.rand(X.shape[0] // 5))

X_plot = np.linspace(0, 5, 100000)[:, None]

# #############################################################################
# Fit regression model
train_size = 100
svr = GridSearchCV(SVR(kernel='rbf', gamma=0.1),
                   param_grid={"C": [1e0, 1e1, 1e2, 1e3],
                               "gamma": np.logspace(-2, 2, 5)})

kr = GridSearchCV(KernelRidge(kernel='rbf', gamma=0.1),
                  param_grid={"alpha": [1e0, 0.1, 1e-2, 1e-3],
                              "gamma": np.logspace(-2, 2, 5)})

t0 = time.time()
svr.fit(X[:train_size], y[:train_size])
svr_fit = time.time() - t0
print("SVR complexity and bandwidth selected and model fitted in %.3f s"
      % svr_fit)

t0 = time.time()
kr.fit(X[:train_size], y[:train_size])
kr_fit = time.time() - t0
print("KRR complexity and bandwidth selected and model fitted in %.3f s"
      % kr_fit)

sv_ratio = svr.best_estimator_.support_.shape[0] / train_size
print("Support vector ratio: %.3f" % sv_ratio)

t0 = time.time()
y_svr = svr.predict(X_plot)
svr_predict = time.time() - t0
print("SVR prediction for %d inputs in %.3f s"
      % (X_plot.shape[0], svr_predict))

t0 = time.time()
y_kr = kr.predict(X_plot)
kr_predict = time.time() - t0
print("KRR prediction for %d inputs in %.3f s"
      % (X_plot.shape[0], kr_predict))


# #############################################################################
# Look at the results
sv_ind = svr.best_estimator_.support_
plt.scatter(X[sv_ind], y[sv_ind], c='r', s=50, label='SVR support vectors',
            zorder=2, edgecolors=(0, 0, 0))
plt.scatter(X[:100], y[:100], c='k', label='data', zorder=1,
            edgecolors=(0, 0, 0))
plt.plot(X_plot, y_svr, c='r',
         label='SVR (fit: %.3fs, predict: %.3fs)' % (svr_fit, svr_predict))
plt.plot(X_plot, y_kr, c='g',
         label='KRR (fit: %.3fs, predict: %.3fs)' % (kr_fit, kr_predict))
plt.xlabel('data')
plt.ylabel('target')
plt.title('SVR versus Kernel Ridge')
plt.legend()

# Visualize training and prediction time
plt.figure()

# Generate sample data
X = 5 * rng.rand(10000, 1)
y = np.sin(X).ravel()
y[::5] += 3 * (0.5 - rng.rand(X.shape[0] // 5))
sizes = np.logspace(1, 4, 7).astype(np.int)
for name, estimator in {"KRR": KernelRidge(kernel='rbf', alpha=0.1,
                                            gamma=10),
                        "SVR": SVR(kernel='rbf', C=1e1, gamma=10)}.items():
    train_time = []
    test_time = []
    for train_test_size in sizes:
        t0 = time.time()
        estimator.fit(X[:train_test_size], y[:train_test_size])
        train_time.append(time.time() - t0)

        t0 = time.time()
        estimator.predict(X_plot[:1000])
        test_time.append(time.time() - t0)

    plt.plot(sizes, train_time, 'o-', color="r" if name == "SVR" else "g",
             label="%s (train)" % name)
    plt.plot(sizes, test_time, 'o--', color="r" if name == "SVR" else "g",
             label="%s (test)" % name)

plt.xscale("log")
plt.yscale("log")
plt.xlabel("Train size")
plt.ylabel("Time (seconds)")
plt.title('Execution Time')
plt.legend(loc="best")

# Visualize learning curves
plt.figure()

svr = SVR(kernel='rbf', C=1e1, gamma=0.1)
kr = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
train_sizes, train_scores_svr, test_scores_svr = \
    learning_curve(svr, X[:100], y[:100], train_sizes=np.linspace(0.1, 1, 10),
                   scoring="neg_mean_squared_error", cv=10)
train_sizes_abs, train_scores_kr, test_scores_kr = \
    learning_curve(kr, X[:100], y[:100], train_sizes=np.linspace(0.1, 1, 10),
                   scoring="neg_mean_squared_error", cv=10)

plt.plot(train_sizes, -test_scores_svr.mean(1), 'o-', color="r",
         label="SVR")
plt.plot(train_sizes, -test_scores_kr.mean(1), 'o-', color="g",
         label="KRR")
plt.xlabel("Train size")
plt.ylabel("Mean Squared Error")
plt.title('Learning curves')
plt.legend(loc="best")

plt.show()
