Skip to content

Commit

Permalink
DOC: improve Thiel-Sen vs RANSAC example
Browse files Browse the repository at this point in the history
  • Loading branch information
GaelVaroquaux committed Mar 23, 2014
1 parent 39efe30 commit 689bf29
Showing 1 changed file with 46 additions and 45 deletions.
91 changes: 46 additions & 45 deletions examples/linear_model/plot_theilsen.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,70 +33,71 @@
# Author: Florian Wilhelm -- <florian.wilhelm@gmail.com>
# License: BSD 3 clause

from __future__ import division, print_function, absolute_import
import time
import numpy as np
import matplotlib.pylab as pl
from sklearn.linear_model import LinearRegression, TheilSen, RANSACRegressor

print(__doc__)

###################
# Theil-Sen / OLS #
###################
estimators = [('OLS', LinearRegression()),
('Theil-Sen', TheilSen()),
('RANSAC', RANSACRegressor(random_state=42)), ]

##############################################################################
# Outliers only in the y direction

# Generate 1d toy problem
np.random.seed(0)
n_samples = 100
n_samples = 200
# Linear model y = 3*x + N(2, 0.1**2)
x = np.random.randn(n_samples)
w = np.array([3.])
c = np.array([2.])
w = 3.
c = 2.
noise = 0.1 * np.random.randn(n_samples)
y = w * x + c + noise
# Add some outliers
x[42], y[42] = (-2, 4)
x[43], y[43] = (-2.5, 8)
x[53], y[53] = (2.5, 1)
x[60], y[60] = (2.1, 2)
x[72], y[72] = (1.8, -7)
# 10% outliers
y[-20:] += -20 * x[-20:]
X = x[:, np.newaxis]

# Ordinary Least Squares fit
lstq = LinearRegression().fit(X, y)
pl.plot(x, y, 'k+', mew=2, ms=8)
line_x = np.array([-3, 3])
y_pred_lstq = lstq.predict(line_x.reshape(2, 1))
# Theil-Sen fit
theilsen = TheilSen().fit(X, y)
y_pred_theilsen = theilsen.predict(line_x.reshape(2, 1))
# Plot
fig, ax = pl.subplots()
pl.scatter(X, y)
pl.plot(line_x, y_pred_lstq, label='OLS')
pl.plot(line_x, y_pred_theilsen, label='Theil-Sen')
ax.set_xlim(line_x)
for name, estimator in estimators:
t0 = time.time()
estimator.fit(X, y)
elapsed_time = time.time() - t0
y_pred = estimator.predict(line_x.reshape(2, 1))
pl.plot(line_x, y_pred,
label='%s (fit time: %.2fs)' % (name, elapsed_time))

pl.axis('tight')
pl.legend(loc=2)

######################
# Theil-Sen / RANSAC #
######################

# Construct special case
x = np.array([0.16, 0.16, 0.06, 0.87, 0.29,
0.28, 0.22, 0.11, 0.86, 0.34])
y = np.array([23.46, 29.91, 6.66, 99, 52.55,
44.9, 34.44, 15.31, 98, 61.34])
##############################################################################
# Outliers in the X direction

np.random.seed(0)
# Linear model y = 3*x + N(2, 0.1**2)
x = np.random.randn(n_samples)
noise = 0.1 * np.random.randn(n_samples)
y = 3 * x + 2 + noise
# 10% outliers
x[-20:] = 9.9
y[-20:] += 15
X = x[:, np.newaxis]
pred_X = np.array([0., 1.]).reshape(2, 1)

pl.figure()
pl.ylim(0, 100)
pl.plot(x, y, 'bo')
# RANSAC fit
rs = RANSACRegressor(random_state=42).fit(X, y)
pred_y = rs.predict(pred_X)
pl.plot(pred_X, pred_y, label="RANSAC")
# Theil-Sen fit
ts = TheilSen(random_state=42).fit(X, y)
pred_y = ts.predict(pred_X)
pl.plot(pred_X, pred_y, label="Theil-Sen")
pl.plot(x, y, 'k+', mew=2, ms=8)

line_x = np.array([-3, 10])
for name, estimator in estimators:
t0 = time.time()
estimator.fit(X, y)
elapsed_time = time.time() - t0
y_pred = estimator.predict(line_x.reshape(2, 1))
pl.plot(line_x, y_pred,
label='%s (fit time: %.2fs)' % (name, elapsed_time))

pl.axis('tight')
pl.legend(loc=2)
pl.show()

0 comments on commit 689bf29

Please sign in to comment.