# Example: simple polynomial regression

This illustrates how I want to use the inversion package on the simplest linear example.

In [None]:
%matplotlib inline
from __future__ import print_function, division
from future.builtins import super, range
import numpy as np
import matplotlib.pyplot as plt
from fatiando import utils
import cPickle as pickle
from inversion import LinearModel

In [None]:
class Regression(LinearModel):
    """
    Polynomial regression of a chosen degree, written as a LinearModel.

    There is one parameter per power of x, from x**0 up to x**degree, so the
    model has ``degree + 1`` parameters.
    """

    def __init__(self, degree, damping=None):
        # The solver is now independent of the data.
        nparams = degree + 1
        super().__init__(nparams=nparams, damping=damping)
        self.degree = degree

    def predict(self, x):
        """
        Evaluate the polynomial at x using the coefficients in ``self.p_``.

        ``self.p_[i]`` multiplies ``x**i``.
        """
        total = 0
        for power in range(self.nparams):
            total = total + self.p_[power]*x**power
        return total

    def jacobian(self, x):
        """
        Return the ``(x.size, nparams)`` matrix whose column i is ``x**i``.

        Prints a message each time it runs, which makes it visible when the
        matrix is actually being recomputed.
        """
        print('Calculate jacobian')
        A = np.empty((x.size, self.nparams))
        # Rows of A.T are views onto the columns of A, so this fills A in place.
        for power, column in enumerate(A.T):
            column[:] = x**power
        return A

    def fit(self, x, y, **kwargs):
        """
        Fit the polynomial to the values y observed at x.

        y is handed to the parent ``fit`` as ``data`` and x as the single
        entry of ``aux``; any extra keyword arguments are passed through
        unchanged. Returns ``self`` so that calls can be chained.
        """
        super().fit(data=y, aux=[x], **kwargs)
        return self

    @property
    def polynomial_(self):
        """
        The polynomial as a string of ``<coef>x**<power>`` terms joined by
        ``' + '``, lowest power first.
        """
        terms = []
        for power in range(self.nparams):
            terms.append('{:g}x**{}'.format(self.p_[power], power))
        return ' + '.join(terms)

# Test on data

In [None]:
# Coefficients of the true cubic, lowest power first:
# y = a + b*x + c*x**2 + d*x**3
coefs = 1000, 10, 0, 1
# Unpack them so the names used in the polynomial below are defined.
a, b, c, d = coefs
n = 200
x = np.linspace(-100, 100, n)
y = a + b*x + c*x**2 + d*x**3
# Contaminated copy of y; the fixed seed keeps the notebook reproducible.
yo = utils.contaminate(y, 0.1, percent=True, seed=0)

In [None]:
# Show the contaminated data as black dots.
plt.plot(x, yo, '.k')

In [None]:
# Fit a degree-3 polynomial to the contaminated data and display the
# estimated parameter vector.
reg = Regression(3)
reg.fit(x, yo).p_

In [None]:
# Display the fitted polynomial as a string of 'coef x**power' terms.
reg.polynomial_
# Should print the equation

In [None]:
# Data (black dots) with the fitted model's prediction on top (red line).
plt.plot(x, yo, '.k')
plt.plot(x, reg.predict(x), '-r')

In [None]:
# No more residuals, since reg doesn't store the data: compute them by hand
# as observed minus predicted and histogram them in 10 bins.
# NOTE(review): `normed` is deprecated since matplotlib 2.1 in favour of
# `density` and removed in later releases - confirm the target version.
_ = plt.hist(yo - reg.predict(x), 10, normed=True, color='grey')

In [None]:
# Can be pickled for use in parallel
# Round-trip the fitted model through pickle and rebind `reg` to the
# restored copy, so the cells below exercise the unpickled object.
reg = pickle.loads(pickle.dumps(reg))

In [None]:
# Config still works
# Fit a fresh cubic once per solver name, passing a vector of ones (one entry
# per coefficient) as `initial`, print each polynomial and overlay the
# predictions on the data. `reg` is left bound to the last model.
plt.plot(x, yo, '.k')
for meth in ['levmarq', 'steepest', 'newton']:
    reg = Regression(3).set_solver(meth, initial=np.ones(len(coefs)))
    print(reg.fit(x, yo).polynomial_)
    plt.plot(x, reg.predict(x), '-', label=meth)
plt.legend()

In [None]:
# Config still works
# Switch the existing model to the 'acor' solver and refit.
# NOTE(review): `bounds` is a flat list of 8 numbers for 4 parameters -
# presumably (min, max) pairs in parameter order; confirm against set_solver.
reg.set_solver('acor', bounds=[0, 10000, 0, 10, 0, 1, 0, 10]).fit(x, yo).polynomial_

In [None]:
# Data (black dots) with the current `reg` prediction on top (red line).
plt.plot(x, yo, '.k')
plt.plot(x, reg.predict(x), '-r')

Methods from [scipy.optimize.minimize](http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html#scipy.optimize.minimize) can also be used as the solver.

In [None]:
# Refit with the 'Nelder-Mead' method, passing a vector of ones as `x0`,
# and print the resulting polynomial.
print(reg.set_solver('Nelder-Mead', x0=np.ones(reg.nparams)).fit(x, yo).polynomial_)

# Data (black dots) with the refitted prediction on top (red line).
plt.plot(x, yo, '.k')
plt.plot(x, reg.predict(x), '-r')

# Using cross-validation

In [None]:
# Shuffle the sample indices with a fixed seed, then cut the shuffled list
# at 60%: the first part is the training set, the remainder the test set.
rand = np.random.RandomState(42)
whole = rand.permutation(x.size)
divide = int(0.6*x.size)
train = whole[:divide]
test = whole[divide:]

In [None]:
# Show only the training subset of the data as black dots.
plt.plot(x[train], yo[train], '.k')

In [None]:
# Try every polynomial degree from 1 to 19: fit on the training subset and
# score on the held-out test subset.
degrees = list(range(1, 20))
scores = []
for degree in degrees:
    model = Regression(degree).fit(x[train], yo[train])
    scores.append(model.score(x[test], yo[test]))
# Keep the degree with the smallest score.
# NOTE(review): assumes a lower score is better - confirm against `score`.
best = degrees[np.argmin(scores)]
best

In [None]:
# Inspect the raw score obtained for each degree.
scores

In [None]:
# Score as a function of polynomial degree.
plt.plot(degrees, scores, '.-k')
#plt.yscale('log')

In [None]:
# Refit on the full data set using the selected degree and display the
# estimate.
reg = Regression(best).fit(x, yo)
reg.estimate_

In [None]:
# Data (black dots) with the selected-degree prediction on top (red line).
plt.plot(x, yo, '.k')
plt.plot(x, reg.predict(x), '-r')

# Re-weighted

In [None]:
# Work on a copy of the data and push 20 randomly drawn samples up by 1e6
# to act as outliers.
yo2 = np.copy(yo)
outliers = rand.choice(yo.size, size=20)
yo2[outliers] += 1e6

In [None]:
# Show the data with outliers as black dots.
plt.plot(x, yo2, '.k')

In [None]:
# Plain (unweighted) cubic fit to the data with outliers, kept in `reg` as
# the baseline for the comparisons below.
reg = Regression(3).fit(x, yo2)
reg.estimate_

In [None]:
# Data with outliers (black dots) and the unweighted prediction (red line).
plt.plot(x, yo2, '.k')
plt.plot(x, reg.predict(x), '-r')

In [None]:
# One manual re-weighting step: fit once, take the absolute residuals, then
# refit passing the inverse residuals as `weights`.
# NOTE(review): 1/r divides by zero if any residual is exactly 0.
reg2 = Regression(3).fit(x, yo2)
r = np.abs(yo2 - reg2.predict(x))
reg2.fit(x, yo2, weights=1/r).estimate_

In [None]:
# Data (black dots), the re-weighted fit `reg2` (red) and the unweighted
# baseline `reg` (blue).
plt.plot(x, yo2, '.k')
plt.plot(x, reg2.predict(x), '-r')
plt.plot(x, reg.predict(x), '-b')

In [None]:
# Same idea through the model's `fit_reweighted` method, asking for 100
# iterations, then display the estimate.
reg3 = Regression(3).fit_reweighted(x, yo2, iterations=100)
reg3.estimate_

In [None]:
# Data (black dots), the `fit_reweighted` result `reg3` (red) and the
# unweighted baseline `reg` (blue).
plt.plot(x, yo2, '.k')
plt.plot(x, reg3.predict(x), '-r')
plt.plot(x, reg.predict(x), '-b')

## Regularization

In [None]:
# Degree-20 polynomial fitted with a damping value of 1, then displayed as
# an equation string. (The dangling `reg = ` that used to open this cell
# was an incomplete statement and stopped the cell from running.)
ridge = Regression(20, damping=1)
ridge.fit(x, yo).polynomial_

In [None]:
# Candidate damping values, log-spaced between 1e1 and 1e15.
mus = np.logspace(1, 15, 30)
# Build the Jacobian for the training points once and reuse it for every
# damping value instead of recomputing it inside the loop.
A = Regression(3).jacobian(x[train])
# A already has one row per training point, so it is passed as is; indexing
# it again with `train` would look up rows that do not exist.
scores = np.array([
    Regression(3, damping=mu)
    .fit(x[train], yo[train], jacobian=A)
    .mse(x[test], yo[test])
    for mu in mus
])
# Keep the damping value with the smallest error on the test subset.
mu = mus[np.argmin(scores)]

In [None]:
# Test error against damping on log-log axes, with the selected damping
# value marked by a black triangle.
plt.plot(mus, scores)
plt.plot(mu, scores.min(), '^k')
plt.xscale('log')
plt.yscale('log')

In [None]:
# Refit a cubic on the full data set with the selected damping value, print
# the polynomial, and plot the data (black dots) with the prediction (red).
ridge = Regression(3, damping=mu)
print(ridge.fit(x, yo).polynomial_)
plt.plot(x, yo, '.k')
plt.plot(x, ridge.predict(x), '-r')