# Curve fitting with masked data
A quick demo of `scipy.optimize.curve_fit` to check whether it respects the mask of a masked array.

**Result: it does not. Masked points must be removed manually before the data are passed to scipy fitting routines.**

In [None]:
%matplotlib inline
from __future__ import print_function
from scipy.optimize import curve_fit
import numpy as np
import matplotlib.pyplot as plt
import matplotlib

In [None]:
# Use a larger default canvas (width 10, height 8) for every figure below.
plt.rc('figure', figsize=(10, 8))

In [None]:
# Fix the global NumPy seed so the noise drawn below is reproducible.
np.random.seed(seed=123)

In [None]:
# Ground-truth line parameters and the standard deviation of the noise.
m0 = 3.0
c0 = 5.0
sigma = 4.0

# Sample the true line at integer x values 5..14 and add Gaussian noise.
x = np.arange(5, 15)
y = m0 * x + c0 + np.random.normal(scale=sigma, size=x.shape)


In [None]:
# Turn the last sample into an outlier by shifting it up by 10.
y[-1] += 10

In [None]:
# Wrap y in a masked array: start with nothing masked, then flag the
# last sample (the outlier) as masked.
y_masked = np.ma.masked_array(y, mask=np.zeros(y.shape, dtype=bool))
y_masked.mask[-1] = True
y_masked

In [None]:
# Complementary view of y: the mask is inverted, so only the points that
# are masked in y_masked remain visible here.
outliers = np.ma.masked_array(y, mask=~y_masked.mask)
outliers

In [None]:
def straight_line(x, slope, intercept):
    """Evaluate the linear model ``slope * x + intercept``.

    ``x`` may be a scalar or an array; the arithmetic is applied as-is, so
    the result has whatever type/shape the expression produces.
    """
    return intercept + slope * x

Run a fit including the outliers:

In [None]:
# Least-squares fit of the line to every sample, outlier included.
fit_params, fit_cov = curve_fit(f=straight_line, xdata=x, ydata=y)
m1, c1 = fit_params
# Fitted line evaluated at the sample positions.
y1 = straight_line(x, m1, c1)
m1, c1

Unfortunately, `scipy.optimize.curve_fit` ignores array masks, so fitting the masked array gives the same parameters as the fit that included the outlier:

In [None]:
# Same fit, but with the masked array as the y data; the covariance
# matrix is discarded.
misleading_params, _ = curve_fit(f=straight_line, xdata=x, ydata=y_masked)
misleading_params

We need to manually trim the data first:

In [None]:
# Keep only the samples that are not masked, for both coordinates.
x_trimmed = x[np.logical_not(y_masked.mask)]
y_trimmed = y_masked.compressed()

# Fit the line to the trimmed samples; the covariance matrix is discarded.
trimmed_fit, _ = curve_fit(f=straight_line, xdata=x_trimmed, ydata=y_trimmed)
m2, c2 = trimmed_fit
m2, c2

In [None]:
# Overlay the samples, the flagged outlier and the three candidate lines.
# The figure and axes are created explicitly instead of relying on
# implicit pyplot state.
fig, ax = plt.subplots()

# The two scatter calls plot complementary masked views of y.
ax.scatter(x, y_masked, label='Data')
ax.scatter(x, outliers, label='Outliers')

# Draw every line through the same model function used for the fits.
ax.plot(x, straight_line(x, m0, c0), ls='--', label='True line')
ax.plot(x, straight_line(x, m1, c1), ls='--', label='Fitted with outlier')
ax.plot(x, straight_line(x, m2, c2), ls='--', label='Fitted to trimmed data')

ax.legend(loc='best')