|<h2>Substack post:</h2>|<h1><a href="https://mikexcohen.substack.com/p/nonlinear-curve-fitting-to-data-and" target="_blank">Nonlinear curve-fitting to data and distributions</a></h1>|
|-|:-:|
|<h2>Teacher:</h2>|<h1>Mike X Cohen, <a href="https://sincxpress.com" target="_blank">sincxpress.com</a></h1>|

<br>

<i>Using the code without reading the post may lead to confusion or errors.</i>

In [None]:
import numpy as np
import matplotlib.pyplot as plt

from scipy.optimize import curve_fit
import scipy.stats as stats

In [None]:
### Run this cell only if you're using "dark mode"

# render inline figures as SVG (higher-res)
import matplotlib_inline.backend_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

# the theme uses one background shade and one foreground shade
# (alternative background: '#171717', to match the substack background)
bg_color = '#282a2c'
fg_color = '#DDE2F4'

# background-colored elements
dark_theme = dict.fromkeys(
    ('figure.facecolor','figure.edgecolor','axes.facecolor'), bg_color)

# foreground-colored elements (axes lines, labels, ticks, text)
dark_theme.update(dict.fromkeys(
    ('axes.edgecolor','axes.labelcolor','xtick.color','ytick.color','text.color'), fg_color))

# hide the top/right spines and use bold titles and axis labels
dark_theme.update({
    'axes.spines.right': False,
    'axes.spines.top':   False,
    'axes.titleweight': 'bold',
    'axes.labelweight': 'bold'
})

plt.rcParams.update(dark_theme)

# **Demo 1: Fit a line**

In [None]:
def linear_fun(x,b0,b1):
  """Straight line: intercept b0 plus slope b1 times x.

  The independent variable comes first and the parameters follow it,
  which is the call signature curve_fit uses for a model function.
  """
  slope_term = x*b1
  return b0 + slope_term

# simulate a noisy line (true intercept = 10, true slope = 2)
x = np.linspace(0,5,50)
y = linear_fun(x,10,2) + np.random.normal(0,2,len(x))

# scatter plot of the simulated data
_,ax = plt.subplots(figsize=(9,4))
ax.plot(x,y,'ko',markerfacecolor=[.9,.7,.9],markersize=10)
ax.set(xlabel='x',ylabel='y',title='Simulated data')
plt.show()

In [None]:

# starting values for the optimizer, ordered as [beta0, beta1]
p0 = [0,1]

# fit the linear function to the data
est_params,param_cov = curve_fit(linear_fun,x,y,p0=p0)

# standard errors: square roots of the diagonal of the parameter covariance
sterr = np.sqrt(np.diag(param_cov))

# table of ground truth vs. curve_fit estimates
print('    Truth |  C-fit |  sterr')
print('---+------+--------+--------')
for label,truth,est,se in zip(('b0','b1'),('10',' 2'),est_params,sterr):
  print(f'{label} |  {truth}  |  {est:5.2f} |  {se:.3f}')

In [None]:
# cross-check the curve_fit estimates against an ordinary least-squares fit
import statsmodels.api as sm

# design matrix: x with a column of ones prepended for the intercept
design = sm.add_constant(x)
model = sm.OLS(y,design).fit()
print(model.summary())

In [None]:
# model predictions at the observed x-values
yHat = linear_fun(x,*est_params)

# overlay the fitted line on the data
plt.figure(figsize=(11,4))

plt.plot(x,yHat,'g',linewidth=2,label='Model')
plt.plot(x,y,'ko',markerfacecolor=[.9,.7,.9],markersize=10,label='Data')

plt.legend(facecolor='k')
# the title names the model fit in this demo (a line, via linear_fun)
plt.gca().set(xlabel='x',ylabel='y',title='Linear fit to data')

plt.show()

# **Demo 2: Fit a sigmoid**

In [None]:
def sigmoid_fun(x,A,x0,k,b):
  """Four-parameter logistic (sigmoid) curve.

    A: height of the curve (distance between the two asymptotes)
   x0: x-value of the midpoint (where the curve equals A/2 + b)
    k: curve steepness
    b: vertical offset (the value approached as x -> -inf when k > 0)
  """
  exponent = -k * (x-x0)
  return A / (1+np.exp(exponent)) + b

# simulate a noisy sigmoid (true A = 10, x0 = 1, k = 1, b = 2)
x = np.linspace(-10,10,50)
y = sigmoid_fun(x,10,1,1,2) + np.random.normal(0,2,len(x))

# scatter plot of the simulated data
_,ax = plt.subplots(figsize=(11,4))
ax.plot(x,y,'ko',markerfacecolor=[.9,.7,.9],markersize=10)
ax.set(xlabel='x',ylabel='y',title='Simulated data')
plt.show()

In [None]:
# data-driven starting values, ordered as [A, x0, k, b]:
# range of y, median of x, unit steepness, mean of y
p0 = [np.max(y)-np.min(y), np.median(x), 1, np.mean(y)]

# fit the sigmoid function to the data
est_params,param_cov = curve_fit(sigmoid_fun,x,y,p0=p0)

# standard errors: square roots of the diagonal of the parameter covariance
sterr = np.sqrt(np.diag(param_cov))

# table of ground truth vs. estimates
print('    Truth | Estim. | sterr')
print('---+------+--------+-------')
for label,truth,est,se in zip((' A','x0',' k',' b'),('10',' 1',' 1',' 2'),est_params,sterr):
  print(f'{label} |  {truth}  | {est:5.2f}  |  {se:.2f}')

In [None]:
# starting values, ordered as [A, x0, k, b]
# the fixed list is immediately replaced by standard-normal random draws
# (one per parameter), so here it only determines how many values are drawn
# NOTE(review): presumably the two lines are alternatives to experiment with -- see the post
p0 = [-20,20,-20,20]
p0 = np.random.randn(len(p0))

# fit the sigmoid function to the data
est_params,param_cov = curve_fit(sigmoid_fun,x,y,p0=p0)

# standard errors: square roots of the diagonal of the parameter covariance
sterr = np.sqrt(np.diag(param_cov))

# table of ground truth vs. estimates
print('    Truth | Estim. | sterr')
print('---+------+--------+-------')
for label,truth,est,se in zip((' A','x0',' k',' b'),('10',' 1',' 1',' 2'),est_params,sterr):
  print(f'{label} |  {truth}  | {est:5.2f}  |  {se:.2f}')

In [None]:
# model predictions at the observed x-values
yHat = sigmoid_fun(x,*est_params)

# overlay the fitted curve on the data
_,ax = plt.subplots(figsize=(11,4))

ax.plot(x,yHat,'g',linewidth=2,label='Model')
ax.plot(x,y,'ko',markerfacecolor=[.9,.7,.9],markersize=10,label='Data')

ax.legend(facecolor='k')
ax.set(xlabel='x',ylabel='y',title='Sigmoid fit to data')

plt.show()

# **Demo 3: Fit an ellipse**

In [None]:
### create data
# ground-truth ellipse parameters
x0 = 2    # center (x)
y0 = -1   # center (y)
a  = 5    # semi-axis scaling cos(t)
b  = 3    # semi-axis scaling sin(t)
# NOTE(review): 4/np.pi is ~1.27 rad (~73 deg); if 45 deg was intended this would be np.pi/4 -- confirm against the post
theta = 4/np.pi   # rotation angle

# parametric angle values around the full ellipse
t = np.linspace(0,2*np.pi,100)

# rotation terms
cos_th = np.cos(theta)
sin_th = np.sin(theta)

# noise-free ellipse points
x = x0 + a*np.cos(t)*cos_th - b*np.sin(t)*sin_th
y = y0 + a*np.cos(t)*sin_th + b*np.sin(t)*cos_th

# add noise (standard deviation .5 in x, .1 in y)
x = x + np.random.normal(0,.5,len(t))
y = y + np.random.normal(0,.1,len(t))




### fit the data to an ellipse
def ellipse_fun(t,x0,y0,a,b,theta):
  """Points of a rotated ellipse, flattened into a single vector.

  t is the parametric angle, (x0,y0) the center, a and b the semi-axes,
  and theta the rotation angle. The result holds all x-coordinates followed
  by all y-coordinates, because curve_fit expects one output.
  """
  # unrotated ellipse coordinates relative to the center
  u = a*np.cos(t)
  v = b*np.sin(t)

  # rotate by theta and shift to the center
  cos_th,sin_th = np.cos(theta),np.sin(theta)
  x = x0 + u*cos_th - v*sin_th
  y = y0 + u*sin_th + v*cos_th

  return np.concatenate((x,y))

# starting values, ordered as (x0,y0,a,b,theta)
p0 = [0,0,1,1,0]

# fit the data; the target stacks x then y to match ellipse_fun's output layout
xy_observed = np.concatenate((x,y))
est_params,param_cov = curve_fit(ellipse_fun,t,xy_observed,p0=p0)

# standard errors: square roots of the diagonal of the parameter covariance
sterr = np.sqrt(np.diag(param_cov))

# print the results
print('     Truth | Estim. | sterr')
print('---+-------+--------+-------')
for label,truth,est,se in zip(('x0','y0',' a',' b','th'),(x0,y0,a,b,theta),est_params,sterr):
  print(f'{label} | {truth:4.1f}  | {est:5.2f}  |  {se:.2f}')

In [None]:
# model predictions: first half of the vector is x, second half is y
pred_XYvals = ellipse_fun(t,*est_params)
npoints = len(t)
xHat = pred_XYvals[:npoints]
yHat = pred_XYvals[npoints:]

# overlay the fitted ellipse on the data
_,ax = plt.subplots(figsize=(6,4))

ax.plot(xHat,yHat,'g',linewidth=2,label='Model')
ax.plot(x,y,'ko',markerfacecolor=[.9,.7,.9],markersize=10,label='Data')

ax.legend(facecolor='k')
ax.set(xlabel='x',ylabel='y',title='Ellipse fit to data')

plt.show()

# **Demo 4: Interp/extrapolate a data histogram**

In [None]:
### simulate data

# data parameters
peak_center = 1
noise_level = 2
samplesize = 80

# normally distributed sample
data = np.random.normal(loc=peak_center,scale=noise_level,size=samplesize)

# density-normalized histogram using the 'fd' bin-width rule
histy,bin_edges = np.histogram(data,bins='fd',density=True)
histx = (bin_edges[:-1] + bin_edges[1:]) / 2  # bin edges -> bin centers


### fit a Gaussian to the data histogram
def gauss_fun(x,A,x0,s):
  """Gaussian curve with peak height A, center x0, and width parameter s."""
  sq_dist = (x-x0)**2
  return A*np.exp(-sq_dist/(2*s**2))

# starting values [A, x0, s]: tallest bin, sample median, half the span of the bin centers
half_span = (histx.max()-histx.min())/2
p0 = [np.max(histy),np.median(data),half_span]
est_params,param_cov = curve_fit(gauss_fun,histx,histy,p0=p0)

# predicted (interpolated and extrapolated) histogram:
# the grid reaches 10 units beyond the first and last bin centers
newx = np.linspace(histx[0]-10,histx[-1]+10,101)
yHat = gauss_fun(newx,*est_params)


### visualization
_,axs = plt.subplots(1,3,figsize=(12,3))
ax_raw,ax_hist,ax_fit = axs

# the histogram and fit panels share the x-range of the prediction grid
xlims = newx[[0,-1]]

# panel 1: raw data in sample order
ax_raw.plot(data,'ro',markersize=9,markeredgewidth=.3,markerfacecolor=[.9,.7,.9,.7])
ax_raw.set(xlabel='Data index',ylabel='Data value',title='Raw data')

# panel 2: histogram as bars, one bin-spacing wide
bin_width = histx[1]-histx[0]
ax_hist.bar(histx,histy,color=[.9,.7,.9],width=bin_width,edgecolor='k')
ax_hist.set(xlabel='Data value',ylabel='Density',title='Data histogram',
            xlim=xlims)

# panel 3: histogram values with the fitted Gaussian on top
ax_fit.plot(newx,yHat,'g',linewidth=2,label='Model')
ax_fit.plot(histx,histy,'ko',markerfacecolor=[.9,.7,.9],markersize=10,label='Data')

ax_fit.legend(facecolor='k')
ax_fit.set(xlabel='Data value',ylabel='Density',title='Histogram and Gaussian fit',
           xlim=xlims)

plt.tight_layout()
plt.show()