In [None]:
import matplotlib.pyplot as plt
import os
import time
import matplotlib
import numpy as np
import pandas as pd

Optimisation in science
<div>
<img src="http://www.jensuhlig.de/Kemm30/KEMM30_007.png" width="600">
</div>

How do you get there? Or better: how do you get there without producing a lot of nonsense?
Count the parameters, e.g. 10 peaks, each with position, width and intensity = 30 parameters plus background. So fitting is about intelligence: think, and $\textbf{optimize}$ the smallest possible number of parameters, starting with a good guess.

One parameter optimisation (middle = mu)
<div>
<img src="http://www.jensuhlig.de/Kemm30/error_way.png" width="1000">
</div>

<div>
<img src="http://www.jensuhlig.de/Kemm30/KEMM30_008.jpg" width="900">
</div>

Before we can go into optimisation we have to talk about 

# Functions:

The reason is that you create a function and then optimize the parameters of that function.

Two ways to define functions: the classical way using def

In [None]:
def my_function(s):
    """Print the argument ``s``.

    ``def`` is followed by the name of the function. The brackets are
    mandatory (they do not need to contain anything) and so is the colon.
    """
    # The body must contain at least one statement and it must be indented.
    print(s)


my_function('hello world')

def my_function(s='input to a function'):
    """Print ``s``; the default text is used when no argument is given."""
    print(s)


my_function()

def my_function(s=None, a=None):
    """Demonstrate multiple inputs, the local namespace and return values.

    Parameters
    ----------
    s : object, optional
        Not used for the result; it is only rebound inside the function to
        show that this does not alter a variable of the same name outside.
    a : list, optional
        Returned unchanged. If omitted, a new list
        ``['get', 'a', 'second', 'input']`` is created for this call.

    Returns
    -------
    list
        The list ``a``. If a function returns multiple things with
        ``return a, b`` then the call must be ``c, d = my_function()``.
    """
    if a is None:
        # Build the default here instead of in the signature: a list written
        # in the signature is created only once and shared between all calls,
        # so a caller changing the returned list would change the default.
        a = ['get', 'a', 'second', 'input']
    # Inside the function you can do whatever you want with the variables you
    # were given. A variable with the same name outside is not altered
    # (namespace).
    s = 1
    return a


catching_the_return = my_function('hello world')
' '.join(catching_the_return)

In [None]:
my_function?

A second way to create a function is with something called Lambda Functions. They are usually used if you need the function only once (e.g. when you want to plot it) and are useful for e.g. curve_fit (comes next) or for changing things in DataFrames with the df.apply approach (comes later) 



In [None]:
# Min-max scaling: the smallest value of x maps to 0 and the largest to 1.
normalize = lambda x: (x - x.min()) / (x.max() - x.min())
# Straight line with intercept p[0] and slope p[1].
line = lambda x, p: p[1] * x + p[0]
# Evaluate the line with intercept 1 and slope 3 on a grid that starts at -2
# and advances in steps of 0.1 up to (but not including) 5, then plot it.
x = np.arange(-2, 5, 0.1)
y = line(x, p=[1, 3])
plt.plot(x, y)

### Task:

* Write a standard function that takes x, mu and sigma and returns a gaussian bell curve (with normalisation). Make sure that you use numpy operations only, so that you can give it a vector and receive a vector.<br> 
${\frac {1}{\sqrt {2\pi \sigma ^{2}}}}\operatorname {exp} \left(-{\frac {\left(x-\mu \right)^{2}}{2\sigma ^{2}}}\right)$
* plot this function with mu=0,sigma=1 from -5 to 5 in 0.01 steps 

* Write a lambda function that returns the normalized vector 
* use the cumsum function from numpy to create the cumulative sum of the gaussian from above and normalize it with your lambda function
* Plot this in the same plot (against x)
* What does this resemble?

# Parameter optimisation: Fitting vs. optimisation


## Curve Fit
Starting with curve_fit. We assume that we have a smooth function that has a clear gradient towards the minimum (see top of this page); then we can use curve_fit to quickly determine the parameters we need.

In [None]:
# Normalised gaussian bell curve centred at mu with width sigma, shifted
# vertically by a constant offset. Works on scalars and numpy arrays.
gauss = lambda x, mu=0, sigma=1, offset=0: (
    1 / np.sqrt(2 * np.pi * (sigma ** 2))
    * np.exp((-0.5 / sigma ** 2) * (np.subtract(x, mu)) ** 2)
    + offset
)
                                    
# Create example data: a gaussian (mu=0.5, sigma=0.5) on 200 points between
# -5 and 5, with uniform random noise scaled by 0.1 added on top, and plot it.
x = np.linspace(-5, 5, 200)
y = gauss(x, mu=0.5, sigma=0.5) + 0.1 * np.random.random(x.shape)
fig, ax = plt.subplots()
ax.plot(x, y, 'o', ms=2, label='data')
ax.set_xlabel('measured value')
ax.set_ylabel('occurance')

In [None]:
from scipy.optimize import curve_fit

# Make a guess. The values are passed to gauss in the order of its arguments
# after x, i.e. mu, sigma, offset.
p0 = [1, 1, 0.1]

# Optimise: popt holds the optimised parameters, pcov their covariance matrix.
popt, pcov = curve_fit(gauss, xdata=x, ydata=y, p0=p0)

# Plot both the starting guess and the fit on the axes that hold the data.
ax.plot(x, gauss(x, mu=p0[0], sigma=p0[1], offset=p0[2]), 'b-', label='guess')
ax.plot(x, gauss(x, mu=popt[0], sigma=popt[1], offset=popt[2]), 'r-', label='fit')
ax.legend()

# Get errors from the diagonal of the covariance matrix (works here, but
# careful).
perr = np.sqrt(np.diag(pcov))
# Raw strings keep the LaTeX backslashes literal: '\m' and '\s' inside a
# normal string literal are invalid escape sequences and trigger a warning.
df = pd.DataFrame(
    {'values': popt, 'errors': perr},
    index=[r'$\mu$', r'$\sigma$', '$x_0$'],
)
df = df[['values', 'errors']]
df

The key bit
```
from scipy.optimize import curve_fit
popt,pcov = curve_fit(gauss, xdata=x, ydata=y,p0=[1,0.5,0.1])
```
curve_fit is a least-squares method that takes a function, the target data and a set of starting parameters. The starting parameters are given in the order of the function's arguments after the first one (x).
it returns: 
popt = optimized parameter
and
pcov = covariance matrix.
p_sigma = np.sqrt(np.diag(pcov))


<div>
<img src="http://www.jensuhlig.de/Kemm30/KEMM30_009.jpg" width="400">
</div>

### Tasks:

Read the file, fit the data and plot both the data and the fit.
What is the center position of the peak?
http://www.jensuhlig.de/Kemm30/fit_0.csv

## Scipy minimize

The second way of optimisation uses, in addition to the model function, an "error function" (also called "cost function"). The task of this second function is to create a single "price" for a given set of parameters. The minimize function then minimizes this price.

In [None]:
def gauss(x, p):
    """Model function: normalised gaussian plus a constant offset.

    Parameters
    ----------
    x : scalar or array
        Position(s) at which the model is evaluated.
    p : sequence of three values
        The model parameters in the order ``mu, sigma, offset``.

    Returns
    -------
    The model value(s) at ``x``.
    """
    mu, sigma, offset = p
    norm = 1 / np.sqrt(2 * np.pi * (sigma ** 2))
    arg = (-0.5 / sigma ** 2) * (np.subtract(x, mu)) ** 2
    return norm * np.exp(arg) + offset
# Example data: the model with mu=0.5, sigma=0.5, offset=0.2 on 200 points
# between -5 and 5.
x = np.linspace(-5, 5, 200)
y = gauss(x, [0.5, 0.5, 0.2])
# Add random noise whose amplitude scales with the square root of the signal.
y = y + (y ** 0.5) * 0.5 * np.random.random(x.shape)

fig, ax = plt.subplots(figsize=(4, 4))
ax.plot(x, y, 'o', ms=5, label='data')
ax.set_xlabel('measured value')
ax.set_ylabel('occurance')

The new thing is that you need a second function that produces a single "error" value

In [None]:
def min_gauss(p):
    """Return the sum of squared differences between ``y`` and ``gauss(x, p)``.

    ``x`` and ``y`` are the module-level data. Taking the mean and the root
    (to get a root mean square) is skipped, as the minimum is the minimum.
    """
    residual = y - gauss(x, p)
    return (residual ** 2).sum()
def min_gauss_lin(p):
    """Return the sum of absolute differences between ``y`` and ``gauss(x, p)``.

    A different cost function that uses a more linear approach: it does not
    "punish" strong deviations as much. As such it has the tendency to be
    more stable against outliers but performs badly for peaks.
    """
    residual = y - gauss(x, p)
    return np.abs(residual).sum()

In [None]:
from scipy.optimize import minimize  # the minimization function

# Starting values in the order the model expects them: mu, sigma, offset.
x0 = [1., 0.5, 0.1]
# Nelder-Mead is a standard multi-parameter optimiser; check out other
# choices. Both cost functions start from the same guess.
out = minimize(min_gauss, x0, method='Nelder-Mead')
out2 = minimize(min_gauss_lin, x0, method='Nelder-Mead')

In [None]:
# Data points.
fig, ax = plt.subplots(figsize=(8, 8))
ax.plot(x, y, 'o', ms=5, label='data')
ax.set_xlabel('measured value')
ax.set_ylabel('occurance')

# Model curves: starting guess, fit with the squared cost function (out) and
# fit with the absolute-value cost function (out2).
ax.plot(x, gauss(x, p=x0), label='start', color='blue', lw=5, alpha=0.5)
ax.plot(x, gauss(x, p=out.x), label='fit_sqrt', color='red', linestyle='dashed', lw=5)
ax.plot(x, gauss(x, p=out2.x), label='fit_lin', color='green', lw=5, zorder=0)

# Print the full optimisation report of the first fit.
print(out)
plt.legend()

### Tasks:

Read the files, Fit files and plot both, data and fits:
* http://www.jensuhlig.de/Kemm30/fit_0.csv
* http://www.jensuhlig.de/Kemm30/fit_1.csv  here: try first a separate fit, in which you fit the linear range and then separately the peak.
* http://www.jensuhlig.de/Kemm30/fit_3.csv (two peaks and background)

### Fit the peaks

<div>
<img src="http://www.jensuhlig.de/Kemm30/2D_measured_indicated.png" width="200">
</div>

# Advanced

While scipy minimize is a very useful tool, it is still a bit difficult to handle parameters. A very nice tool that was developed for this is lmfit.

In [None]:
import lmfit

def gauss_with_names(x, par):
    """Model function: normalised gaussian plus offset, with named parameters.

    ``par`` is indexed by name and must provide the entries ``'mu'``,
    ``'sigma'`` and ``'offset'``.
    """
    sigma = par['sigma']
    norm = 1 / np.sqrt(2 * np.pi * (sigma ** 2))
    arg = (-0.5 / sigma ** 2) * (np.subtract(x, par['mu'])) ** 2
    return norm * np.exp(arg) + par['offset']
def min_gauss(par, x, y):
    """Return the sum of squared differences between ``y`` and the model.

    The model is ``gauss_with_names(x, par)``. Taking the mean and the root
    (to get a root mean square) is skipped, as the minimum is the minimum.
    """
    residual = y - gauss_with_names(x, par)
    return (residual ** 2).sum()

# Example data: the model with mu=0.5, sigma=0.5, offset=0.2 plus random
# noise whose amplitude scales with the square root of the signal.
x = np.linspace(-5, 5, 200)
y = gauss(x, [0.5, 0.5, 0.2])
y = y + (y ** 0.5) * 0.5 * np.random.random(x.shape)

# First create an empty parameter object, then add the named parameters with
# their starting values; vary=True marks them as free in the optimisation.
par = lmfit.Parameters()
par.add('mu', value=0, vary=True)
par.add('sigma', value=0.5, vary=True)
par.add('offset', value=0.1, vary=True)

# The data is handed to the cost function as keyword arguments.
mini = lmfit.Minimizer(min_gauss, par, fcn_kws=dict(x=x, y=y))
results = mini.minimize(method='nelder')
results