# Optimization

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

## Example 1: One-Parameter Linear Model
To illustrate the concept of optimization, we will first investigate fitting a model of the form
$$ \hat{y} = px $$
to a set of linear-looking data.

First, read in the example data set using `numpy`:

In [None]:
# Read the comma-separated example data; later cells use column 0 as x
# and column 1 as y.
data_1 = np.genfromtxt(
    os.path.join('data', 'optimization_data', 'linear_data.csv'),
    delimiter=',',
)

We can make a plot to take a peek at our data:

In [None]:
# Show the raw observations (column 0 on x, column 1 on y) as blue dots
# on fixed axes.
plt.figure()
plt.plot(data_1[:, 0], data_1[:, 1], 'b.')
plt.title('Data')
plt.grid(linestyle='--', linewidth=0.4)
plt.xlim([0, 1])
plt.ylim([0, 2]);

To fit our model to this data, we can calculate the cost (the sum of squared errors between the model and the data) for each candidate slope $p$ in a range of test values:

In [None]:
# Candidate slopes to evaluate: 0 to 4 in steps of 0.01.
p_test = np.arange(0, 4.01, 0.01)

# Sum of squared residuals of the model y = p*x for every candidate slope.
# p_test[:, np.newaxis] is a column, so multiplying by the x-values
# broadcasts to a (len(p_test), n_points) array of predictions; summing
# over axis 1 collapses the data points and leaves one cost per slope.
cost = np.sum(
    (p_test[:, np.newaxis] * data_1[:, 0] - data_1[:, 1]) ** 2,
    axis=1,
)

Next, we can plot the cost function and compare with the model fit. First, we write a function to make the plot and then pass it to the `interact` function below.

In [None]:
def plot_cost_and_model(p, p_test, cost):
    """Plot the cost curve and the model line for one candidate slope.

    The left panel shows ``cost`` against ``p_test`` and marks the grid
    point closest to ``p``. The right panel overlays the line ``y = p*x``
    on the observations stored in the notebook-level ``data_1`` array.

    Parameters
    ----------
    p : float
        Slope to highlight on the cost curve and to draw as the model.
    p_test : array
        Candidate slopes at which ``cost`` was evaluated.
    cost : array
        Cost values, one per entry of ``p_test``.
    """
    plt.figure(figsize=(14, 5))

    # Left panel: cost as a function of the slope, coloured by cost value.
    plt.subplot(1, 2, 1)
    plt.scatter(p_test, cost, c=cost, cmap='turbo')
    # p may not coincide exactly with a grid value, so read the cost at
    # the nearest evaluated slope.
    index = np.argmin(np.abs(p_test - p))
    # A white disc drawn over a slightly larger black disc gives a white
    # marker with a black outline.
    plt.plot(p, cost[index], 'ko', markersize=10)
    plt.plot(p, cost[index], 'wo', markersize=8)
    plt.gca().set_xlim([0, 4])
    plt.gca().set_ylim([0, 14])
    # Raw strings keep the LaTeX backslashes literal without relying on
    # invalid escape sequences.
    plt.ylabel(r'cost: $\Sigma (\hat{y} - y)^2$')
    plt.xlabel('parameter: $p$')
    plt.grid(linestyle='--', linewidth=0.4)

    # Right panel: the model line for this slope on top of the data.
    plt.subplot(1, 2, 2)
    x = np.arange(0, 1.1, 0.1)
    plt.plot(x, p * x, 'k-', label=r'model: $\hat{y} = px$')
    plt.plot(data_1[:, 0], data_1[:, 1], 'b.')
    plt.gca().set_xlim([0, 1])
    plt.gca().set_ylim([0, 2])
    plt.grid(linestyle='--', linewidth=0.4)
    plt.legend(loc=2)


Now, we can visualize how different models compare in both their cost and their visual fit to the data:

In [None]:
# Drive the slope with a slider; the precomputed grid and cost curve are
# passed through unchanged via fixed().
interact(
    plot_cost_and_model,
    p=widgets.FloatSlider(value=0.5, min=0, max=4, step=0.01),
    p_test=fixed(p_test),
    cost=fixed(cost),
);

## Example 2: Two Parameter Nonlinear Model
In this example, we will optimize the parameters $c_0$ and $c_1$ in the following model:
$$ \hat{y} = c_0 e^{c_1t} $$
First, we'll read in an example data set:

In [None]:
# Read the comma-separated example data; later cells plot column 0 as the
# day and column 1 as the phytoplankton concentration.
data_2 = np.genfromtxt(
    os.path.join('data', 'optimization_data', 'nonlinear_data.csv'),
    delimiter=',',
)

Next, take a look at the data:

In [None]:
# Draw each observation as a white-filled circle with a black rim by
# stacking a slightly smaller white marker on top of a black one.
plt.plot(data_2[:, 0], data_2[:, 1], 'ko', markersize=8)
plt.plot(data_2[:, 0], data_2[:, 1], 'wo', markersize=7)
plt.xlabel('Day')
plt.ylabel('Phytoplankton (cells/ml)')
plt.grid(linestyle='--', linewidth=0.4)
plt.xlim([0, 26])
plt.ylim([0, 15e6]);

As above, we can calculate the cost of our model over a range of parameter values. Here, we explore the parameter space of $c_0$ and $c_1$:

In [None]:
# Grid of candidate parameter values for the model y = c0 * exp(c1 * t).
c1_test = np.arange(0, 1.01, 0.01)
c0_test = np.arange(0, 5e5, 500)

# cost[i, j] is the sum of squared residuals for c1_test[i] and c0_test[j].
cost = np.zeros((len(c1_test), len(c0_test)))
for row, rate in enumerate(c1_test):
    # The exponential depends only on c1, so evaluate it once per row
    # instead of once per (c0, c1) pair.
    growth = np.exp(data_2[:, 0] * rate)
    # Broadcasting the column of c0 values against the growth curve gives
    # a (len(c0_test), n_points) array of predictions; summing over the
    # data points yields the cost for every c0 at this c1.
    cost[row, :] = np.sum(
        (c0_test[:, np.newaxis] * growth - data_2[:, 1]) ** 2,
        axis=1,
    )

Similar to above, we will visually explore the parameter space by defining a plot function and then exploring it with the interact tool:

In [None]:
def plot_cost_and_model_2d(c0, c1, c0_test, c1_test, cost):
    """Plot the 2-D cost surface and the model curve for one (c0, c1) pair.

    The left panel shows ``log10(cost)`` over the ``c0_test`` x ``c1_test``
    grid and marks the point ``(c0, c1)``. The right panel overlays the
    curve ``c0 * exp(c1 * t)`` on the observations stored in the
    notebook-level ``data_2`` array.

    Parameters
    ----------
    c0, c1 : float
        Parameter values to highlight and to draw as the model curve.
    c0_test, c1_test : array
        Grid coordinates along the x-axis (``c0``) and y-axis (``c1``).
    cost : 2-D array
        Cost on the grid, indexed as ``cost[c1_index, c0_index]``.
    """
    plt.figure(figsize=(14, 5))

    # Left panel: cost surface on a log scale.
    plt.subplot(1, 2, 1)
    C = plt.pcolormesh(c0_test, c1_test, np.log10(cost), cmap='turbo')
    cbar = plt.colorbar(C)
    cbar.set_label('log(error)')
    # A white disc drawn over a slightly larger black disc gives a white
    # marker with a black outline.
    plt.plot(c0, c1, 'ko', markersize=10)
    plt.plot(c0, c1, 'wo', markersize=8)
    plt.xlabel('$c_0$')
    plt.ylabel('$c_1$')
    # c0 runs along x and c1 along y. The original set the x-limits twice
    # and never set the y-limits.
    plt.gca().set_xlim([0, 5e5])
    plt.gca().set_ylim([0, 1])

    # Right panel: the model curve for these parameters on top of the data.
    plt.subplot(1, 2, 2)
    plt.plot(data_2[:, 0], data_2[:, 1], 'ko', markersize=8)
    plt.plot(data_2[:, 0], data_2[:, 1], 'wo', markersize=7)
    t = np.arange(0, 26)
    plt.plot(t, c0 * np.exp(c1 * t))
    plt.grid(linestyle='--', linewidth=0.4)
    plt.ylabel('Phytoplankton (cells/ml)')
    plt.xlabel('Day')
    plt.gca().set_xlim([0, 26])
    plt.gca().set_ylim([0, 15e6])

In [None]:
# Drive both parameters with sliders; the precomputed grid axes and cost
# surface are passed through unchanged via fixed().
interact(
    plot_cost_and_model_2d,
    c0=widgets.FloatSlider(value=1.5e5, min=0, max=5e5, step=500),
    c1=widgets.FloatSlider(value=0.14, min=0, max=1, step=0.01),
    c0_test=fixed(c0_test),
    c1_test=fixed(c1_test),
    cost=fixed(cost),
);

Using the cost function, we can determine the two parameters that yield the lowest cost:

In [None]:
# Locate every grid cell whose cost equals the global minimum. Rows of
# `cost` index c1 and columns index c0.
min_c1_index, min_c0_index = np.nonzero(cost == cost.min())

# If several cells tie, keep the first one in row-major order.
c0_best = c0_test[min_c0_index[0]]
c1_best = c1_test[min_c1_index[0]]

print('c_0 = ', c0_best)
print('c_1 = ', c1_best)