# Optimization in Deep Learning

In [1]:
import sys

sys.path.append('../src')

In [2]:
import numpy as np
import jax.numpy as jnp
import jax

from optymus import Optimizer

### Simple neural network

In [3]:
class SimpleNeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        # Initialize weights and biases
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))

    def forward(self, X):
        # Forward pass
        self.Z1 = jnp.dot(X, self.W1) + self.b1
        self.A1 = jnp.tanh(self.Z1)
        self.Z2 = jnp.dot(self.A1, self.W2) + self.b2
        return self.Z2

    def loss(self, X, y):
        # Mean Squared Error Loss
        y_pred = self.forward(X)
        return jnp.mean((y_pred - y) ** 2)

    def get_params(self):
        # Get network parameters
        return [self.W1, self.b1, self.W2, self.b2]

    def set_params(self, params):
        # Set network parameters
        self.W1, self.b1, self.W2, self.b2 = params


In [4]:

def train_neural_network(X_train, y_train, nn, learning_rate=0.01, max_iter=100, tol=1e-4, verbose=True, optim='gradient_descent'):
    # Flatten the parameters to optimize
    params = nn.get_params()
    flat_params = np.concatenate([p.flatten() for p in params])

    def f_obj(flat_params):
        # Unflatten the parameters
        shapes = [p.shape for p in params]
        sizes = [np.prod(shape) for shape in shapes]
        new_params = []
        index = 0
        for size, shape in zip(sizes, shapes):
            new_params.append(flat_params[index:index + size].reshape(shape))
            index += size
        nn.set_params(new_params)
        return nn.loss(X_train, y_train)

    opt = Optimizer(f_obj=f_obj, x0=flat_params, learning_rate=learning_rate, max_iter=max_iter, tol=tol, verbose=verbose, method=optim)
    result = opt.get_results()

    # Set the optimal parameters back to the network
    shapes = [p.shape for p in params]
    sizes = [np.prod(shape) for shape in shapes]
    params = []
    index = 0
    for size, shape in zip(sizes, shapes):
        params.append(result['xopt'][index:index + size].reshape(shape))
        index += size
    nn.set_params(params)

    return result


In [5]:
# Generate some sample data
np.random.seed(0)
X_train = np.random.randn(100, 3)
y_train = np.dot(X_train, np.array([1.5, -2.0, 1.0])) + 0.5 * np.random.randn(100)

# Define the neural network
input_size = X_train.shape[1]
hidden_size = 5
output_size = 1
nn = SimpleNeuralNetwork(input_size, hidden_size, output_size)

# Train the neural network
result = train_neural_network(X_train, y_train, nn, learning_rate=0.01, max_iter=100, tol=1e-4, verbose=True, optim='powell')

# Print the results
print("Optimal parameters:", result['xopt'])
print("Minimum loss:", result['fmin'])
print("Number of iterations:", result['num_iter'])

Gradient Descent 0:   1%|          | 1/100 [00:02<04:42,  2.85s/it]

Optimal parameters: [-1.86560232e+00 -1.66547550e+00 -1.67026722e+00 -1.24296492e+00
 -4.37178516e-01 -7.85577189e-01 -6.89505199e-01 -6.75997213e-01
 -3.75554626e-01 -3.64309780e-01  3.02878074e-01  8.17808429e+00
  3.39393760e-01  2.52981315e+00  8.18217049e-01 -1.71098583e+01
 -3.24934367e+01 -1.55676768e+01 -1.95158962e+01 -1.60956517e+01
  2.46781829e-01  3.70799709e-03  1.42065618e-03  1.51999486e-02
  1.71958931e-02  1.63756928e-15]
Minimum loss: 8.34156836344146
Number of iterations: 1



