In [2]:
import sys

sys.path.append('../src')

In [3]:
import numpy as np
import jax.numpy as jnp
import jax

from optymus import Optimizer

In [4]:
class SimpleNeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        # Initialize weights and biases
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))

    def forward(self, X):
        # Forward pass
        self.Z1 = jnp.dot(X, self.W1) + self.b1
        self.A1 = jnp.tanh(self.Z1)
        self.Z2 = jnp.dot(self.A1, self.W2) + self.b2
        return self.Z2

    def loss(self, X, y):
        # Mean Squared Error Loss
        y_pred = self.forward(X)
        return jnp.mean((y_pred - y) ** 2)

    def get_params(self):
        # Get network parameters
        return [self.W1, self.b1, self.W2, self.b2]

    def set_params(self, params):
        # Set network parameters
        self.W1, self.b1, self.W2, self.b2 = params


In [14]:

def train_neural_network(X_train, y_train, nn, learning_rate=0.01, max_iter=100, tol=1e-4, verbose=True, optim='gradient_descent'):
    # Flatten the parameters to optimize
    params = nn.get_params()
    flat_params = np.concatenate([p.flatten() for p in params])

    def f_obj(flat_params):
        # Unflatten the parameters
        shapes = [p.shape for p in params]
        sizes = [np.prod(shape) for shape in shapes]
        new_params = []
        index = 0
        for size, shape in zip(sizes, shapes):
            new_params.append(flat_params[index:index + size].reshape(shape))
            index += size
        nn.set_params(new_params)
        return nn.loss(X_train, y_train)
    
    opt = Optimizer(f_obj=f_obj, x0=flat_params, learning_rate=learning_rate, max_iter=max_iter, tol=tol, verbose=verbose, method=optim)
    result = opt.get_results()
    
    # Set the optimal parameters back to the network
    shapes = [p.shape for p in params]
    sizes = [np.prod(shape) for shape in shapes]
    params = []
    index = 0
    for size, shape in zip(sizes, shapes):
        params.append(result['xopt'][index:index + size].reshape(shape))
        index += size
    nn.set_params(params)
    
    return result


In [15]:
# Generate some sample data
np.random.seed(0)
X_train = np.random.randn(100, 3)
y_train = np.dot(X_train, np.array([1.5, -2.0, 1.0])) + 0.5 * np.random.randn(100)

# Define the neural network
input_size = X_train.shape[1]
hidden_size = 5
output_size = 1
nn = SimpleNeuralNetwork(input_size, hidden_size, output_size)

# Train the neural network
result = train_neural_network(X_train, y_train, nn, learning_rate=0.01, max_iter=100, tol=1e-4, verbose=True, optim='gradient_descent')

# Print the results
print("Optimal parameters:", result['xopt'])
print("Minimum loss:", result['fmin'])
print("Number of iterations:", result['num_iter'])

Adam 0:   0%|          | 0/100 [00:00<?, ?it/s]

Adam 0: 100%|██████████| 100/100 [00:01<00:00, 84.30it/s]

Optimal parameters: [ 0.01977234 -0.00582409  0.04228204  0.011235   -0.01882133  0.02333755
  0.00631271  0.02514589 -0.00213405 -0.01806712 -0.04017076  0.00631022
 -0.05040128 -0.00588307  0.0253066  -0.1451187  -0.18447872 -0.00876499
 -0.17868824 -0.19668901  0.09780558  0.12132452  0.00746448  0.11234963
  0.13512725 -0.2021622 ]
Minimum loss: 8.341569418836764
Number of iterations: 100



