In [1]:
# Change the kernel's working directory to two levels above the notebook's
# location. NOTE(review): presumably this is the repository root, so that the
# `deeplifting` package imported below resolves — confirm.
%cd ../..

/Users/ryandevera/data-science/umn_environments/Deeplifting


In [2]:
# List the contents of the current working directory (presumably a sanity
# check that the `%cd` above landed in the expected place).
%ls

[1m[36mLJ_data[m[m/                        [1m[36mhigh-dimension-paper-results[m[m/
LJ_data.tar                     [1m[36mhigh-dimension-search-results[m[m/
README.md                       [1m[36mimages[m[m/
[1m[36m__pycache__[m[m/                    [1m[36mjobs[m[m/
[1m[36malgorithm_compare_results[m[m/      [1m[36mlow-dimension-search-results[m[m/
config.py                       [1m[36mmodels[m[m/
[1m[36mdata[m[m/                           [1m[36mpaper-images[m[m/
[1m[36mdata-queue[m[m/                     requirements.txt
[1m[36mdata-queue-2023-09-24[m[m/          [1m[36mresults[m[m/
[1m[36mdata-queue-2023-10-18[m[m/          [1m[36msearch_results[m[m/
[1m[36mdeeplifting[m[m/                    tasks.py
deeplifting.png                 test-low-dimension-results.png
[1m[36mexperiments[m[m/                    [1m[36mwandb[m[m/


In [3]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import OneCycleLR

from deeplifting.problems import PROBLEMS_BY_NAME
from deeplifting.models import DeepliftingSkipMLP
from deeplifting.optimization import deeplifting_predictions
from deeplifting.utils import initialize_vector

In [4]:
# --- Problem definition ------------------------------------------------------
# Everything that is read from the problem registry entry is pulled out here.
problem = PROBLEMS_BY_NAME['bukin_n6']
objective = problem['objective']
bounds = problem['bounds']
output_size = problem['dimensions']

# --- Deeplifting model configuration -----------------------------------------
# `trial` is used twice below: as the model seed and as the trial index that
# is forwarded to the objective.
trial = 0
input_size = 1
hidden_sizes = (128,) * 3
activation = 'sine'
output_activation = 'leaky_relu'
agg_function = 'sum'

model = DeepliftingSkipMLP(
    input_size=input_size,
    hidden_sizes=hidden_sizes,
    output_size=output_size,
    # The problem bounds are deliberately not handed to the model here; the
    # `bounds` variable is only used when building x0 in a later cell.
    bounds=None,
    skip_every_n=1,
    activation=activation,
    output_activation=output_activation,
    agg_function=agg_function,
    include_bn=True,
    seed=trial,
)

# --- Objective wrapper -------------------------------------------------------
# Buffer passed to the objective on every call.
# NOTE(review): presumably the objective records its evaluations in here and
# the shape is (trials, iterations, dimensions + 1) — confirm against
# deeplifting.problems.
results = np.zeros((1, 1, 3))


def fn(x):
    """Evaluate the problem objective at ``x``.

    Thin wrapper that fixes the keyword arguments of ``objective`` so that it
    can be called with the candidate point only.

    Parameters
    ----------
    x
        Candidate point forwarded unchanged to ``objective``.

    Returns
    -------
    Whatever ``objective`` returns when called with ``version='pytorch'``.
    """
    return objective(x, results=results, trial=trial, version='pytorch')

In [5]:
# One standard-normal input row of width 5 * output_size, cast to float64.
# NOTE(review): the reason for the factor 5 is not visible here — confirm.
inputs = torch.randn(1, 5 * output_size).double()

# Run the network in float64 as well so that its dtype matches the input.
model = model.double()

# Forward pass; the flattened result is displayed as the cell output.
outputs = model(inputs=inputs)
outputs.flatten()

tensor([-0.0341, -0.0048], dtype=torch.float64,
       grad_fn=<ReshapeAliasBackward0>)

In [6]:
# Build the vector x0 that the network is fitted to in the training cell:
# obtain a NumPy vector from `initialize_vector`, wrap it as a tensor and make
# sure it is float64.
# NOTE(review): presumably `initialize_vector` samples a point inside
# `bounds` — confirm against deeplifting.utils.
x0 = torch.from_numpy(
    initialize_vector(size=output_size, bounds=bounds)
).to(dtype=torch.double)

In [7]:
# Average the network output over the first (batch) axis; this is the same
# reduction the training loop applies before comparing against x0.
model(inputs=inputs).mean(dim=0)

tensor([-0.0341, -0.0048], dtype=torch.float64, grad_fn=<MeanBackward1>)

In [8]:
# Dump every named parameter of the model before training.
# To inspect only the output layer, guard the print with
# `if 'output_layer' in param_name:`.
for param_name, param in model.named_parameters():
    print(param_name, param)

layers.0.activation_layer.amplitude Parameter containing:
tensor([3.1416], dtype=torch.float64, requires_grad=True)
layers.0.activation_layer.scale Parameter containing:
tensor([1.], dtype=torch.float64, requires_grad=True)
layers.0.activation_layer.shift Parameter containing:
tensor([0.], dtype=torch.float64, requires_grad=True)
layers.0.activation_layer.y_shift Parameter containing:
tensor([0.], dtype=torch.float64, requires_grad=True)
layers.0.linear.weight Parameter containing:
tensor([[-0.1218,  0.0848, -0.0063,  ..., -0.0622, -0.3021, -0.2094],
        [-0.1304,  0.0117,  0.1250,  ...,  0.2626, -0.0651,  0.2366],
        [-0.0510,  0.0335,  0.2863,  ...,  0.2732, -0.2050, -0.1456],
        ...,
        [ 0.1591,  0.1206,  0.2451,  ...,  0.0331,  0.2700, -0.2300],
        [ 0.1038,  0.1397,  0.0829,  ...,  0.2652, -0.2947, -0.1706],
        [-0.0676, -0.0939,  0.2921,  ..., -0.0366,  0.0462,  0.0822]],
       dtype=torch.float64, requires_grad=True)
layers.0.linear.bias Parameter 

In [9]:
# Fit the network so that its batch-averaged output reproduces x0.
#
# The loop minimises the mean squared error between the model output and x0
# and stops early once the L2 distance between the two falls below
# `tolerance`.

# --- Training configuration --------------------------------------------------
epochs = 100000
tolerance = 1e-5  # early-stopping threshold on ||outputs - x0||_2
learning_rate = 1e-4  # used for the optimizer and as the scheduler's max_lr
log_every = 10000  # print progress once every this many epochs

# --- Optimizer and schedule --------------------------------------------------
model.train()

# Only trainable parameters are handed to the optimizer, so any parameter
# whose `requires_grad` flag has been switched off beforehand (e.g. to train
# nothing but the output layer) is skipped.
trainable_parameters = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.AdamW(trainable_parameters, lr=learning_rate)

# The scheduler is stepped once per epoch, so the one-cycle schedule spans
# exactly `epochs` steps.
scheduler = OneCycleLR(
    optimizer,
    max_lr=learning_rate,
    epochs=epochs,
    steps_per_epoch=1,
    pct_start=0.1,
)
criterion = nn.MSELoss()

# --- Training loop -----------------------------------------------------------
for epoch in range(epochs):
    optimizer.zero_grad()

    # Reduce over the batch axis to obtain a single candidate point.
    outputs = model(inputs=inputs).mean(axis=0).flatten()

    # nn.MSELoss expects (prediction, target).
    loss = criterion(outputs, x0)
    loss.backward()
    optimizer.step()
    scheduler.step()

    # Monitoring only, so keep it out of the autograd graph. The distance is
    # measured on the outputs produced before this epoch's parameter update.
    with torch.no_grad():
        l2_distance = torch.linalg.vector_norm(outputs - x0, ord=2).item()

    # Print loss and L2 distance every `log_every` epochs
    if (epoch + 1) % log_every == 0:
        print(
            f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}'
            f', L2 Distance: {l2_distance:.4e}'
        )

    # Check the stopping criterion
    if l2_distance < tolerance:
        print(
            f'Training converged at epoch {epoch+1} with'
            f' L2 Distance: {l2_distance:.4e}'
        )
        break

print(f'Initial x0 in fn = {x0}')
print(f'Fitted x0 in fn = {outputs}')

Epoch [10000/100000], Loss: 0.5343, L2 Distance: 1.0337e+00
Epoch [20000/100000], Loss: 0.0000, L2 Distance: 1.0911e-04
Training converged at epoch 24567 with L2 Distance: 8.8606e-06
Initial x0 in fn = tensor([-9.5119,  1.2911], dtype=torch.float64)
Fitted x0 in fn = tensor([-9.5119,  1.2911], dtype=torch.float64, grad_fn=<MeanBackward1>)


In [10]:
# Dump every named parameter again, now that training has finished, so the
# values can be compared with the pre-training dump.
# To inspect only the output layer, guard the print with
# `if 'output_layer' in param_name:`.
for param_name, param in model.named_parameters():
    print(param_name, param)

layers.0.activation_layer.amplitude Parameter containing:
tensor([3.0910], dtype=torch.float64, requires_grad=True)
layers.0.activation_layer.scale Parameter containing:
tensor([0.9834], dtype=torch.float64, requires_grad=True)
layers.0.activation_layer.shift Parameter containing:
tensor([-0.0030], dtype=torch.float64, requires_grad=True)
layers.0.activation_layer.y_shift Parameter containing:
tensor([0.], dtype=torch.float64, requires_grad=True)
layers.0.linear.weight Parameter containing:
tensor([[-0.1211,  0.0848, -0.0045,  ..., -0.0626, -0.2980, -0.2037],
        [-0.1265,  0.0101,  0.1213,  ...,  0.2589, -0.0625,  0.2307],
        [-0.0484,  0.0312,  0.2792,  ...,  0.2696, -0.1994, -0.1444],
        ...,
        [ 0.1577,  0.1167,  0.2388,  ...,  0.0340,  0.2664, -0.2272],
        [ 0.1139,  0.1249,  0.0692,  ...,  0.2722, -0.2769, -0.1795],
        [-0.0694, -0.0891,  0.2895,  ..., -0.0389,  0.0423,  0.0837]],
       dtype=torch.float64, requires_grad=True)
layers.0.linear.bias P