# Running the Supervised Optimizer

This notebook runs the supervised optimizer from start to finish, starting from pretrained weights.


In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import numpy as np
import os
import json
from Ising import Ising
from model import TransformerModel
from optimizer_supervised import Optimizer

In [3]:
def gpu_setup():
    """Pick the PyTorch device to run on, report the choice, and return it.

    Returns:
        torch.device: ``cuda`` when ``torch.cuda.is_available()`` is true,
        otherwise ``cpu``.
    """
    if torch.cuda.is_available():
        torch_device = torch.device("cuda")
        print("PyTorch is using GPU {}".format(torch.cuda.current_device()))
    else:
        torch_device = torch.device("cpu")
        print("GPU unavailable; using CPU")
    # Hand the device back to the caller; previously it was computed and then
    # discarded, so the function always returned None.
    return torch_device

In [4]:
# Print which device PyTorch will run on.
gpu_setup()

PyTorch is using GPU 0


In [5]:
# Create new tensors on the GPU by default, but fall back to the CPU when CUDA
# is not available so tensor creation in later cells does not fail outright.
torch.set_default_device("cuda" if torch.cuda.is_available() else "cpu")

In [6]:
import plotly.graph_objects as go
import numpy as np


def plot_tensor(tens, labels, opacity=0.7, size=5):
    """Show a rank-3 array as an interactive 3-D scatter plot.

    One marker is drawn per element of ``tens`` at its integer index
    ``(i, j, k)`` and coloured by the element's value.

    Args:
        tens: Rank-3 array (anything ``np.asarray`` accepts).
        labels: Sequence whose first three items become the x, y and z axis
            titles.
        opacity: Marker opacity forwarded to plotly.
        size: Marker size forwarded to plotly.

    Returns:
        None. The shape is printed and the figure is displayed with
        ``fig.show()``.
    """
    values = np.asarray(tens)

    x = np.arange(values.shape[0])
    y = np.arange(values.shape[1])
    z = np.arange(values.shape[2])

    print(f"(x, y, z) = ({len(x)}, {len(y)}, {len(z)})")

    X, Y, Z = np.meshgrid(x, y, z)

    # Look up every colour with a single fancy-indexing call. This yields the
    # same values, in the same order, as calling tens[x, y, z] once per point
    # through np.vectorize, but without a Python-level call per point and
    # without failing on empty inputs.
    colors = values[X, Y, Z].flatten()

    fig = go.Figure(
        data=[
            go.Scatter3d(
                x=X.flatten(),
                y=Y.flatten(),
                z=Z.flatten(),
                mode="markers",
                marker=dict(
                    size=size,
                    color=colors,
                    colorscale="bupu",  # choose a colorscale
                    opacity=opacity,
                ),
            )
        ]
    )

    fig.update_layout(
        scene=dict(xaxis_title=labels[0], yaxis_title=labels[1], zaxis_title=labels[2]),
    )
    fig.show()

## New Probabilistic Batched Method


In [7]:
# Column tensor of system sizes; as written this yields the single size 4.
# NOTE(review): the saved output below reports size 18, so it was produced by a
# different version of this line.
system_sizes = torch.arange(4, 4 + 2, 2).reshape(-1, 1)
# One Ising Hamiltonian per system size, constructed with periodic=True.
Hamiltonians = [Ising(size, periodic=True) for size in system_sizes]
param_dim = Hamiltonians[0].param_dim
# Model hyperparameters; these are passed to TransformerModel in a later cell.
embedding_size = 32
n_head = 8
n_hid = embedding_size  # feed-forward width, kept equal to the embedding size
n_layers = 8
dropout = 0
minibatch = 1000
# NOTE(review): use_SR is not read anywhere in the visible notebook, and
# param_range is reassigned before its first use; confirm both are still needed.
param_range = None
point_of_interest = None
use_SR = False

print("Sizes:\n", system_sizes)
print("Dimensions of parameter space:", param_dim)
print("Number of units in a feedforward layer:", n_hid)

  return func(*args, **kwargs)
  return func(*args, **kwargs)


Sizes:
 tensor([[18]], device='cuda:0')
Dimensions of parameter space: 1
Number of units in a feedforward layer: 32


In [8]:
import math

gaussian_mean = 1.0
gaussian_std = 0.05
# Normalisation constant of a Gaussian with standard deviation gaussian_std:
# 1 / (sigma * sqrt(2 * pi)). The previous constant left out sigma, so the
# function was only a true density for sigma == 1. Relative weights between
# parameter values are unaffected by this constant factor.
gaussian_coeff = 1 / (gaussian_std * math.sqrt(2 * math.pi))


def probability_distribution(param):
    """Gaussian density N(gaussian_mean, gaussian_std**2) evaluated at ``param``.

    Args:
        param: ``torch.Tensor`` of parameter values (any shape).

    Returns:
        ``torch.Tensor`` of the same shape holding the density at each value.
    """
    return gaussian_coeff * torch.exp(
        -0.5 * (((param - gaussian_mean) ** 2) / gaussian_std**2)
    )

In [9]:
# Directory with the precomputed TFIM ground-state data; the saved output shows
# a `<size>.arrow` file being read from it.
data_dir_path = os.path.join("TFIM_ground_states", "2024-07-24T19-26-39.836")
for ham in Hamiltonians:
    # Load the data for this Hamiltonian, then have its training dataset draw
    # samples according to the Gaussian density defined in the previous cell.
    ham.load_dataset(data_dir_path, batch_size=30000, samples_in_epoch=100)
    ham.training_dataset.set_sampling_distribution(probability_distribution)

print("Hamiltonians:", Hamiltonians)

  return func(*args, **kwargs)


Loaded dataset for system size 18 from TFIM_ground_states/2024-07-24T19-26-39.836/18.arrow.
(h_min, h_step, h_max) = (0.5, 0.01, 1.5).
Hamiltonians: [<Ising.Ising object at 0x7d4f85601130>]


In [10]:
# Display the dataset loaded for the first Hamiltonian (the saved output shows
# the columns N, h, energy and state).
Hamiltonians[0].dataset

Unnamed: 0,N,h,energy,state
0,18,0.50,-19.143800,"[0.6074848784174569, 0.07720456699543793, 0.07..."
1,18,0.51,-19.190860,"[0.603442449533376, 0.07828052250174836, 0.078..."
2,18,0.52,-19.238925,"[0.5989958034035765, 0.07928533711807846, 0.07..."
3,18,0.53,-19.288001,"[0.5951097360014603, 0.08034609824462283, 0.08..."
4,18,0.54,-19.338093,"[0.5908188007555905, 0.08133439307972481, 0.08..."
...,...,...,...,...
96,18,1.46,-29.465884,"[0.04926065985698775, 0.02149227513715912, 0.0..."
97,18,1.47,-29.622582,"[0.04807624290278999, 0.021117536779159472, 0...."
98,18,1.48,-29.779636,"[0.04693857781807465, 0.020755229985387716, 0...."
99,18,1.49,-29.937037,"[0.045845243723527766, 0.020404784899805916, 0..."


In [11]:
# Build the transformer from the hyperparameters set in the configuration cell.
testmodel = TransformerModel(
    system_sizes,
    param_dim,
    embedding_size,
    n_head,
    n_hid,
    n_layers,
    dropout=dropout,
    minibatch=minibatch,
)

# Location of the pretrained checkpoint, relative to the working directory.
results_dir = "results"
paper_checkpoint_name = "ckpt_100000_Ising_32_8_8_0.ckpt"
paper_checkpoint_path = os.path.join(results_dir, paper_checkpoint_name)
# map_location="cpu" lets the file load on machines without the GPU it was
# saved from; load_state_dict then copies each tensor onto the device of the
# matching model parameter. weights_only=True restricts unpickling to tensors
# and plain containers, so a tampered checkpoint cannot execute arbitrary code.
checkpoint = torch.load(paper_checkpoint_path, map_location="cpu", weights_only=True)
testmodel.load_state_dict(checkpoint)

  return func(*args, **kwargs)


<All keys matched successfully>

In [12]:
# Move the model to the GPU. The call is the cell's last expression, so the
# module's repr is displayed as the output.
testmodel.cuda()

TransformerModel(
  (pos_encoder): TQSPositionalEncoding1D(
    (dropout): Dropout(p=0, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-7): 8 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): Linear(in_features=32, out_features=32, bias=True)
          (linear_Q): Linear(in_features=32, out_features=32, bias=True)
          (linear_K): Linear(in_features=32, out_features=32, bias=True)
          (linear_V): Linear(in_features=32, out_features=32, bias=True)
        )
        (linear1): Linear(in_features=32, out_features=32, bias=True)
        (dropout): Dropout(p=0, inplace=False)
        (linear2): Linear(in_features=32, out_features=32, bias=True)
        (norm1): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0, inplace=False)
        (dropout2): Dropout(p=0, inplace=False)
      )
    )


In [13]:
# NOTE(review): this rebinds `Optimizer`, replacing the class imported from
# `optimizer_supervised` in the imports cell. Every later `Optimizer(...)` call,
# including the one under "Old Non-Batched Method", gets the batched class
# unless the original import is re-run first.
from optimizer_supervised_batches import Optimizer

In [14]:
# Build the optimizer for the pretrained model and the loaded Hamiltonians.
# When cells run in order, `Optimizer` here is the class from
# optimizer_supervised_batches.
opt = Optimizer(testmodel, Hamiltonians, point_of_interest=point_of_interest)

In [15]:
# Plot the training dataset's `sampled` tensor, with a singleton axis inserted
# at position 1 so it has the three axes plot_tensor expects.
# NOTE(review): in the saved run the shape was (101, 1, 262144) and the cell was
# interrupted; that is roughly 26 million markers.
plot_tensor(
    Hamiltonians[0].training_dataset.sampled.unsqueeze(1).cpu().numpy(),
    ["batch", "system size", "parameter"],
    opacity=0.5,
    size=3,
)

(x, y, z) = (101, 1, 262144)


KeyboardInterrupt: 

In [16]:
# Train for a single epoch (start_iter=0).
# NOTE(review): the saved run failed here with
# "RuntimeError: number of categories cannot exceed 2^24".
opt.train(epochs=1, start_iter=0)

RuntimeError: number of categories cannot exceed 2^24

In [33]:
# Inspect the training dataset's `sampled` tensor.
# NOTE(review): the execution count (33) shows this cell was run out of order
# relative to the cells above, so its saved output may not be reproducible from
# a fresh top-to-bottom run.
Hamiltonians[0].training_dataset.sampled

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0')

In [34]:
# Same plot call as the earlier plotting cell; in the saved run the tensor shape
# was (101, 1, 16).
# NOTE(review): execution count 34, so this was run out of order with the cells above.
plot_tensor(
    Hamiltonians[0].training_dataset.sampled.unsqueeze(1).cpu().numpy(),
    ["batch", "system size", "parameter"],
    opacity=0.5,
    size=3,
)

(x, y, z) = (101, 1, 16)


## Old Non-Batched Method


In [None]:
# Even system sizes 2, 4, ..., 16 as a column tensor.
system_sizes = torch.arange(2, 16 + 1, 2).reshape(-1, 1)
# NOTE(review): this first list is only used to read `param_dim`; `Hamiltonians`
# is rebuilt further down without `periodic=True`. Confirm which construction is
# intended and drop the other.
Hamiltonians = [Ising(size, periodic=True) for size in system_sizes]
param_dim = Hamiltonians[0].param_dim
# Model hyperparameters; these are passed to TransformerModel in the next cell.
embedding_size = 32
n_head = 8
n_hid = embedding_size
n_layers = 8
dropout = 0
minibatch = 1000
param_range = None
point_of_interest = None
use_SR = False

# Second construction of the Hamiltonians; this is the list the rest of the
# section actually uses.
Hamiltonians = [Ising(L) for L in system_sizes]
data_dir_path = os.path.join("TFIM_ground_states", "2024-07-24T19-26-39.836")
for ham in Hamiltonians:
    # Unlike the batched section, no batch_size / samples_in_epoch are passed here.
    ham.load_dataset(data_dir_path)

print("Sizes:\n", system_sizes)
print("Hamiltonians:", Hamiltonians)
print("Dimensions of parameter space:", param_dim)
print("Number of units in a feedforward layer:", n_hid)

  return func(*args, **kwargs)


KeyboardInterrupt: 

In [None]:
# Build the transformer from the hyperparameters set in the configuration cell
# of this section.
testmodel = TransformerModel(
    system_sizes,
    param_dim,
    embedding_size,
    n_head,
    n_hid,
    n_layers,
    dropout=dropout,
    minibatch=minibatch,
)

# Location of the pretrained checkpoint, relative to the working directory.
results_dir = "results"
paper_checkpoint_name = "ckpt_100000_Ising_32_8_8_0.ckpt"
paper_checkpoint_path = os.path.join(results_dir, paper_checkpoint_name)
# map_location="cpu" lets the file load on machines without the GPU it was
# saved from; load_state_dict then copies each tensor onto the device of the
# matching model parameter. weights_only=True restricts unpickling to tensors
# and plain containers, so a tampered checkpoint cannot execute arbitrary code.
checkpoint = torch.load(paper_checkpoint_path, map_location="cpu", weights_only=True)
testmodel.load_state_dict(checkpoint)



<All keys matched successfully>

In [None]:
# Move the model to the GPU. The call is the cell's last expression, so the
# module's repr is displayed as the output.
testmodel.cuda()

TransformerModel(
  (pos_encoder): TQSPositionalEncoding1D(
    (dropout): Dropout(p=0, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-7): 8 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): Linear(in_features=32, out_features=32, bias=True)
          (linear_Q): Linear(in_features=32, out_features=32, bias=True)
          (linear_K): Linear(in_features=32, out_features=32, bias=True)
          (linear_V): Linear(in_features=32, out_features=32, bias=True)
        )
        (linear1): Linear(in_features=32, out_features=32, bias=True)
        (dropout): Dropout(p=0, inplace=False)
        (linear2): Linear(in_features=32, out_features=32, bias=True)
        (norm1): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0, inplace=False)
        (dropout2): Dropout(p=0, inplace=False)
      )
    )


In [None]:
# Build the optimizer for this section's model and Hamiltonians.
# NOTE(review): which `Optimizer` class this is depends on which import ran
# last (optimizer_supervised or optimizer_supervised_batches). The saved run
# failed here with a cuda:0 / cpu device-mismatch RuntimeError.
opt = Optimizer(testmodel, Hamiltonians, point_of_interest=point_of_interest)

  return func(*args, **kwargs)
  """


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!

In [None]:
import cProfile
import pstats

In [None]:
# Settings for the training call below: number of epochs, plus a parameter
# range and step. Presumably these are the interval and spacing of the sweep;
# the saved log shows points 0.5, 0.51, 0.52, ...
epochs = 3
param_range = torch.tensor([[0.5, 1.5]])
param_step = torch.tensor([0.01])

In [None]:
# Set the model's `minibatch` attribute (the same value that was passed as
# `minibatch` when the model was constructed).
testmodel.minibatch = 1000

In [None]:
# Optional profiling wrappers, currently disabled:
# with cProfile.Profile() as pr:
# with torch.autograd.profiler.profile(use_cuda=True) as prof:
# NOTE(review): the saved run printed one line per forward pass and then ended
# with a CUDA OutOfMemoryError.
opt.train(epochs=epochs, param_range=param_range, param_step=param_step, start_iter=0)

Ran forward for tensor([2], device='cuda:0') spins at point (0.5,)
Ran forward for tensor([2], device='cuda:0') spins at point (0.5099999997764826,)
Ran forward for tensor([2], device='cuda:0') spins at point (0.5199999995529652,)
Ran forward for tensor([2], device='cuda:0') spins at point (0.5299999993294477,)
Ran forward for tensor([2], device='cuda:0') spins at point (0.5399999991059303,)
Ran forward for tensor([2], device='cuda:0') spins at point (0.5499999988824129,)
Ran forward for tensor([2], device='cuda:0') spins at point (0.5599999986588955,)
Ran forward for tensor([2], device='cuda:0') spins at point (0.5699999984353781,)
Ran forward for tensor([2], device='cuda:0') spins at point (0.5799999982118607,)
Ran forward for tensor([2], device='cuda:0') spins at point (0.5899999979883432,)
Ran forward for tensor([2], device='cuda:0') spins at point (0.5999999977648258,)
Ran forward for tensor([2], device='cuda:0') spins at point (0.6099999975413084,)
Ran forward for tensor([2], dev

  """


OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 