In [20]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [21]:
import torch
import numpy as np
from optimizer_supervised import Optimizer
from Ising import Ising
from model import TransformerModel

In [22]:
def gpu_setup():
    # Setup for PyTorch:
    if torch.cuda.is_available():
        torch_device = torch.device("cuda")
        print("PyTorch is using GPU {}".format(torch.cuda.current_device()))
    else:
        torch_device = torch.device("cpu")
        print("GPU unavailable; using CPU")

In [23]:
gpu_setup()
torch.set_default_device("cuda")

PyTorch is using GPU 0


In [24]:
system_sizes = torch.arange(2, 21, 2, device="cpu").reshape(-1, 1)
system_sizes

tensor([[ 2],
        [ 4],
        [ 6],
        [ 8],
        [10],
        [12],
        [14],
        [16],
        [18],
        [20]])

In [25]:
Hamiltonians = [Ising(size, periodic=True) for size in system_sizes]

  return func(*args, **kwargs)


In [26]:
param_dim = Hamiltonians[0].param_dim
embedding_size = 32
n_head = 8
n_hid = embedding_size
n_layers = 8
dropout = 0
minibatch = 1000
param_range = None
point_of_interest = None
use_SR = False

In [27]:
# Small allocation for model parameters, layers, etc.
testmodel = TransformerModel(
    system_sizes,
    param_dim,
    embedding_size,
    n_head,
    n_hid,
    n_layers,
    dropout=dropout,
    minibatch=minibatch,
)



In [28]:
testmodel.cuda()

TransformerModel(
  (pos_encoder): TQSPositionalEncoding1D(
    (dropout): Dropout(p=0, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-7): 8 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): Linear(in_features=32, out_features=32, bias=True)
          (linear_Q): Linear(in_features=32, out_features=32, bias=True)
          (linear_K): Linear(in_features=32, out_features=32, bias=True)
          (linear_V): Linear(in_features=32, out_features=32, bias=True)
        )
        (linear1): Linear(in_features=32, out_features=32, bias=True)
        (dropout): Dropout(p=0, inplace=False)
        (linear2): Linear(in_features=32, out_features=32, bias=True)
        (norm1): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0, inplace=False)
        (dropout2): Dropout(p=0, inplace=False)
      )
    )


In [29]:
opt = Optimizer(testmodel, Hamiltonians, point_of_interest=point_of_interest)

## Following .forward


In [48]:
H = Hamiltonians[7]
print(H.system_size)

tensor([16])


In [49]:
spins = H.basis

In [50]:
spins

tensor([[0, 0, 0,  ..., 1, 1, 1],
        [0, 0, 0,  ..., 1, 1, 1],
        [0, 0, 0,  ..., 1, 1, 1],
        ...,
        [0, 0, 0,  ..., 1, 1, 1],
        [0, 0, 1,  ..., 0, 1, 1],
        [0, 1, 0,  ..., 1, 0, 1]], device='cuda:0')

In [51]:
symmetry = H.symmetry
spins_reduced, phases_reduced = symmetry(spins)
print(spins_reduced.shape)

torch.Size([4, 16, 65536])


In [39]:
(spins_reduced[0] == spins).sum() - (spins.shape[0] * spins.shape[1])

tensor(0, device='cuda:0')

In [41]:
n_symm, n, batch0 = spins_reduced.shape
print(n_symm, n, batch0)

4 16 65536


In [44]:
spins_all = spins_reduced.transpose(0, 1).reshape(n, -1)
spins_all.shape

torch.Size([16, 262144])

In [47]:
(2**H.n) * 4

tensor(262144)

In [13]:
testmodel.set_param(system_size=H.system_size, param=torch.tensor([1.0], device="cuda"))

In [14]:
res = testmodel.forward(spins)
res

[tensor([[[-0.7665, -0.6248],
          [-0.7665, -0.6248],
          [-0.7665, -0.6248],
          ...,
          [-0.7665, -0.6248],
          [-0.7665, -0.6248],
          [-0.7665, -0.6248]],
 
         [[-0.6480, -0.7404],
          [-0.6480, -0.7404],
          [-0.6480, -0.7404],
          ...,
          [-0.6752, -0.7114],
          [-0.6752, -0.7114],
          [-0.6752, -0.7114]],
 
         [[-0.6914, -0.6949],
          [-0.6914, -0.6949],
          [-0.6914, -0.6949],
          ...,
          [-0.6938, -0.6925],
          [-0.6938, -0.6925],
          [-0.6938, -0.6925]],
 
         ...,
 
         [[-0.6782, -0.7083],
          [-0.6782, -0.7083],
          [-0.6782, -0.7083],
          ...,
          [-0.6175, -0.7750],
          [-0.6175, -0.7750],
          [-0.6175, -0.7750]],
 
         [[-0.6869, -0.6994],
          [-0.6869, -0.6994],
          [-0.6895, -0.6968],
          ...,
          [-0.6356, -0.7542],
          [-0.6254, -0.7658],
          [-0.6254, -0.7658

In [15]:
del res

In [16]:
del testmodel

In [17]:
import gc

gc.collect()

0

In [18]:
torch.cuda.empty_cache()

In [19]:
src = testmodel.wrap_spins(spins)

NameError: name 'testmodel' is not defined

In [None]:
testmodel.src_mask = testmodel._generate_square_subsequent_mask(src.size(0)).to(
    src.device
)
testmodel.src_mask

tensor([[0., -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf],
        [0., 0., -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf],
        [0., 0., 0., -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf],
        [0., 0., 0., 0., -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf],
        [0., 0., 0., 0., 0., -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf],
        [0., 0., 0., 0., 0., 0., -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf],
        [0., 0., 0., 0., 0., 0., 0., -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf],
        [0., 0., 0., 0., 0., 0., 0., 0., -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf],
        [0., 0., 0., 0., 0.,

In [None]:
system_size = src[
    : testmodel.n_dim, 0, testmodel.phys_dim : testmodel.phys_dim + testmodel.n_dim
].diag()
system_size

tensor([2.7726], device='cuda:0')

In [None]:
system_size = system_size.exp().round().to(torch.int64)
system_size

tensor([16], device='cuda:0')

In [None]:
import math

In [None]:
src = testmodel.encoder(src) * math.sqrt(testmodel.embedding_size)
src

tensor([[[-1.4493,  1.2511,  0.7615,  ..., -0.6516, -1.4590, -0.8698],
         [-1.4493,  1.2511,  0.7615,  ..., -0.6516, -1.4590, -0.8698],
         [-1.4493,  1.2511,  0.7615,  ..., -0.6516, -1.4590, -0.8698],
         ...,
         [-1.4493,  1.2511,  0.7615,  ..., -0.6516, -1.4590, -0.8698],
         [-1.4493,  1.2511,  0.7615,  ..., -0.6516, -1.4590, -0.8698],
         [-1.4493,  1.2511,  0.7615,  ..., -0.6516, -1.4590, -0.8698]],

        [[-0.0892, -0.2254,  0.2675,  ..., -0.2129,  0.3032, -0.1424],
         [-0.0892, -0.2254,  0.2675,  ..., -0.2129,  0.3032, -0.1424],
         [-0.0892, -0.2254,  0.2675,  ..., -0.2129,  0.3032, -0.1424],
         ...,
         [-0.0892, -0.2254,  0.2675,  ..., -0.2129,  0.3032, -0.1424],
         [-0.0892, -0.2254,  0.2675,  ..., -0.2129,  0.3032, -0.1424],
         [-0.0892, -0.2254,  0.2675,  ..., -0.2129,  0.3032, -0.1424]],

        [[ 0.1982, -0.0404, -0.3834,  ..., -0.1147, -0.5085,  0.5536],
         [ 0.1982, -0.0404, -0.3834,  ..., -0

There's a problem if GPU memory usage increases at this point; this operation should be in-place.


In [None]:
src = testmodel.pos_encoder(src)
src

tensor([[[-1.4560,  1.2795,  0.7603,  ..., -0.6402, -1.4689, -0.8660],
         [-1.4560,  1.2795,  0.7603,  ..., -0.6402, -1.4689, -0.8660],
         [-1.4560,  1.2795,  0.7603,  ..., -0.6402, -1.4689, -0.8660],
         ...,
         [-1.4560,  1.2795,  0.7603,  ..., -0.6402, -1.4689, -0.8660],
         [-1.4560,  1.2795,  0.7603,  ..., -0.6402, -1.4689, -0.8660],
         [-1.4560,  1.2795,  0.7603,  ..., -0.6402, -1.4689, -0.8660]],

        [[-0.1021, -0.2299,  0.2635,  ..., -0.2287,  0.3352, -0.1544],
         [-0.1021, -0.2299,  0.2635,  ..., -0.2287,  0.3352, -0.1544],
         [-0.1021, -0.2299,  0.2635,  ..., -0.2287,  0.3352, -0.1544],
         ...,
         [-0.1021, -0.2299,  0.2635,  ..., -0.2287,  0.3352, -0.1544],
         [-0.1021, -0.2299,  0.2635,  ..., -0.2287,  0.3352, -0.1544],
         [-0.1021, -0.2299,  0.2635,  ..., -0.2287,  0.3352, -0.1544]],

        [[ 0.1982,  0.9596, -0.3834,  ...,  0.8853, -0.5085,  1.5536],
         [ 0.1982,  0.9596, -0.3834,  ...,  0

In [None]:
output = testmodel.transformer_encoder(src, testmodel.src_mask)
output

tensor([[[ 1.3218, -0.5965, -1.0745,  ...,  0.3264, -0.4246, -1.3149],
         [ 1.3218, -0.5965, -1.0745,  ...,  0.3264, -0.4246, -1.3149],
         [ 1.3218, -0.5965, -1.0745,  ...,  0.3264, -0.4246, -1.3149],
         ...,
         [ 1.3218, -0.5965, -1.0745,  ...,  0.3264, -0.4246, -1.3149],
         [ 1.3218, -0.5965, -1.0745,  ...,  0.3264, -0.4246, -1.3149],
         [ 1.3218, -0.5965, -1.0745,  ...,  0.3264, -0.4246, -1.3149]],

        [[ 0.9145, -0.9515, -0.9850,  ..., -0.0988, -0.5072, -0.3868],
         [ 0.9145, -0.9515, -0.9850,  ..., -0.0988, -0.5072, -0.3868],
         [ 0.9145, -0.9515, -0.9850,  ..., -0.0988, -0.5072, -0.3868],
         ...,
         [ 0.9145, -0.9515, -0.9850,  ..., -0.0988, -0.5072, -0.3868],
         [ 0.9145, -0.9515, -0.9850,  ..., -0.0988, -0.5072, -0.3868],
         [ 0.9145, -0.9515, -0.9850,  ..., -0.0988, -0.5072, -0.3868]],

        [[ 0.3780, -1.1240, -1.0349,  ..., -0.2109, -0.7638,  1.0329],
         [ 0.3780, -1.1240, -1.0349,  ..., -0