## Setup

In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell executes so edits to the project packages imported below
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os

# Root of the project checkout. Every relative path used later in this
# notebook ("results", "TFIM_ground_states", "./createdata/...") is resolved
# against this directory once it becomes the working directory.
# Set the TQS_PROJECT_PATH environment variable to run the notebook from a
# different checkout; when it is unset or empty the original location is used.
project_path = (
    os.environ.get("TQS_PROJECT_PATH")
    or "/Users/spandan/Projects/transformer_quantum_state/"
)
os.chdir(project_path)

In [3]:
import torch
import numpy as np
import os
from hamiltonians.Ising import Ising

from model.model_batched import TransformerModel
from optimizers.optimizer_supervised_batches import Optimizer
from torch.utils.tensorboard import SummaryWriter
import pickle
from cuda_setup import cuda_setup
from optimizers.bookkeeping_tools import generate_monitor_dict

In [4]:
# Project helper from cuda_setup.py; on this run it printed
# "GPU unavailable; using CPU". NOTE(review): presumably it selects the torch
# device — confirm against the helper's source.
cuda_setup()
# Make float32 the default dtype for newly created floating-point tensors.
torch.set_default_dtype(torch.float32)

GPU unavailable; using CPU


## Defining Hamiltonians and Datasets

In [5]:
# The name of the dataset to create in TFIM_ground_states. It is handed to the
# Julia generator script as its argument and later joined onto
# "TFIM_ground_states" to build the directory the datasets are loaded from.
dataset_dir_name = "h_small"

In [6]:
# Run the Julia dataset generator, passing the dataset directory name as its
# only argument ({dataset_dir_name} is interpolated by IPython).
# NOTE(review): the thread count is hard-coded to 28 — confirm it suits the
# machine this notebook runs on.
!julia --threads 28 ./createdata/create_datasets.jl {dataset_dir_name}

]0;Julia]0;Julia[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.10/Project.toml`
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.10/Manifest.toml`
[?25l[?25h[2K[?25h[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.10/Project.toml`
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.10/Manifest.toml`
[?25l[?25h[2K[?25h[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.10/Project.toml`
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.10/Manifest.toml`
[?25l[?25h[2K[?25h[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.10/Project.toml`
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.10/Manifest.toml`
[?25l[?25h[2K[?25h[32m[1m   Resolving[22m[39m package versions...
[32

In [7]:
# Directory holding the generated dataset: TFIM_ground_states/<dataset_dir_name>.
data_dir_path = os.path.join("TFIM_ground_states", dataset_dir_name)

# Column vector of training system sizes; the range currently contains only 4.
system_sizes = torch.arange(4, 4 + 1, 1).unsqueeze(1)

# One periodic Ising Hamiltonian per size. Iterating the column vector yields
# one-element tensors, which is the form passed to the constructor.
Hamiltonians = [
    Ising(n_sites, periodic=True, get_basis=True) for n_sites in system_sizes
]

  return func(*args, **kwargs)
  return func(*args, **kwargs)


In [8]:
# Attach the dataset found under data_dir_path to every Hamiltonian.
# The batch size is fixed at 1; the size-dependent alternative
# batch_from_n(ham.n) is left disabled.
for ham in Hamiltonians:
    ham.load_dataset(data_dir_path, batch_size=1)

Loaded dataset for system size 4 from /Users/spandan/Projects/transformer_quantum_state/TFIM_ground_states/h_small/4.arrow.
(h_min, h_step, h_max) = (0.5, -1, 1.5).


  return func(*args, **kwargs)


## Defining the Model

In [9]:
# Hyperparameters. The values 32 / 8 / 8 / 0 also appear in the checkpoint
# file name loaded at the end of this cell.
param_dim = Hamiltonians[0].param_dim  # read from the first Hamiltonian
embedding_size = 32
n_head = 8
n_hid = embedding_size  # hidden size is tied to the embedding size
n_layers = 8
dropout = 0  # used for both the encoding and the transformer dropout
minibatch = 10000
param_range = None
point_of_interest = None
use_SR = False

# Passed to the model as `compat_dict`; reuses `param_range` instead of
# repeating the literal so the two cannot drift apart.
compat_dict = {
    "system_sizes": system_sizes,
    "param_range": param_range,
}

model = TransformerModel(
    n_dim=1,
    param_dim=param_dim,
    embedding_size=embedding_size,
    n_head=n_head,
    n_hid=n_hid,
    n_layers=n_layers,
    possible_spin_vals=2,
    compat_dict=compat_dict,
    dropout_encoding=dropout,
    dropout_transformer=dropout,
    minibatch=minibatch,
).to(device="cpu")

# Start from the stored checkpoint in the results directory.
results_dir = "results"
paper_checkpoint_name = "ckpt_100000_Ising_32_8_8_0.ckpt"
paper_checkpoint_path = os.path.join(results_dir, paper_checkpoint_name)
# torch.load unpickles the file. weights_only=True restricts unpickling to
# tensors and plain containers, so a tampered checkpoint cannot execute
# arbitrary code. The checkpoint is used directly as a state dict below.
checkpoint = torch.load(
    paper_checkpoint_path, map_location="cpu", weights_only=True
)
# Last expression of the cell: its return value reports the key matching.
model.load_state_dict(checkpoint)



<All keys matched successfully>

## Defining the Optimizer and Generalization Monitoring

In [10]:
# Recommended lr range: 1e-9 to 1e-2
optimizer_settings = {
    "lr": 1e-7,
    "beta1": 0.9,
    "beta2": 0.98,
    "point_of_interest": point_of_interest,
}
# The model and the list of Hamiltonians are passed positionally.
opt = Optimizer(model, Hamiltonians, **optimizer_settings)

In [11]:
# System sizes to monitor, one per row (shape 2 x 1).
monitor_sizes = torch.tensor([[15], [40]])
# Field strengths to monitor, as a single row (shape 1 x 3).
monitor_params = torch.tensor([[0.6, 1.0, 1.4]])
print("Sizes to monitor:\n", monitor_sizes)
print("Params to monitor:\n", monitor_params)

Sizes to monitor:
 tensor([[15],
        [40]])
Params to monitor:
 tensor([[0.6000, 1.0000, 1.4000]])


In [12]:
# Build the bookkeeping dictionary for the monitored sizes and parameters.
# It is keyed by system size ("[15]", "[40]") and then by parameter.
# NOTE(review): epochs_anticipated presumably sizes the epoch-averaged
# buffers and should match the number of training epochs — confirm.
monitor_dict = generate_monitor_dict(
    monitor_sizes=monitor_sizes,
    monitor_params=monitor_params,
    epochs_anticipated=30000,
)

System size keys: [[15], [40]]
Param keys: [[0.6], [1.0], [1.4]]


  return func(*args, **kwargs)


### DMRG Energies

In [13]:
def get_dmrg_energies(h_values, energies_path=None):
    """Look up stored DMRG energies for the requested field strengths.

    The energies are read from a NumPy file. Entry ``i`` of that file is
    matched with point ``i`` of the grid ``torch.linspace(0, 2, 101)``.

    Args:
        h_values: Iterable of field strengths (Python floats) to look up.
        energies_path: Path of the ``.npy`` file. Defaults to
            ``results/E_dmrg_40.npy`` relative to the working directory.

    Returns:
        dict mapping each entry of ``h_values`` to its energy as a Python
        float (the stored values are converted to float32 first).

    Raises:
        ValueError: If a requested value does not coincide with exactly one
            grid point, instead of failing later inside ``.item()``.
    """
    if energies_path is None:
        energies_path = os.path.join("results", "E_dmrg_40.npy")
    dmrg40 = torch.tensor(np.load(energies_path), dtype=torch.float32)

    # Explicit dtype: torch.isclose needs both operands to share a dtype, so
    # the lookup must not depend on the global default dtype.
    dmrg40_h_values = torch.linspace(0, 2, 101, dtype=torch.float32)

    energies = {}
    for h in h_values:
        target = torch.tensor(h, dtype=dmrg40_h_values.dtype)
        matches = torch.nonzero(torch.isclose(dmrg40_h_values, target)).flatten()
        if matches.numel() != 1:
            raise ValueError(
                f"h = {h} matches {matches.numel()} points of the DMRG grid "
                "linspace(0, 2, 101); expected exactly one"
            )
        energies[h] = dmrg40[matches].item()
    return energies


# DMRG energies at the same three field strengths listed in monitor_params.
dmrg_energies = get_dmrg_energies([0.6, 1.0, 1.4])

# Display the resulting {h: energy} mapping.
dmrg_energies

{0.6: -42.87900161743164, 1.0: -50.569435119628906, 1.4: -63.206722259521484}

In [14]:
# Inspect the size-40, h = 1.0 entry before a reference energy is assigned
# ('energy' is still None at this point).
monitor_dict["[40]"]["params"]["[1.0]"]

{'param': tensor([1.]),
 'energy': None,
 'epoch_errors': [],
 'epoch_relative_errors': [],
 'epoch_E_mean': [],
 'epoch_E_var': [],
 'epoch_Er': [],
 'epoch_Ei': [],
 'epoch_averaged_errors': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_relative_errors': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_E_mean': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_E_var': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_Er': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_Ei': tensor([0., 0., 0.,  ..., 0., 0., 0.])}

In [15]:
# Store the DMRG energies as the reference values of the size-40 entries.
for key, h in (("[0.6]", 0.6), ("[1.0]", 1.0), ("[1.4]", 1.4)):
    monitor_dict["[40]"]["params"][key]["energy"] = dmrg_energies[h]

In [16]:
# Check that the size-40, h = 0.6 entry now carries its reference energy.
monitor_dict["[40]"]["params"]["[0.6]"]

{'param': tensor([0.6000]),
 'energy': -42.87900161743164,
 'epoch_errors': [],
 'epoch_relative_errors': [],
 'epoch_E_mean': [],
 'epoch_E_var': [],
 'epoch_Er': [],
 'epoch_Ei': [],
 'epoch_averaged_errors': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_relative_errors': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_E_mean': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_E_var': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_Er': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_Ei': tensor([0., 0., 0.,  ..., 0., 0., 0.])}

In [17]:
# Check that the size-40, h = 1.0 entry now carries its reference energy.
monitor_dict["[40]"]["params"]["[1.0]"]

{'param': tensor([1.]),
 'energy': -50.569435119628906,
 'epoch_errors': [],
 'epoch_relative_errors': [],
 'epoch_E_mean': [],
 'epoch_E_var': [],
 'epoch_Er': [],
 'epoch_Ei': [],
 'epoch_averaged_errors': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_relative_errors': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_E_mean': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_E_var': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_Er': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_Ei': tensor([0., 0., 0.,  ..., 0., 0., 0.])}

In [18]:
# Check that the size-40, h = 1.4 entry now carries its reference energy.
monitor_dict["[40]"]["params"]["[1.4]"]

{'param': tensor([1.4000]),
 'energy': -63.206722259521484,
 'epoch_errors': [],
 'epoch_relative_errors': [],
 'epoch_E_mean': [],
 'epoch_E_var': [],
 'epoch_Er': [],
 'epoch_Ei': [],
 'epoch_averaged_errors': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_relative_errors': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_E_mean': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_E_var': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_Er': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_Ei': tensor([0., 0., 0.,  ..., 0., 0., 0.])}

### Brute-Force Energies

In [19]:
# Size-15 periodic Hamiltonian, built with get_basis=False.
ham_15 = Ising(torch.tensor([15]), periodic=True, get_basis=False)

# Ground-state energies at h = 0.6, 1.0, 1.4, evaluated in that order.
energy_15_0_6, energy_15_1_0, energy_15_1_4 = [
    ham_15.calc_E_ground(param=h) for h in (0.6, 1.0, 1.4)
]

In [20]:
# Store the calc_E_ground results as the reference values of the size-15 entries.
for key, energy in (
    ("[0.6]", energy_15_0_6),
    ("[1.0]", energy_15_1_0),
    ("[1.4]", energy_15_1_4),
):
    monitor_dict["[15]"]["params"][key]["energy"] = energy

In [21]:
# Check that the size-15, h = 0.6 entry now carries its reference energy.
monitor_dict["[15]"]["params"]["[0.6]"]

{'param': tensor([0.6000]),
 'energy': -16.383636410876687,
 'epoch_errors': [],
 'epoch_relative_errors': [],
 'epoch_E_mean': [],
 'epoch_E_var': [],
 'epoch_Er': [],
 'epoch_Ei': [],
 'epoch_averaged_errors': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_relative_errors': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_E_mean': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_E_var': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_Er': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_Ei': tensor([0., 0., 0.,  ..., 0., 0., 0.])}

In [22]:
# List the fields stored for a single system size.
monitor_dict["[15]"].keys()

dict_keys(['system_size', 'H', 'params'])

In [23]:
# List the system-size keys of the monitor dictionary.
monitor_dict.keys()

dict_keys(['[15]', '[40]'])

In [24]:
# Check that the size-15, h = 1.0 entry now carries its reference energy.
monitor_dict["[15]"]["params"]["[1.0]"]

{'param': tensor([1.]),
 'energy': -19.133544467011305,
 'epoch_errors': [],
 'epoch_relative_errors': [],
 'epoch_E_mean': [],
 'epoch_E_var': [],
 'epoch_Er': [],
 'epoch_Ei': [],
 'epoch_averaged_errors': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_relative_errors': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_E_mean': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_E_var': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_Er': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_Ei': tensor([0., 0., 0.,  ..., 0., 0., 0.])}

In [25]:
# Check that the size-15, h = 1.4 entry now carries its reference energy.
monitor_dict["[15]"]["params"]["[1.4]"]

{'param': tensor([1.4000]),
 'energy': -23.778807936182726,
 'epoch_errors': [],
 'epoch_relative_errors': [],
 'epoch_E_mean': [],
 'epoch_E_var': [],
 'epoch_Er': [],
 'epoch_Ei': [],
 'epoch_averaged_errors': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_relative_errors': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_E_mean': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_E_var': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_Er': tensor([0., 0., 0.,  ..., 0., 0., 0.]),
 'epoch_averaged_Ei': tensor([0., 0., 0.,  ..., 0., 0., 0.])}

In [26]:
# Start training. The epoch count equals the epochs_anticipated value given
# to generate_monitor_dict, and TensorBoard logging is switched on.
# NOTE(review): prob_weight and arg_weight presumably weight separate loss
# terms — confirm against Optimizer.train.
opt.train(
    monitor_dict=monitor_dict,
    epochs=30000,
    prob_weight=1_000_000,
    arg_weight=0.5,
    log_tensorboard=True,
)

Use tensorboard --logdir supervised_results/Ising_32_8_8_0_supervised_2024_08_09_07_59_26_185309/tensorboard_logs for monitoring. Pass --bind-all if training remotely.
Starting epoch 0
	N = [4]
		Iter 0 - h ∈ [0.6, 0.6]
		Iter 1 - h ∈ [0.6, 0.6]
		Iter 2 - h ∈ [0.6, 0.6]
		Iter 3 - h ∈ [0.6, 0.6]
		Iter 4 - h ∈ [0.6, 0.6]
		Iter 5 - h ∈ [0.6, 0.6]
		Iter 6 - h ∈ [0.6, 0.6]
		Iter 7 - h ∈ [0.6, 0.6]
		Iter 8 - h ∈ [0.6, 0.6]
		Iter 9 - h ∈ [0.6, 0.6]
		Iter 10 - h ∈ [0.6, 0.6]
		Iter 11 - h ∈ [0.6, 0.6]
		Iter 12 - h ∈ [0.6, 0.6]
		Iter 13 - h ∈ [0.6, 0.6]
		Iter 14 - h ∈ [0.6, 0.6]
		Iter 15 - h ∈ [0.6, 0.6]
Starting epoch 1
	N = [4]
		Iter 0 - h ∈ [0.6, 0.6]
		Iter 1 - h ∈ [0.6, 0.6]
		Iter 2 - h ∈ [0.6, 0.6]
		Iter 3 - h ∈ [0.6, 0.6]
		Iter 4 - h ∈ [0.6, 0.6]
		Iter 5 - h ∈ [0.6, 0.6]
		Iter 6 - h ∈ [0.6, 0.6]
		Iter 7 - h ∈ [0.6, 0.6]
		Iter 8 - h ∈ [0.6, 0.6]
		Iter 9 - h ∈ [0.6, 0.6]
		Iter 10 - h ∈ [0.6, 0.6]
		Iter 11 - h ∈ [0.6, 0.6]
		Iter 12 - h ∈ [0.6, 0.6]
		Iter 13 - h ∈ [

  """


KeyboardInterrupt: 