In [4]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import torch
# Worker processes that use CUDA have to be started with "spawn" (or
# "forkserver"); the CUDA runtime does not support the "fork" start method.
# force=True keeps this cell re-runnable: without it, a second call in the same
# kernel raises RuntimeError("context has already been set").
# This cell must run before any DataLoader worker is created.
torch.multiprocessing.set_start_method("spawn", force=True)

In [5]:
import os
from pathlib import Path

# Project root: the working directory is switched here so that the project
# packages imported below and the relative data paths resolve against it.
# The location can be overridden with the TQS_PROJECT_PATH environment variable
# so the notebook is not tied to a single machine; the default is unchanged.
proj_path = os.environ.get(
    "TQS_PROJECT_PATH", "/home/spandan/Projects/transformer_quantum_state"
)
os.chdir(proj_path)

In [7]:
from hamiltonians.Ising import Ising
import torch

In [10]:
# System sizes 10, 12 and 14. The column-vector reshape means every size is
# handed to Ising as a one-element tensor.
system_sizes = torch.arange(10, 15 + 1, 2).reshape(-1, 1)
Hamiltonians = [
    Ising(n_sites, periodic=True, get_basis=True) for n_sites in system_sizes
]

# Directory holding one dataset file per system size.
# Previously used alternative:
#   os.path.join("TFIM_ground_states", "2024-08-02T12-12-55.238")
data_dir_path = os.path.join("TFIM_ground_states", "h_windows")

# `perc` is the complement of 30000 / 2**15, so the batch size below equals
# exactly 30000 at n = 15 and the same fraction of 2**n for other sizes.
perc = (2**15 - 30000) / 2**15


def batch_size_dyn(n):
    """Return the batch size for an ``n``-site system: ``int(2**n * (1 - perc))``."""
    return int(2**n * (1 - perc))


# NOTE: the loop variable `ham` is reused by later cells, where it refers to
# the last Hamiltonian in the list.
for ham in Hamiltonians:
    # Optional, currently disabled: samples_in_epoch=100
    ham.load_dataset(
        data_dir_path,
        sampling_type="shuffled",
        batch_size=batch_size_dyn(ham.n),
    )

Loaded dataset for system size 10 from TFIM_ground_states/h_windows/10.arrow.
(h_min, h_step, h_max) = (0.5, -1, 1.5).
Loaded dataset for system size 12 from TFIM_ground_states/h_windows/12.arrow.
(h_min, h_step, h_max) = (0.5, -1, 1.5).
Loaded dataset for system size 14 from TFIM_ground_states/h_windows/14.arrow.
(h_min, h_step, h_max) = (0.5, -1, 1.5).


In [11]:
# Show the dataset table of the Hamiltonian currently bound to `ham`
# (the last one processed by the loading loop).
ham.dataset

Unnamed: 0,N,h,energy,state
0,14,0.5,-14.88963,"[0.6283284965025702, -0.07985427455485967, -0...."
1,14,0.7,-15.776385,"[0.5482456406333611, -0.09937707505092618, -0...."
2,14,0.9,-17.041148,"[-0.42001585435609934, 0.1013754277719862, 0.1..."
3,14,1.1,-18.820983,"[0.24132831196358626, -0.0755480292780341, -0...."
4,14,1.3,-21.014991,"[-0.13584722067264499, 0.052360822750679555, 0..."
5,14,1.5,-23.407583,"[0.089239800697954, -0.039977658610150256, -0...."


In [12]:
import numpy as np

In [18]:
# Output layout: <mmap_dir>/<sub-directory>/<kind>_<n>.npy, one file per
# system size and per array kind.
mmap_dir = "mmap_data"
basis_memmap_dir = "basis_sets"
parameter_memmap_dir = "parameters"
ground_memmap_dir = "ground_states"

# np.save does not create missing parent directories, so create them up front.
# exist_ok=True keeps the cell re-runnable.
for subdir in (basis_memmap_dir, parameter_memmap_dir, ground_memmap_dir):
    os.makedirs(os.path.join(mmap_dir, subdir), exist_ok=True)

for ham in Hamiltonians:

    params = ham.training_dataset.param_tensor.unsqueeze(-1).T
    ground = ham.training_dataset.ground_state_tensor
    basis = ham.basis

    print(params.shape, ground.shape, basis.shape)

    # params should be of shape (1, n_samples) (and should match
    # the number of ground state samples)
    assert params.shape == (1, ground.shape[0])

    # basis should be of shape (n, 2**n)
    assert basis.shape == (ham.n, 2 ** ham.n)

    # ground state components should match the basis size
    assert ground.shape[1] == 2 ** basis.shape[0] == 2 ** ham.n

    # Write plain .npy files; they carry dtype and shape in their header and
    # can later be reopened as memory maps.
    np.save(
        os.path.join(mmap_dir, basis_memmap_dir, f"basis_{ham.n}.npy"),
        basis.numpy(),
    )

    np.save(
        os.path.join(mmap_dir, parameter_memmap_dir, f"param_{ham.n}.npy"),
        params.numpy(),
    )

    np.save(
        os.path.join(mmap_dir, ground_memmap_dir, f"ground_{ham.n}.npy"),
        ground.numpy(),
    )

torch.Size([1, 6]) torch.Size([6, 1024]) torch.Size([10, 1024])
torch.Size([1, 6]) torch.Size([6, 4096]) torch.Size([12, 4096])
torch.Size([1, 6]) torch.Size([6, 16384]) torch.Size([14, 16384])


In [21]:
# Number of ground-state samples (rows) in the dataset of the Hamiltonian
# currently bound to `ham`; this value is what gets written to the metadata.
num_samples = ham.training_dataset.ground_state_tensor.size(0)

In [22]:
# Display the sample count.
num_samples

6

In [26]:
import json
import os

# The metadata file sits at the top of the output directory and records the
# sample count together with the three sub-directory names.
metadata_file = os.path.join(mmap_dir, "meta.json")

metadata = {
    "num_samples": num_samples,
    "basis_memmap_dir": basis_memmap_dir,
    "parameter_memmap_dir": parameter_memmap_dir,
    "ground_memmap_dir": ground_memmap_dir,
}

# Serialise first, then write the resulting JSON text in one go.
serialized_metadata = json.dumps(metadata)
with open(metadata_file, "w") as f:
    f.write(serialized_metadata)

In [19]:
from numpy.lib.format import open_memmap

In [23]:
# Reopen the basis array of the current `ham` as a read-only memory map.
# In mode "r", open_memmap reads dtype and shape from the .npy header and
# ignores any values passed explicitly, so none are passed here; the shape is
# checked explicitly instead.
basis_recovered = open_memmap(
    os.path.join(mmap_dir, basis_memmap_dir, f"basis_{ham.n}.npy"),
    mode="r",
)
assert basis_recovered.shape == (ham.n, 2 ** ham.n)

In [24]:
# Display the basis array that was reopened from disk as a memmap.
basis_recovered

memmap([[0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 1, 1, 1],
        ...,
        [0, 0, 0, ..., 1, 1, 1],
        [0, 0, 1, ..., 0, 1, 1],
        [0, 1, 0, ..., 1, 0, 1]])

In [27]:
# Load the data for the current `ham` from the directory the .npy files and
# meta.json were written to.
# NOTE(review): presumably this replaces ham.training_dataset with a
# memmap-backed loader (the traceback further down references
# datasets/ising_memmap.py) - confirm against the Ising implementation.
ham.load_mmap(mmap_dir)

In [28]:
# Smoke test: fetch and print only the first item yielded by the training
# dataset, then stop.
# NOTE(review): the recorded run fails inside a DataLoader worker when the
# collate function moves a batch to "cuda" ("CUDA error: initialization
# error"). This is the usual symptom of CUDA being used in a forked worker;
# the cell that sets the "spawn" start method has no execution count, so it
# presumably was not run in this session - confirm and re-run from a fresh
# kernel.
for sample in ham.training_dataset:
    print(sample)
    break

RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/spandan/anaconda3/envs/tqs2/lib/python3.12/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/spandan/anaconda3/envs/tqs2/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 54, in fetch
    return self.collate_fn(data)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/spandan/Projects/transformer_quantum_state/datasets/ising_memmap.py", line 20, in prob_amp_collate
    b0 = torch.stack([b[0] for b in batch]).to(device="cuda")
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/spandan/anaconda3/envs/tqs2/lib/python3.12/site-packages/torch/cuda/__init__.py", line 293, in _lazy_init
    torch._C._cuda_init()
RuntimeError: CUDA error: initialization error
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

