## A demo using Hydrogen Hamiltonian with GPT-QE.

In [1]:
import torch
from torch.utils.data import Dataset
from torch.utils.data.dataloader import DataLoader
from gqe.mingpt.utils import set_seed

set_seed(31)

In [2]:
from qwrapper.operator import PauliObservable
from gqe.mingpt.cost import EnergyCost
from qswift.compiler import DefaultOperatorPool
from benchmark.molecule import DiatomicMolecularHamiltonian
from gqe.operator_pool.uccsd import UCCSD, generate_molecule
from gqe.common.initializer import HFStateInitializer
from gqe.mingpt.callback import DefaultCallback, PrintMonitor, FileMonitor

#molecule = generate_molecule("H", "H", 0.7414, "sto-3g")
molecule = generate_molecule("H", "H", 1.5, "sto-3g", bravyi_kitaev=False)
nqubit = 4

# prepare Hamiltonian
hamiltonian = DiatomicMolecularHamiltonian(nqubit, molecule, bravyi_kitaev=False)

# prepare operator_pool
uccsd = UCCSD(nqubit, molecule)
paulis = uccsd.paulis
paulis.append(PauliObservable("IIII"))
print('paulis', paulis)
num_operators = len(paulis)
initializer = HFStateInitializer(n_electrons=2)
pool = DefaultOperatorPool(paulis)
cost = EnergyCost(hamiltonian, initializer, pool,
                  [0.00625, -0.00625, 0.0125, -0.0125, 0.025, -0.025, 0.05,
                   -0.05, 0.1, -0.1])
print(hamiltonian.exact_value(initializer.init_circuit(4, [], "qulacs")))

converged SCF energy = -0.910873554594387
Starting to parse FermionOperator using 4 qubits...

Operator t:  -0.49178577730353756 [] +
-0.0573839840149255 [X0 X1 Y2 Y3] +
0.0573839840149255 [X0 Y1 Y2 X3] +
0.0573839840149255 [Y0 X1 X2 Y3] +
-0.0573839840149255 [Y0 Y1 X2 X3] +
0.09345649667701589 [Z0] +
0.13817584576560335 [Z0 Z1] +
0.08253705488832763 [Z0 Z2] +
0.13992103890325314 [Z0 Z3] +
0.09345649667701589 [Z1] +
0.13992103890325314 [Z1 Z2] +
0.08253705488832763 [Z1 Z3] +
-0.03564481621009516 [Z2] +
0.1458551903009311 [Z2 Z3] +
-0.035644816210095145 [Z3]
Term, coeff:  () -0.49178577730353756
Term, coeff:  ((0, 'Z'),) 0.09345649667701589
Index, p_char:  0 Z
Term, coeff:  ((1, 'Z'),) 0.09345649667701589
Index, p_char:  1 Z
Term, coeff:  ((2, 'Z'),) -0.03564481621009516
Index, p_char:  2 Z
Term, coeff:  ((3, 'Z'),) -0.035644816210095145
Index, p_char:  3 Z
Term, coeff:  ((0, 'Z'), (1, 'Z')) 0.13817584576560335
Index, p_char:  0 Z
Index, p_char:  1 Z
Term, coeff:  ((0, 'Y'), (1, 'X'), (

## FCI energy by diagonalization

In [3]:
from qwrapper.hamiltonian import compute_ground_state

print(compute_ground_state(hamiltonian))

-0.9981493534714105


## Setup for GPT

In [4]:
# create a GPT instance
from gqe.mingpt.model import GPT

model_config = GPT.get_default_config()
model_config.model_type = 'gpt2'
model_config.vocab_size = cost.vocab_size()
model_config.block_size = cost.vocab_size()
model_config.n_gates = 30  # The number of gates for each circuit
model_config.temperature = 5  # Each gate is generated with probability exp(-temperature * logit)
model_config.embd_pdrop = 0
model_config.resid_pdrop = 0
model_config.attn_pdrop = 0
model = GPT(model_config, cost)

number of parameters: 85.29M


In [5]:
# create a Trainer object
from gqe.mingpt.trainer import Trainer

train_config = Trainer.get_default_config()
train_config.learning_rate = 5e-7  # the model we're using is so small that we can go a bit faster
train_config.max_iters = 2
train_config.num_workers = 0
train_config.n_samples = 5
trainer = Trainer(train_config, model)

running on device cpu


In [6]:
file_monitor = FileMonitor()
callback_generator = DefaultCallback(model, monitors=[PrintMonitor(), file_monitor])

trainer.set_callback('on_batch_end', callback_generator.generate())
trainer.run()
torch.save(model.state_dict(), '../saved_models/gptqe_test_2')
file_monitor.save('../output/test_batch.json')

iter_dt 0.00s; iter 0: train loss 0.82493 temperature: 5
mean_logits tensor([-1.1689, -1.2591, -1.1869, -1.1736, -1.1192], grad_fn=<MulBackward0>)
energies: tensor([-0.8855, -0.9064, -0.9053, -0.9339, -0.6590])
mean: tensor(-0.8580)
iter_dt 1695232464.32s; iter 1: train loss 0.14702 temperature: 5.05
mean_logits tensor([-1.0082, -1.0967, -0.8968, -0.9323, -1.0482], grad_fn=<MulBackward0>)
energies: tensor([-0.7904, -0.9416, -0.8972, -0.7697, -0.9206])
mean: tensor(-0.8639)


In [7]:
indices, logits = model.generate(torch.tensor([[0]]), model_config.n_gates)
print(cost.energy(indices))

tensor([-0.8898])


In [8]:
model.temperature = 20
#model.load_state_dict(torch.load('../saved_models/gptqe_test_2'))
model.load_state_dict(torch.load('../saved_models/gpt2_model_h2_sto3g_1.5_30_3047.json'))
indices, logits = model.generate(torch.zeros(10, 1, dtype=torch.int), model_config.n_gates)
cost.sequence.tool = "qiskit"
index = torch.argmin(cost.energy(indices)).item()

RuntimeError: Error(s) in loading state_dict for GPT:
	size mismatch for transformer.wte.weight: copying a param with shape torch.Size([144, 768]) from checkpoint, the shape in current model is torch.Size([150, 768]).
	size mismatch for transformer.wpe.weight: copying a param with shape torch.Size([144, 768]) from checkpoint, the shape in current model is torch.Size([150, 768]).
	size mismatch for transformer.h.0.attn.bias: copying a param with shape torch.Size([1, 1, 144, 144]) from checkpoint, the shape in current model is torch.Size([1, 1, 150, 150]).
	size mismatch for transformer.h.1.attn.bias: copying a param with shape torch.Size([1, 1, 144, 144]) from checkpoint, the shape in current model is torch.Size([1, 1, 150, 150]).
	size mismatch for transformer.h.2.attn.bias: copying a param with shape torch.Size([1, 1, 144, 144]) from checkpoint, the shape in current model is torch.Size([1, 1, 150, 150]).
	size mismatch for transformer.h.3.attn.bias: copying a param with shape torch.Size([1, 1, 144, 144]) from checkpoint, the shape in current model is torch.Size([1, 1, 150, 150]).
	size mismatch for transformer.h.4.attn.bias: copying a param with shape torch.Size([1, 1, 144, 144]) from checkpoint, the shape in current model is torch.Size([1, 1, 150, 150]).
	size mismatch for transformer.h.5.attn.bias: copying a param with shape torch.Size([1, 1, 144, 144]) from checkpoint, the shape in current model is torch.Size([1, 1, 150, 150]).
	size mismatch for transformer.h.6.attn.bias: copying a param with shape torch.Size([1, 1, 144, 144]) from checkpoint, the shape in current model is torch.Size([1, 1, 150, 150]).
	size mismatch for transformer.h.7.attn.bias: copying a param with shape torch.Size([1, 1, 144, 144]) from checkpoint, the shape in current model is torch.Size([1, 1, 150, 150]).
	size mismatch for transformer.h.8.attn.bias: copying a param with shape torch.Size([1, 1, 144, 144]) from checkpoint, the shape in current model is torch.Size([1, 1, 150, 150]).
	size mismatch for transformer.h.9.attn.bias: copying a param with shape torch.Size([1, 1, 144, 144]) from checkpoint, the shape in current model is torch.Size([1, 1, 150, 150]).
	size mismatch for transformer.h.10.attn.bias: copying a param with shape torch.Size([1, 1, 144, 144]) from checkpoint, the shape in current model is torch.Size([1, 1, 150, 150]).
	size mismatch for transformer.h.11.attn.bias: copying a param with shape torch.Size([1, 1, 144, 144]) from checkpoint, the shape in current model is torch.Size([1, 1, 150, 150]).
	size mismatch for lm_head.weight: copying a param with shape torch.Size([144, 768]) from checkpoint, the shape in current model is torch.Size([150, 768]).

In [None]:
target = indices.numpy()[index]
print(cost.energy(torch.tensor([target])))
print(target)
cost.sequence._get_circuit(target).qc.draw(output="mpl", plot_barriers=True)