## A demo of GPT-QE on the Hamiltonian of a six-atom hydrogen chain (H6)

In [3]:
import torch
from gqe.mingpt.utils import set_seed

# Fix the seed up front so repeated runs of the notebook are comparable.
# NOTE(review): presumably seeds every RNG the training uses (python/numpy/torch) -- confirm in gqe.mingpt.utils.
set_seed(3407)

In [28]:
from qwrapper.operator import PauliObservable
from gqe.mingpt.cost import EnergyCost
from qswift.compiler import DefaultOperatorPool
from benchmark.molecule import DiatomicMolecularHamiltonian
from gqe.operator_pool.uccsd import UCCSD, do_generate_molecule
from gqe.common.initializer import HFStateInitializer
from gqe.util import get_device
from gqe.mingpt.callback import DefaultCallback, PrintMonitor, FileMonitor

# Earlier LiH experiment, kept for reference:
# molecule = generate_molecule("Li", "H", 1.596, "sto-3g", bravyi_kitaev=False)

# Linear chain of six hydrogen atoms placed along the z-axis, `bond_length` apart.
bond_length = 3.0
geometry = "".join(f"H 0.0 0.0 {k * bond_length}\n" for k in range(6))
molecule = do_generate_molecule(geometry, "sto-3g", bravyi_kitaev=False)
nqubit = 12

# prepare Hamiltonian
hamiltonian = DiatomicMolecularHamiltonian(nqubit, molecule, bravyi_kitaev=False)

# prepare operator_pool
uccsd = UCCSD(nqubit, molecule)
paulis = uccsd.paulis
# Add the identity as an extra pool entry. Its width is derived from `nqubit`
# so it matches the other Pauli strings (the previous literal was only 10
# characters wide although the register has 12 qubits).
paulis.append(PauliObservable("I" * nqubit))
print('paulis', paulis)
num_operators = len(paulis)
initializer = HFStateInitializer(n_electrons=6)
pool = DefaultOperatorPool(paulis)

# Candidate evolution times: +/- 1/2**j for j = 2, ..., 11.
# Previously tried hand-picked pool:
# time_pool = [1 / 320, -1 / 320, 1 / 160, -1 / 160, 1 / 80, -1 / 80, 1 / 40, -1 / 40, 0.05, -0.05, 0.1, -0.1, 0.2, -0.2, 0.4, -0.4, 0.8, -0.8]
time_pool = [1 / (2 ** j) for j in range(2, 12)]
time_pool.extend([-1 / (2 ** j) for j in range(2, 12)])

# Energy cost built from the Hamiltonian, the initial-state preparer, the
# operator pool and the time pool.
cost = EnergyCost(hamiltonian, initializer, pool, time_pool)


converged SCF energy = -1.97060224599968
paulis [+IIIIIXIIIIII, +IIIIIXZYIIII, +IIIIIXZZZYII, +IIIIIXZZZZZY, +IIIIIYZXIIII, +IIIIIYZZZXII, +IIIIIYZZZZZX, +IIIIXIIIIIII, +IIIIXXIIIIXY, +IIIIXXIIIIYX, +IIIIXXIIXYII, +IIIIXXIIYXII, +IIIIXXIXZZYI, +IIIIXXIYZZXI, +IIIIXXXYIIII, +IIIIXXXZZZZY, +IIIIXXYXIIII, +IIIIXXYZZZZX, +IIIIXYIIIIXX, +IIIIXYIIIIYY, +IIIIXYIIXXII, +IIIIXYIIYYII, +IIIIXYIXZZXI, +IIIIXYIYZZYI, +IIIIXYXXIIII, +IIIIXYXZZZZX, +IIIIXYYYIIII, +IIIIXYYZZZZY, +IIIIXZYIIIII, +IIIIXZZZYIII, +IIIIXZZZZZYI, +IIIIYXIIIIXX, +IIIIYXIIIIYY, +IIIIYXIIXXII, +IIIIYXIIYYII, +IIIIYXIXZZXI, +IIIIYXIYZZYI, +IIIIYXXXIIII, +IIIIYXXZZZZX, +IIIIYXYYIIII, +IIIIYXYZZZZY, +IIIIYYIIIIXY, +IIIIYYIIIIYX, +IIIIYYIIXYII, +IIIIYYIIYXII, +IIIIYYIXZZYI, +IIIIYYIYZZXI, +IIIIYYXYIIII, +IIIIYYXZZZZY, +IIIIYYYXIIII, +IIIIYYYZZZZX, +IIIIYZXIIIII, +IIIIYZZZXIII, +IIIIYZZZZZXI, +IIIXIIIIIIII, +IIIXXIIIIXYI, +IIIXXIIIIYXI, +IIIXXIIIXZZY, +IIIXXIIIYZZX, +IIIXXIIXYIII, +IIIXXIIYXIII, +IIIXXIXZZYII, +IIIXXIYZZXII, +IIIXY

## FCI energy by diagonalization

In [5]:
from qwrapper.hamiltonian import compute_ground_state

# Reference ground-state energy of `hamiltonian`, as returned by compute_ground_state.
fci_energy = compute_ground_state(hamiltonian)
print(fci_energy)

-2.800958899654439


In [6]:
# Energy of the initial state alone: the initializer's circuit with an empty
# second argument, evaluated exactly against the Hamiltonian.
# The register size comes from `nqubit` rather than a repeated literal so this
# stays in step with the setup cell.
print("hf state:", hamiltonian.exact_value(initializer.init_circuit(nqubit, [], "qulacs")))

hf state: -1.9706022459996853


## Setup for GPT

In [24]:
# create a GPT instance
from gqe.mingpt.model import GPT

# Number of layers handed to LayerWiseTrainer; the monitor loop also iterates
# over this count, attaching one set of monitors per layer index.
num_layers = 12


def generate_model_config():
    """Return the GPT model configuration handed to the trainer.

    Starts from ``GPT.get_default_config()`` and overrides the fields listed
    below. The vocabulary size is read from the notebook-level ``cost`` object.
    """
    config = GPT.get_default_config()
    overrides = {
        "model_type": 'gpt2',
        "vocab_size": cost.vocab_size(),
        "n_gates": 5,  # The number of gates for each circuit
        "temperature": 5,  # Each gate is generated with probability exp(-temperature * logit)
        "embd_pdrop": 0.1,
        "resid_pdrop": 0.1,
        "attn_pdrop": 0.1,
        "std": 0.02,
        "energy_offset": 1,
    }
    for field, value in overrides.items():
        setattr(config, field, value)
    # The block size is tied to the number of gates per circuit.
    config.block_size = config.n_gates
    return config

In [31]:
# create a Trainer object
from gqe.mingpt.trainer import Trainer
from gqe.mingpt.layer import LayerWiseTrainer


def generate_train_config():
    """Return the training configuration handed to the trainer.

    Starts from ``Trainer.get_default_config()`` and overrides the learning
    rate, iteration budget, worker count and number of samples.
    """
    config = Trainer.get_default_config()
    overrides = {
        "learning_rate": 5e-7,
        "max_iters": 100,
        "num_workers": 10,
        "n_samples": 50,
    }
    for field, value in overrides.items():
        setattr(config, field, value)
    return config


# The trainer receives the two config factories themselves (not their results),
# the energy cost, the layer count and whatever device get_device() reports.
trainer = LayerWiseTrainer(
    generate_train_config,
    generate_model_config,
    cost,
    num_layers,
    get_device(),
)

In [32]:
# Attach a fresh PrintMonitor and FileMonitor to every layer index, keeping
# each FileMonitor so its recorded values can be read after training.
file_monitors = []
print_monitors = []
for index in range(num_layers):
    file_monitor = FileMonitor()
    layer_monitors = [PrintMonitor(), file_monitor]
    file_monitors += [file_monitor]
    trainer.set_monitors(index, layer_monitors)

# Start training only after all monitors are registered.
trainer.run()
#torch.save(model.state_dict(), '../saved_models/gptqe_test_2')

layer: 1 starts running
number of parameters: 91.99M
running on device mps
iter_dt 0.00s; iter 0: train loss 0.33718 temperature: 5
mean_logits tensor([-2.3561, -2.0183, -2.2393, -2.3006, -1.8072, -1.9265, -1.9937, -2.2464,
        -1.7399, -1.8959, -2.2233, -2.0834, -2.0592, -2.0900, -2.0204, -2.1247,
        -2.2222, -1.8461, -2.1762, -1.9636, -2.0597, -2.2928, -2.0009, -1.6004,
        -1.8242, -2.3260, -1.8684, -2.0300, -2.2221, -1.6224, -1.9560, -1.9384,
        -2.3869, -2.0354, -2.2541, -1.8855, -1.8910, -2.1361, -1.8264, -2.2053,
        -2.1988, -1.6047, -2.0655, -1.7232, -1.6968, -2.0199, -2.2574, -1.9825,
        -1.9537, -1.5838], device='mps:0', grad_fn=<SubBackward0>)
energies: tensor([-1.9899, -1.9821, -2.0146, -1.9794, -1.9204, -1.9234, -1.9620, -1.9925,
        -1.9596, -2.0111, -2.0243, -2.0039, -1.9827, -1.9798, -2.0014, -1.9509,
        -1.9802, -1.9388, -1.9823, -1.9701, -1.9875, -1.9791, -1.9041, -1.9818,
        -1.9138, -1.9661, -1.9658, -1.9606, -2.0398, -1.959

In [27]:
# One line per layer: the `min_energy` recorded by that layer's FileMonitor.
for fm in file_monitors:
    layer_min_energy = fm.min_energy
    print(layer_min_energy)


-2.144927501678467
-2.4615018367767334
-2.6674141883850098
-2.6935291290283203
-2.7183966636657715
-2.7465150356292725
-2.7500452995300293
-2.7533998489379883
-2.75622296333313
-2.7588882446289062
-2.760599136352539
-2.762188196182251


In [None]:
# cost.sequence.tool = "qiskit"
# print(file_monitor.min_indices)
# cost.sequence._get_circuit(file_monitor.min_indices).qc.draw(output="mpl", plot_barriers=True)