# Running Instructions
The instructions below were tested on Ubuntu 22.04.3 (Windows Subsystem for Linux 2, WSL2).  
1. Create a clean Python 3.10 virtual environment: `python -m venv env_planted_solutions`
2. Activate environment: `source env_planted_solutions/bin/activate`
3. Install Jupyter notebook: `python -m pip install notebook`
4. Run this notebook. Running in VS Code is recommended, but not necessary.

In [None]:
# Environment setup cell: clears the interactive namespace, then installs the packages
# this notebook needs into the environment of the running kernel.
# `%reset -f` is an IPython magic that clears user-defined names without asking for confirmation.
%reset -f 
# Install pip packages in the current Jupyter kernel (from https://jakevdp.github.io/blog/2017/12/05/installing-python-packages-from-jupyter/)
# `sys.executable` is the interpreter running this kernel, so pip installs into the same environment.
import sys
# dmrghandler is installed directly from GitHub; the extra index URL is the block2 preview package index.
!{sys.executable} -m pip install --extra-index-url=https://block-hczhai.github.io/block2-preview/pypi/ git+https://github.com/jtcantin/dmrghandler
# Remaining packages installed from the default package index.
!{sys.executable} -m pip install openfermion tensorflow h5py pyscf pandas numpy gitpython 

# Timing note: the first run of this cell took about 8 minutes; later runs took only about 8 seconds.

In [None]:
import copy
import numpy as np
from pathlib import Path
import dmrghandler.dmrg_calc_prepare
import pyscf.tools.fcidump
import pyscf.fci
import os
import planted_solutions_functions as psf
import uuid
import gzip
import shutil
import git

In [None]:
### Parameters

# --- Input / output locations -------------------------------------------------
# Every file found in `fcidump_path` is processed by the generation loop.
fcidump_path = Path("fcidumps_catalysts")
fcidump_output_path = Path("generated_planted_solutions")
json_output_path = Path("generated_planted_solutions")

# --- Block structure ----------------------------------------------------------
# Number of spatial orbitals in a block
block_size = 4

# Number of electrons per block; must be even because a singlet spin state is assumed
ne_per_block = 4

# Strength of the balance operators that enforce the electron count within a block
balance_strength = 10.0

# Allowed +/- deviation in the number of electrons per block
ne_range = 0

# --- Obfuscation --------------------------------------------------------------
# Number of killer operators for each CAS block
n_killer = 3

# Strength of killer operators: killer_coefficient*(1+rand[0,1])
killer_coefficient = 0.01

# Orbital rotation angles are randomly chosen from
# [0, 2*pi*orbital_rotation_angle_scaling_factor]
orbital_rotation_angle_scaling_factor = 0.1 / (2 * np.pi)

# --- Random number generator seeds --------------------------------------------
# Twelve base seeds, repeated 20 times so that up to 240 files can be indexed.
rng_global_seed_list = 20 * [
    4580379, 2962639, 3270423, 9269130,
    5222682, 8174757, 3618160, 7316534,
    1148087, 1677362, 9221496, 8813557,
]

# --- Classification -----------------------------------------------------------
# A system with more orbitals than this is labelled "utility scale"
utility_scale_threshold = 40

# --- Output options -----------------------------------------------------------
# Save the Hamiltonian in the fcidump format
save_bool = True
# gzip-compress the saved fcidump file
compress_file = True

# --- Validation options (only sensible for small systems) ---------------------
# Run Full CI to check the ground state energy
FCI_bool = True
# Max number of orbitals before FCI is skipped
fci_threshold = 15

# Check the symmetries of the planted Hamiltonian
check_symmetry = True
# Max number of orbitals before the symmetry check is skipped
check_symmetry_threshold = 15

# Check the rotational invariance of the Hamiltonians
check_rotational_invariance = True
# Max number of orbitals before the rotational invariance check is skipped
check_rotational_invariance_threshold = 15

# Save json files
save_json_bool = True

# --- Metadata for json files --------------------------------------------------
# The counter is incremented before use, so the first instance is numbered 101
problem_instance_count_start_number = 100
planted_solution_name_prefix = ""
problem_instance_json_schema_url = "https://raw.githubusercontent.com/isi-usc-edu/qb-gsee-benchmark/main/schemas/problem_instance.schema.0.0.1.json"
# NOTE(review): this URL pins a fixed commit, whereas `github_commit_sha` below is read
# from the current checkout -- confirm that the two are meant to differ.
generation_code_url = "https://github.com/jtcantin/planted_solutions/blob/7ca06fd3be2449b98330c97c9f756c541ca7e63f/planted_hamiltonian_generator.ipynb"

github_repository_url = "https://github.com/jtcantin/planted_solutions"
# The commit SHA is taken from HEAD of the enclosing git repository
# (previously hard-coded as "7ca06fd3be2449b98330c97c9f756c541ca7e63f").
repo = git.Repo(search_parent_directories=True)
github_commit_sha = repo.head.object.hexsha

paper_reference_doi = "Not Available"

fcidump_permanent_storage_location = "sftp://sftp.l3harris.com/gsee/planted_solutions/"

contact_info = [
    dict(
        name="Joshua T. Cantin",
        email="joshua.cantin@utoronto.ca",
        institution="University of Toronto at Scarborough",
    )
]

In [None]:
# Per-file results collected by the generation loop: processed file names and the
# corresponding planted ground state energies (three independent, initially empty lists).
fcidumps, e_mins, e_min_killers = [], [], []

In [None]:
# Generate one planted-solution Hamiltonian per FCIDUMP file found in `fcidump_path`.
# For each file: load the integrals, build the block (CAS) Hamiltonian, add balance and
# killer operators, apply an orbital rotation, optionally validate the result (rotation
# invariance, symmetries, FCI) and write the FCIDUMP and problem-instance json files.
#
# os.listdir() returns entries in arbitrary order; sorting makes the file -> seed and
# file -> instance-number assignment reproducible across machines and runs.
for file_iter, fcidump_filename in enumerate(sorted(os.listdir(fcidump_path))):
    # Wrap around the seed list so that more files than seeds does not raise IndexError
    rng_global_seed = rng_global_seed_list[file_iter % len(rng_global_seed_list)]
    rng_obj = np.random.default_rng(rng_global_seed)
    print("#####################################################################")
    print(f"Processing {fcidump_filename}")
    print("#####################################################################")

    # Name of the source file as found on disk; recorded in the json metadata.
    # Set for every file (compressed or not) so that it is always defined below.
    fcidump_filename_orig = fcidump_filename

    remove_uncompressed = fcidump_filename.endswith(".gz")
    if remove_uncompressed:
        # Strip only the trailing ".gz"; the same name is used for writing and reading
        fcidump_filename = Path(fcidump_filename_orig).stem
    fcidump_file_path = Path(fcidump_path) / Path(fcidump_filename)

    try:
        # If compressed, uncompress first
        if remove_uncompressed:
            print(f"Uncompressing {fcidump_filename_orig}")
            with gzip.open(Path(fcidump_path) / Path(fcidump_filename_orig), "rb") as f_in:
                with open(fcidump_file_path, "wb") as f_out:
                    shutil.copyfileobj(f_in, f_out)

        # Obtain integrals of original Hamiltonian
        (
            one_body_tensor,
            two_body_tensor,
            nuc_rep_energy,
            num_orbitals,
            num_spin_orbitals,
            num_electrons,
            two_S,
            two_Sz,
            orb_sym,
            extra_attributes,
        ) = dmrghandler.dmrg_calc_prepare.load_tensors_from_fcidump(
            data_file_path=fcidump_file_path,
            molpro_orbsym_convention=True,
        )
    finally:
        # The temporary uncompressed copy is only needed for loading. Removing it here,
        # even on failure, keeps it from being picked up as an extra input on the next run.
        if remove_uncompressed:
            fcidump_file_path.unlink(missing_ok=True)

    # Ceiling division: a partially filled last block still counts as a block
    num_blocks = num_orbitals // block_size + (num_orbitals % block_size > 0)

    # Check that all electrons will fit into the blocks,
    # if not, increase the number of electrons per block.
    # A per-file copy is used so that an increase needed for one file does not leak
    # into the files processed afterwards (or into a re-run of this cell).
    file_ne_per_block = ne_per_block
    if num_electrons / num_blocks > file_ne_per_block:
        print(
            f"Number of electrons ({num_electrons}) is too large for the number of blocks ({num_blocks}) and electrons per block ({file_ne_per_block}). Increasing electrons per block."
        )
        while num_electrons / num_blocks > file_ne_per_block:
            file_ne_per_block += 2

        print(f"New electrons per block: {file_ne_per_block}")

    assert (
        num_electrons / num_blocks <= file_ne_per_block
    ), f"Number of electrons ({num_electrons}) is too large for the number of blocks ({num_blocks}) and electrons per block ({file_ne_per_block})"
    assert (
        file_ne_per_block % 2 == 0
    ), f"Number of electrons per block must be even, Ne per block: {file_ne_per_block}"
    assert (
        file_ne_per_block / (2 * block_size) <= 1
    ), f"Number of electrons per block must be less than or equal to the number of spin orbitals in a block, Ne per block: {file_ne_per_block}, Num spin orbitals per block: {2*block_size}"

    # one_body_tensor and two_body_tensor assume the following form of the Hamiltonian:
    # H = E_0 + h_ij a†_i a_j + 0.5*g_ijkl a†_i a†_k a_l a_j
    # where i,j,k,l are indices for the spatial orbitals (NOT spin orbitals)
    # The full g_ijkl tensor, not a permutation-symmetry-compressed version, is returned.
    # "C1" point group symmetry is assumed

    num_spin_orbitals = 2 * num_orbitals
    num_spatial_orbitals = num_orbitals

    print(f"Number of spin orbitals: {num_spin_orbitals}")

    # Assert that the number of electrons is even
    assert (
        num_electrons % 2 == 0
    ), f"Currently, the number of electrons must be even, Ne: {num_electrons}"

    # Convert to chemist's notation. That is, convert the Hamiltonian to the form:
    # H = E_0 + \sum_{ij} h_{ij} a_i^† a_j + \sum_{ijkl} g_ijkl a_i^† a_j a_k^† a_l
    # where i,j,k,l are indices for the spatial orbitals (NOT spin orbitals)
    # Note: Hobt/Htbt alias the loaded tensors, which are therefore modified in place.
    Hobt = one_body_tensor
    Htbt = two_body_tensor
    Hobt -= 0.5 * np.einsum("prrq->pq", Htbt.copy())
    Htbt *= 0.5

    H = (Hobt, Htbt)

    # Partition the spatial orbitals into blocks
    k = psf.construct_blocks(block_size, num_spatial_orbitals, spin_orb=False)
    print(f"Block Structure: {k}")

    # Obtain the number of parameters for some quantities
    # upnum: number of parameters for unitary rotations of whole space, is N(N-1) where N is the number of spatial orbitals
    # casnum: number of parameters for the CAS block, is N^4 + N^2 where N is the number of spatial orbitals in the block
    # pnum: upnum + casnum
    upnum, casnum, pnum = psf.get_param_num(num_spatial_orbitals, k, complex=False)

    # Construct the CAS block Hamiltonian
    # Only integrals within each partition are kept, all integrals coupling different partitions are discarded
    # cas_obt: one-body tensor for the CAS block
    # cas_tbt: two-body tensor for the CAS block
    # cas_x: 1D array of all integrals in the CAS block
    cas_obt, cas_tbt, cas_x = psf.get_truncated_cas_tbt(H, k, casnum)

    print("CAS block Hamiltonian constructed")

    # Add balance operators to maintain the number of electrons within each block
    # and then solve the CAS block Hamiltonian by exact diagonalization within each block
    # e_nums: number of electrons in each block
    # states: ground state in each block
    # E_cas: total ground state energy of the CAS block Hamiltonian, which is the sum of the ground state energies in each block
    # H_cas is updated to include the balance operators
    H_cas = [cas_obt, cas_tbt]
    e_nums, states, E_cas, core_energy_balance = psf.solve_enums(
        H_cas,
        k,
        num_electrons,
        ne_per_block=file_ne_per_block,
        ne_range=ne_range,
        balance_t=balance_strength,
        rng_obj=rng_obj,
    )

    # Snapshots of the CAS tensors after the balance step (not used further in this cell)
    obt_2 = copy.deepcopy(cas_obt)
    tbt_2 = copy.deepcopy(cas_tbt)

    print("Solve Enums")

    # Build the killer operators
    # These add obfuscation to the CAS block Hamiltonian
    killer_constant, killer_obt, killer_tbt = psf.construct_killer_directly(
        k,
        e_nums,
        const=killer_coefficient,
        n=num_spatial_orbitals,
        n_killer=n_killer,
        rng_obj=rng_obj,
    )

    # Generate different forms of the planted solution
    # The final version to use for a benchmark is H_killer_hidden
    # This has both killer operator obfuscation and orbital rotation obfuscation
    # U: Arbitrary Unitary Rotation, in spatial orbital basis
    # H_cas: Unhidden CAS Fragments
    # H_hidden: U H_cas U*
    # H_with_killer: H_cas + killer
    # H_killer_hidden: U H_with_killer U*
    # Each Hamiltonian is indexed below as [0] constant, [1] one-body, [2] two-body tensor.
    U, H_cas, H_hidden, H_with_killer, H_killer_hidden, E_min = (
        psf.unitary_rotation_obfuscation(
            cas_obt,
            cas_tbt,
            killer_obt,
            killer_tbt,
            E_cas,
            killer_constant,
            upnum,
            num_spatial_orbitals,
            rng_obj=rng_obj,
            core_energy=core_energy_balance,
            scaling_factor=orbital_rotation_angle_scaling_factor,
        )
    )
    print(f"E_min: {E_min}")
    fcidumps.append(fcidump_filename)
    e_mins.append(E_min)
    # NOTE(review): this stores the same value as `e_mins` -- confirm this is intended.
    e_min_killers.append(E_min)

    # Convert the Hamiltonians to the form used for fcidump files
    block_balanced_H_ij, block_balanced_G_ijkl = (
        psf.chem_spatial_orb_to_phys_spatial_orb(H_cas[1], H_cas[2])
    )
    block_balanced_rotated_H_ij, block_balanced_rotated_G_ijkl = (
        psf.chem_spatial_orb_to_phys_spatial_orb(H_hidden[1], H_hidden[2])
    )
    block_balanced_killer_H_ij, block_balanced_killer_G_ijkl = (
        psf.chem_spatial_orb_to_phys_spatial_orb(H_with_killer[1], H_with_killer[2])
    )
    block_balanced_killer_rotated_H_ij, block_balanced_killer_rotated_G_ijkl = (
        psf.chem_spatial_orb_to_phys_spatial_orb(H_killer_hidden[1], H_killer_hidden[2])
    )

    # Check for self-consistency of the Hamiltonians
    if (
        check_rotational_invariance
        and num_orbitals <= check_rotational_invariance_threshold
    ):
        psf.ensure_rotation_invariance(
            H_cas,
            H_hidden,
            block_balanced_H_ij,
            block_balanced_rotated_H_ij,
            block_balanced_G_ijkl,
            block_balanced_rotated_G_ijkl,
            H_with_killer,
            H_killer_hidden,
            block_balanced_killer_H_ij,
            block_balanced_killer_rotated_H_ij,
            block_balanced_killer_G_ijkl,
            block_balanced_killer_rotated_G_ijkl,
        )

    # Make dir
    fcidump_output_path = Path(fcidump_output_path)
    fcidump_output_path.mkdir(parents=True, exist_ok=True)

    # From here on the electron count is the sum over the blocks
    num_electrons = np.sum(e_nums)
    num_alpha_electrons = int(num_electrons // 2 + num_electrons % 2)
    num_beta_electrons = int(num_electrons // 2)

    problem_instance_count_start_number += 1
    planted_solution_name = (
        planted_solution_name_prefix + f"{problem_instance_count_start_number:04d}"
    )
    uuid_string = str(uuid.uuid4())
    filename_fcidump = (
        "fcidump.planted_solution_" + planted_solution_name + "." + uuid_string
    )

    if save_bool:

        pyscf.tools.fcidump.from_integrals(
            fcidump_output_path / Path(filename_fcidump),
            block_balanced_killer_rotated_H_ij,
            block_balanced_killer_rotated_G_ijkl,
            nmo=block_balanced_killer_rotated_H_ij.shape[0],
            nelec=num_electrons,
            nuc=H_killer_hidden[0],
            ms=0,
            orbsym=None,
            tol=1e-15,
            float_format=" %.14g",  # Less than double precision to aid reproducibility
        )
        if compress_file:
            print(f"Compressing {filename_fcidump}")
            with open(fcidump_output_path / Path(filename_fcidump), "rb") as f_in:
                with gzip.open(
                    str(fcidump_output_path / Path(filename_fcidump)) + ".gz", "wb"
                ) as f_out:
                    shutil.copyfileobj(f_in, f_out)

            # The uncompressed output file is intentionally left in place next to the .gz
            filename_fcidump = filename_fcidump + ".gz"

    if check_symmetry:
        if num_orbitals > check_symmetry_threshold:
            print(
                f"Skipping symmetry check as the number of orbitals ({num_orbitals}) is above the threshold ({check_symmetry_threshold})"
            )
        else:

            print("Checking symmetries of the Hamiltonian")
            # Checking symmetries in chemist's notation
            print("######Checking H_cas#######")
            return_dict_H_cas = psf.check_hamiltonian(
                obt_to_check=H_cas[1], tbt_to_check=H_cas[2], spatial_orbitals=True
            )
            print("######Checking H_hidden#######")
            return_dict_H_hidden = psf.check_hamiltonian(
                obt_to_check=H_hidden[1],
                tbt_to_check=H_hidden[2],
                spatial_orbitals=True,
            )
            print("######Checking H_with_killer#######")
            return_dict_H_with_killer = psf.check_hamiltonian(
                obt_to_check=H_with_killer[1],
                tbt_to_check=H_with_killer[2],
                spatial_orbitals=True,
            )
            print("######Checking H_killer_hidden#######")
            return_dict_H_killer_hidden = psf.check_hamiltonian(
                obt_to_check=H_killer_hidden[1],
                tbt_to_check=H_killer_hidden[2],
                spatial_orbitals=True,
            )

            # Ensure that the Hamiltonians are symmetric
            assert return_dict_H_cas[
                "permutation_symmetries_real_orbitals_check_passed"
            ], "H_cas failed real orbital permutation symmetry check"
            assert return_dict_H_hidden[
                "permutation_symmetries_real_orbitals_check_passed"
            ], "H_hidden failed real orbital permutation symmetry check"
            assert return_dict_H_with_killer[
                "permutation_symmetries_real_orbitals_check_passed"
            ], "H_with_killer failed real orbital permutation symmetry check"
            assert return_dict_H_killer_hidden[
                "permutation_symmetries_real_orbitals_check_passed"
            ], "H_killer_hidden failed real orbital permutation symmetry check"

    if FCI_bool:
        if num_orbitals > fci_threshold:
            print(
                f"Skipping FCI calculation as the number of orbitals ({num_orbitals}) is above the threshold ({fci_threshold})"
            )
        else:
            # Below based on example from https://github.com/pyscf/pyscf/blob/master/examples/fci/01-given_h1e_h2e.py
            cisolver = pyscf.fci.direct_spin1.FCI()
            cisolver.max_cycle = 1000  # Max. iterations for diagonalization
            cisolver.conv_tol = 1e-13  # Convergence tolerance for diagonalization

            # Compare the number of non-zero terms in the Hamiltonian between versions
            print("Checking the number of non-zero terms in the Hamiltonian")
            tensors_to_report = {
                "block_balanced_killer_rotated_H_ij": block_balanced_killer_rotated_H_ij,
                "block_balanced_killer_H_ij": block_balanced_killer_H_ij,
                "block_balanced_rotated_H_ij": block_balanced_rotated_H_ij,
                "block_balanced_H_ij": block_balanced_H_ij,
                "block_balanced_killer_rotated_G_ijkl": block_balanced_killer_rotated_G_ijkl,
                "block_balanced_killer_G_ijkl": block_balanced_killer_G_ijkl,
                "block_balanced_rotated_G_ijkl": block_balanced_rotated_G_ijkl,
                "block_balanced_G_ijkl": block_balanced_G_ijkl,
            }
            for tensor_name, tensor in tensors_to_report.items():
                print(
                    f"Number of non-zero terms in {tensor_name}: {np.count_nonzero(tensor)}"
                )

            print("Starting FCI calculation")
            e_min_fci, fcivec = cisolver.kernel(
                np.array(block_balanced_killer_rotated_H_ij, dtype=np.float64),
                np.array(block_balanced_killer_rotated_G_ijkl, dtype=np.float64),
                ecore=H_killer_hidden[0],
                norb=num_orbitals,
                nelec=(num_alpha_electrons, num_beta_electrons),
            )

            # Check that the ground state energy is the same as the one obtained from the planted solution
            print(f"FCI ground state energy: {e_min_fci}")
            print(f"Planted solution ground state energy: {E_min}")
            print(f"Relative error: {(e_min_fci - E_min) / E_min}")
            print(f"Absolute error: {e_min_fci - E_min}")

            # Only the final (killer + rotated) Hamiltonian is checked here. The intermediate
            # tensors (block_balanced_killer_*, block_balanced_rotated_*, block_balanced_*)
            # can be passed to cisolver.kernel in the same way when debugging.

            assert np.isclose(
                e_min_fci, E_min
            ), f"FCI ground state energy ({e_min_fci}) does not match the planted solution energy ({E_min})"

    if save_json_bool:
        print("Saving json files")
        # The json output directory may differ from the fcidump output directory
        Path(json_output_path).mkdir(parents=True, exist_ok=True)
        uuid_string_instance = str(uuid.uuid4())

        filename_json = (
            "problem_instance.planted_solution_"
            + planted_solution_name
            + "."
            + uuid_string_instance
            + ".json"
        )

        parameter_dict = {
            "multiplicity": 1,
            "num_electrons": int(num_electrons),
            "num_orbitals": int(num_orbitals),
            "utility_scale": bool(num_orbitals > utility_scale_threshold),
            "block_size": int(block_size),
            # Value actually used for this file (may exceed the configured parameter)
            "ne_per_block": int(file_ne_per_block),
            "balance_strength": float(balance_strength),
            "ne_range": int(ne_range),
            "n_killer": int(n_killer),
            "rng_global_seed": int(rng_global_seed),
            "utility_scale_threshold": int(utility_scale_threshold),
            "github_repository_url": github_repository_url,
            "github_commit_sha": github_commit_sha,
            "paper_reference_doi": paper_reference_doi,
            "known_ground_state_energy_hartrees": float(E_min),
            "killer_coefficient": float(killer_coefficient),
            "orbital_rotation_angle_scaling_factor": float(
                orbital_rotation_angle_scaling_factor
            ),
        }
        requirements = {
            "probability_of_success": 0.99,
            "time_limit_seconds": 86400,  # 24 hrs  # 172800, #48 hrs
            "accuracy": 1.0,
            "energy_units": "millihartree",
            "energy_target": 0.99,
        }
        # `contact_info` comes from the parameters cell
        psf.gen_json_files(
            filename_json=Path(json_output_path) / Path(filename_json),
            uuid_string_instance=uuid_string_instance,
            uuid_string_fcidump=uuid_string,
            short_name=f"planted_solution_{planted_solution_name}",
            filename_fcidump=fcidump_output_path / Path(filename_fcidump),
            parameter_dict=parameter_dict,
            status="in_development",
            contact_info=contact_info,
            superseded_by=None,
            problem_type="GSEE",
            application_domain="QC",
            requirements=requirements,
            problem_instance_json_schema_url=problem_instance_json_schema_url,
            generation_code_url=generation_code_url,
            source_fcidump=fcidump_filename_orig,
            fcidump_permanent_storage_location=fcidump_permanent_storage_location,
        )