# Basic Handling of QCFractal Dataset Views for Force Field Fitting

This notebook shows examples of how to interact with QCFractal dataset views. Feel free to browse the data using the provided Docker image.

## Import packages

In [None]:
from pathlib import Path
from pprint import pprint

import numpy as np
from qcportal import load_dataset_view
from qcportal.serialization import encode_to_json
from openff.units import unit
from openff.toolkit import Molecule
from forcebalance.molecule import Molecule as FBMolecule

In [None]:
# Relative path to the SQLite file that holds the optimization dataset view.
filename_opt_dsv = "views/OpenFF-Industry-Benchmark-Season-1-v1.2_optimization_view.sqlite"

## Optimization Dataset

In [None]:
# Open the optimization dataset view stored at the configured path.
dsv_opt = load_dataset_view(filename_opt_dsv)

# Materialize all entries once, then index them by entry name so that the
# entry belonging to a record can be looked up directly.
all_entries = [*dsv_opt.iterate_entries()]
name_to_entry = dict((entry.name, entry) for entry in all_entries)

In [None]:
# Export the final geometry of every record in the view in three ways:
# via the OpenFF toolkit, via QCElemental, and via ForceBalance.

# Make sure the output directory exists before any file is written.
Path("outputs").mkdir(parents=True, exist_ok=True)

# Key under which the mapped SMILES is stored, either in the entry attributes
# or in the extras of the entry's initial molecule.
smiles_key = "canonical_isomeric_explicit_hydrogen_mapped_smiles"

for name, spec_name, record in dsv_opt.iterate_records():
    # An unknown entry name raises a KeyError that names the entry itself,
    # instead of being reported as a missing SMILES key.
    entry = name_to_entry[name]

    # Look in the entry attributes first, then fall back to the molecule extras.
    # `or {}` guards against either container being None.
    mapped_smiles = (entry.attributes or {}).get(smiles_key)
    if mapped_smiles is None:
        mapped_smiles = (entry.initial_molecule.extras or {}).get(smiles_key)
    if mapped_smiles is None:
        raise KeyError(f"{smiles_key} not found for entry {name!r}")

    # The geometry is handled as atomic units: it gets Bohr units attached below.
    geometry_au = record.final_molecule.geometry

    molecule = Molecule.from_mapped_smiles(mapped_smiles, allow_undefined_stereo=True)
    molecule.add_conformer(
        np.array(geometry_au) * unit.bohr
    )
    # Files used in ForceBalance -- name uniquely for different targets.
    # NOTE: these fixed file names are overwritten on every loop iteration.
    molecule.to_file("outputs/mol.pdb", "PDB")
    molecule.to_file("outputs/mol.xyz", "XYZ")
    molecule.to_file("outputs/mol.sdf", "SDF")

    # Alternatively, using QCElemental
    xyz_str = record.final_molecule.to_string("xyz")
    # Use a context manager so the file handle is closed deterministically.
    with open("outputs/qce_mol.xyz", "w") as xyz_file:
        xyz_file.write(xyz_str)

    # Alternatively, using ForceBalance
    fb_molecule = FBMolecule()
    fb_molecule.Data = {
        "resname": ["UNK"] * molecule.n_atoms,
        "resid": [0] * molecule.n_atoms,
        "elem": [atom.symbol for atom in molecule.atoms],
        "bonds": [
            (bond.atom1_index, bond.atom2_index) for bond in molecule.bonds
        ],
        "name": f"{record.id}",
        # Coordinates of the single conformer added above, in Angstrom.
        "xyzs": [molecule.conformers[0].m_as(unit.angstrom)],
    }
    # Written under its own name so it does not clobber the PDB file produced
    # by the OpenFF toolkit above (same convention as `qce_mol.xyz`).
    fb_molecule.write("outputs/fb_mol.pdb")

## Pulling Energies / Geometries from Dataset View

In [None]:
# Take only the first item yielded by the view and unpack it into
# the entry name, the specification name and the record itself.
first_item = next(dsv_opt.iterate_records())
entry_name, spec_name, record = first_item

# Report the last energy of the record and the geometry of its final molecule.
print(
    f"The final energy of this record is: {record.energies[-1]}",
    f"The final geometry of this record is:\n{record.final_molecule.geometry}",
    sep="\n",
)

In [None]:
# Convert the record with qcportal's `encode_to_json` helper and pretty-print
# the result so its contents can be inspected.
# NOTE(review): the result is presumably a plain dictionary, as the variable
# name suggests -- confirm against the qcportal serialization documentation.
rec_dict = encode_to_json(record)
pprint(rec_dict)