Read data from completed DFT calculations back into CASM.

# Create working directory

In [5]:
import copy
import shutil
import os

In [6]:
# Work inside a scratch copy of the inputs so the originals are never modified.
# dirs_exist_ok=True (Python 3.8+) lets this cell succeed when 'SiGe' is left
# over from a previous run; without it copytree raises FileExistsError and
# "Restart Kernel -> Run All" only works once.
shutil.copytree('input', 'SiGe', dirs_exist_ok=True)
# All relative paths used below are resolved from inside the working copy.
os.chdir('SiGe')

# Set up project

In [109]:
import json
import libcasm.configuration as casmconfig
import libcasm.mapping.methods as mapmethods
import libcasm.xtal as xtal

Read prim

In [8]:
# Parse the prim definition from JSON first, then build the xtal-level Prim.
with open('prim.json') as prim_file:
    prim_dict = json.load(prim_file)
prim = xtal.Prim.from_dict(prim_dict)

# Wrap the xtal Prim in the configuration-level Prim used by the cells below.
config_prim = casmconfig.Prim(prim)

Read enumerated configurations from JSON

In [9]:
# Load the raw JSON description of the enumerated configurations.
with open('config_list.json') as config_list_file:
    config_data = json.load(config_list_file)

Create a config list from the enumerated configurations

In [10]:
# Supercells are shared between configurations through a single SupercellSet.
supercell_set = casmconfig.SupercellSet(prim=config_prim)

# Phase 1: rebuild every enumerated configuration from its JSON record.
config_list = []
for config_dict in config_data:
    config_list.append(
        casmconfig.Configuration.from_dict(
            data=config_dict,
            supercells=supercell_set,
        )
    )

# Phase 2: collect the configurations in a ConfigurationSet for later lookup.
config_set = casmconfig.ConfigurationSet()
for config in config_list:
    config_set.add(config)

# Read DFT data

The `training_data` folder contains the results of DFT calculations. Each directory labeled `SCELX_X_X_X_X_X_X/Y` corresponds to a configuration. The unrelaxed structure can be found in the file `POS` and the DFT data, including the relaxed structure, can be found in the file `calctype.default/properties.calc.json`.

In [11]:
from pathlib import Path

In [29]:
# Root folder holding one `SCEL*/<index>/` directory per DFT calculation.
training_root = Path('training_data')

# Collect the calculation directories.
# - is_dir() makes the "directories only" intent explicit: the trailing '/' in
#   the pattern only restricts matches to directories on Python 3.11+.
# - sorted() fixes the processing order; rglob yields entries in an arbitrary,
#   filesystem-dependent order, which would otherwise make the order of the
#   records written later differ between machines and runs.
training_data = sorted(
    path for path in training_root.rglob('SCEL*/[0-9]*/') if path.is_dir()
)

We will now read the training data into configurations with properties. For each folder in `training_data`, we find the matching configuration in `config_set` by reading the unrelaxed structure from `POS`. Then we use `properties.calc.json` to get the DFT energy and forces, and map the relaxed structure onto the unrelaxed one to calculate the deformation costs (atomic, lattice, and total) of the structural relaxation. Note that the training data does not contain calculations for every enumerated configuration in `config_list`. Also, the name of each configuration (e.g. `SCEL1_1_1_1_0_0_0/1`) is not a unique identifier -- the "fingerprint" of a configuration is its occupation string. For simplicity, we ignore configurations that are not primitive. The results are collected in `config_props` and then written to a file called `config_props.json`.

In [151]:
# Build one ConfigurationWithProperties record (as a dict) for every usable
# calculation directory in `training_data`.
config_props = []
for t in training_data:
    pos = t/'POS'
    properties = t/'calctype.default/properties.calc.json'
    # Skip directories that lack the unrelaxed structure or the DFT results.
    if not (pos.exists() and properties.exists()):
        continue

    # Rebuild the configuration this calculation started from.
    struc = xtal.Structure.from_poscar(pos.as_posix())
    config = casmconfig.Configuration.from_structure(config_prim, struc)
    # Non-primitive configurations are ignored for simplicity.
    if not casmconfig.is_primitive_configuration(config):
        continue

    # Every calculated configuration must be one of the enumerated ones.
    enumerated_config = config_set.get(config)
    if not enumerated_config:
        raise ValueError(f'missing config: {t}')

    with open(properties, 'r') as f:
        props = json.load(f)
    relaxed_struc = xtal.Structure.from_dict(props)

    # Re-read the unrelaxed POSCAR as a Prim so it can act as the reference
    # the relaxed structure is mapped onto. This is parsed only now, after the
    # skip checks above, so skipped directories are not parsed twice.
    struc_as_prim = xtal.Prim.from_poscar(pos.as_posix())
    maps = mapmethods.map_structures(
        struc_as_prim,
        relaxed_struc,
        structure_factor_group=xtal.make_factor_group(relaxed_struc),
        max_vol=1
    )
    # Exactly one mapping is expected; fail with the offending directory in
    # the message (and in the same style as the "missing config" error above)
    # instead of a bare assert, which carries no context and is stripped
    # when Python runs with -O.
    if len(maps) != 1:
        raise ValueError(
            f'expected exactly one structure mapping, found {len(maps)}: {t}'
        )
    mapping = maps[0]

    # NOTE(review): forces are stored exactly as read from the properties
    # file; no reordering or reshaping based on `mapping` is applied here --
    # confirm this matches the site ordering/layout the configuration expects.
    # NOTE(review): string-valued entries (e.g. the enumerated configuration
    # name or the properties path) were tried here and left out; it appears
    # that property values must be numeric -- confirm.
    config_with_properties = casmconfig.ConfigurationWithProperties(
        config,
        local_properties={
            'force': props['atom_properties']['force']['value']
        },
        global_properties={
            'energy': [props['global_properties']['energy']['value']],
            'atomic_deformation_cost': [mapping.atom_cost()],
            'lattice_deformation_cost': [mapping.lattice_cost()],
            'total_cost': [mapping.total_cost()],
        }
    )
    config_props.append(config_with_properties.to_dict())

In [152]:
# Persist the collected records; the file is created (or overwritten) in the
# current working directory.
with open('config_props.json', 'w') as props_file:
    props_file.write(json.dumps(config_props))