Read data from completed DFT calculations back into CASM.

# Create working directory

In [1]:
import copy
import shutil
import os

In [2]:
# Copy the input files into a fresh project directory and make it the
# current working directory for every relative path used below.
project_dir = 'SiGe'
shutil.copytree('input', project_dir)
os.chdir(project_dir)

# Set up project

In [3]:
import json
import libcasm.composition as casmcomp
import libcasm.configuration as casmconfig
import libcasm.mapping.info as mapinfo
import libcasm.mapping.methods as mapmethods
import libcasm.xtal as xtal

Read prim

In [4]:
# Parse the prim JSON first, then build the `xtal.Prim` and the
# `casmconfig.Prim` constructed from it.
with open('prim.json', 'r') as prim_file:
    prim_data = json.load(prim_file)

prim = xtal.Prim.from_dict(prim_data)
config_prim = casmconfig.Prim(prim)

Read enumerated configurations from JSON

In [5]:
# Load the enumerated configurations as plain JSON data; they are turned
# into Configuration objects in the next step.
with open('config_list.json', 'r') as config_list_file:
    config_data = json.loads(config_list_file.read())

Create a config list from the enumerated configurations

In [6]:
# All configurations share one supercell set built on the prim.
supercell_set = casmconfig.SupercellSet(prim=config_prim)

# Construct each enumerated configuration from its JSON data, keeping both an
# ordered list and a ConfigurationSet (used later for lookups).
config_list = []
config_set = casmconfig.ConfigurationSet()
for config_dict in config_data:
    config = casmconfig.Configuration.from_dict(
        data=config_dict,
        supercells=supercell_set,
    )
    config_list.append(config)
    config_set.add(config)

# Read DFT data

The `training_data` folder contains the results of DFT calculations. Each directory labeled `SCELX_X_X_X_X_X_X/Y` corresponds to a configuration. The unrelaxed structure can be found in the file `POS` and the DFT data, including the relaxed structure, can be found in the file `calctype.default/properties.calc.json`.

In [7]:
from pathlib import Path

In [8]:
# Recursively collect the per-configuration calculation paths
# (`SCEL*/<name starting with a digit>`) found under `training_data`.
training_data = list(Path('training_data').rglob('SCEL*/[0-9]*/'))

We will now read the training data into configurations with properties. For each folder in `training_data`, we will find a match with a configuration in `config_set` by reading the unrelaxed structure from `POS`. Then, we will use `properties.calc.json` to get the DFT energy and calculate the deformation from structural relaxation. Note that the training data does not contain calculations for every enumerated configuration in the `config_list`. Also, the name of each configuration (e.g. `SCEL1_1_1_1_0_0_0/1`) is not a unique identifier -- the "fingerprint" of a configuration is its occupation string. For simplicity, we ignore configurations which are not primitive. Results are written to a file called `config_props.json`.

This routine maps two structures assuming that the second (child) structure was produced directly from the first (parent) structure, for example via relaxation with VASP.

In [14]:
def map_relaxed_structure(ideal_structure, relaxed_structure_with_properties):
    """Map a relaxed structure onto the ideal structure it was relaxed from.

    The lattices are mapped without reorientation, i.e. assuming the relaxed
    structure was produced directly from the ideal structure (for example by
    a VASP relaxation). The atoms are then mapped using that lattice mapping.

    Parameters
    ----------
    ideal_structure: xtal.Structure
        The ideal (unrelaxed) structure.
    relaxed_structure_with_properties: xtal.Structure
        The relaxed structure, as read from the calculation results.

    Returns
    -------
    scored_structure_mapping: mapinfo.ScoredStructureMapping
        The structure mapping, constructed with the atom cost of the single
        atom mapping that was found. `lattice_cost` and `total_cost` are set
        to the placeholder value -1 and are not meaningful.

    Raises
    ------
    ValueError
        If the atom mapping search does not return exactly one mapping.
    """
    # the ideal structure must be converted to CASM `xtal.Prim` format for some mapping steps.
    ideal_prim = xtal.Prim.from_poscar_str(ideal_structure.to_poscar_str())

    # map lattices, assuming that the relaxed structure is relaxed directly from the ideal structure in VASP
    lattice_mapping = mapmethods.map_lattices_without_reorientation(
        ideal_structure.lattice(), relaxed_structure_with_properties.lattice()
    )

    # now map the atoms using the lattice mapping which was found -- there should only be one atom mapping
    atom_mappings = mapmethods.map_atoms(
        ideal_prim, relaxed_structure_with_properties,
        lattice_mapping=lattice_mapping,
        prim_factor_group=xtal.make_factor_group(ideal_structure)
    )
    # Validate explicitly rather than with `assert`, which is skipped under `python -O`.
    if len(atom_mappings) != 1:
        raise ValueError(
            f'expected exactly one atom mapping, found {len(atom_mappings)}'
        )
    atom_mapping = atom_mappings[0]

    structure_mapping = mapinfo.StructureMapping(
        ideal_prim, lattice_mapping, atom_mapping
    )

    # Wrap the result in a scored structure mapping so that the atom cost is
    # carried along with it; the lattice and total costs are -1 placeholders.
    # NOTE(review): the original author flagged this workaround as "does not
    # work" -- confirm that the atom cost is retrievable from the result.
    scored_structure_mapping = mapinfo.ScoredStructureMapping(
        structure_mapping=structure_mapping,
        lattice_cost=-1, atom_cost=atom_mapping.atom_cost(), total_cost=-1
    )
    return scored_structure_mapping

Now we map all structures in the training data, skipping any whose configuration is not primitive.

In [15]:
config_props = []

for directory in training_data:
    # Every calculation directory must provide both the unrelaxed structure
    # (`POS`) and the DFT results; incomplete directories are skipped.
    ideal_poscar = directory / 'POS'
    relaxed_structure_with_properties_json = (
        directory / 'calctype.default/properties.calc.json'
    )
    if not (ideal_poscar.exists() and relaxed_structure_with_properties_json.exists()):
        continue

    ideal_structure = xtal.Structure.from_poscar(ideal_poscar.as_posix())
    with open(relaxed_structure_with_properties_json, 'r') as props_file:
        props = json.load(props_file)
    relaxed_structure_with_properties = xtal.Structure.from_dict(props)

    # Enforce primitive configurations (will not be needed later).
    # NOTE: comparing `xtal.make_primitive_structure(ideal_structure)` with
    # the ideal structure via `is_equivalent_to` was tried and did not work
    # (coordinate mode seems to be wrong), so primitiveness is checked on the
    # configuration built from the ideal structure instead.
    ideal_config = casmconfig.Configuration.from_structure(config_prim, ideal_structure)
    if not casmconfig.is_primitive_configuration(ideal_config):
        continue

    # Map the relaxed structure onto the ideal one, then convert the mapped
    # structure into a configuration with properties.
    structure_mapping = map_relaxed_structure(
        ideal_structure,
        relaxed_structure_with_properties,
    )
    mapped_structure = mapmethods.make_mapped_structure(
        structure_mapping,
        relaxed_structure_with_properties,
    )
    configuration_with_properties = (
        casmconfig.ConfigurationWithProperties.from_structure(
            config_prim,
            mapped_structure,
        )
    )

    record = {
        'configuration_with_properties': configuration_with_properties,
        'structure_mapping': structure_mapping,
        'path': directory,
    }
    config_props.append(record)

Using the initial `config_set` from CASM enumeration, we can assign names to the configurations.

In [16]:
# Look up the enumerated counterpart of the first calculated configuration
# and print the name stored with it.
first_entry = config_props[0]
calculated_config = first_entry['configuration_with_properties'].configuration
enumerated_config = config_set.get(calculated_config)
print(enumerated_config.configuration_name)

SCEL4_4_1_1_0_0_2/7


In [17]:
# Store the enumerated configuration's name on every calculated entry.
for entry in config_props:
    calculated_config = entry['configuration_with_properties'].configuration
    enumerated_config = config_set.get(calculated_config)
    entry['configuration_name'] = enumerated_config.configuration_name

We can also calculate compositions and formation energies. The calculator will return the fraction of each species in the configuration. We will order the species as [Si, Ge] so that querying the first index as $x$ will give Si$_{x}$Ge$_{1-x}$.

In [18]:
# the order to report the composition in
components = ['Si', 'Ge']
# the allowed species on each site of the prim
allowed_occs = [['Si', 'Ge']]
composition_calculator = casmcomp.CompositionCalculator(
    components=components, allowed_occs=allowed_occs
)

# Example: species fractions for the first calculated configuration; index 0
# follows the order of `components`, i.e. Si.
calculated_config = config_props[0]['configuration_with_properties'].configuration
x = composition_calculator.species_frac(calculated_config.occupation)[0]
print(f'atom fraction Si = {x}')

atom fraction Si = 0.375


In [19]:
# Record the Si atom fraction (first entry of the species fractions) on
# every calculated entry.
for entry in config_props:
    entry_occupation = entry['configuration_with_properties'].configuration.occupation
    entry['atom_fraction_Si'] = composition_calculator.species_frac(entry_occupation)[0]

We can get information about the child's lattice deformation from the structure mapping, specifically the lattice deformation gradient. A simple volume change has a cost of 0.

In [20]:
# Lattice strain costs for the first calculated configuration, both derived
# from the deformation gradient of its lattice mapping.
calculated_config = config_props[0]['configuration_with_properties'].configuration
structure_mapping = config_props[0]['structure_mapping']
deformation_gradient = structure_mapping.lattice_mapping().deformation_gradient()

isotropic_cost = mapinfo.lattice_isotropic_strain_cost(deformation_gradient)
# The symmetry-breaking cost additionally needs the point group of the
# configuration's superlattice.
superlattice_point_group = xtal.make_point_group(
    calculated_config.supercell.superlattice
)
symmetry_breaking_cost = mapinfo.lattice_symmetry_breaking_strain_cost(
    deformation_gradient, superlattice_point_group
)

print(f'isotropic cost: {round(isotropic_cost, 5)}')
print(f'symmetry-breaking cost: {round(symmetry_breaking_cost, 5)}')

isotropic cost: 0.0
symmetry-breaking cost: 0.0


In [21]:
# Compute both lattice strain costs for every calculated entry and store
# them alongside the other per-configuration data.
for entry in config_props:
    calculated_config = entry['configuration_with_properties'].configuration
    entry_F = entry['structure_mapping'].lattice_mapping().deformation_gradient()
    entry_point_group = xtal.make_point_group(calculated_config.supercell.superlattice)

    entry['lattice_isotropic_strain_cost'] = (
        mapinfo.lattice_isotropic_strain_cost(entry_F)
    )
    entry['lattice_symmetry_breaking_strain_cost'] = (
        mapinfo.lattice_symmetry_breaking_strain_cost(entry_F, entry_point_group)
    )

We can calculate high-symmetry strain components from the deformation gradient as well. By default, the mapping info will contain Ustrain (the right stretch tensor) values for $E_{xx}$, $E_{yy}$, etc. Let's get $e_1$ through $e_6$ for Hencky strain.

In [22]:
# choose a symmetry-adapted strain basis
basis = xtal.make_symmetry_adapted_strain_basis()
# choose Hencky strain
hencky_strain_converter = xtal.StrainConverter(
    metric='Hstrain',
    basis=basis
)

# get the Hencky strain in the symmetry-adapted strain basis
structure_mapping = config_props[0]['structure_mapping']
deformation_gradient = structure_mapping.lattice_mapping().deformation_gradient()
symmetry_adapted_strain = hencky_strain_converter.from_F(deformation_gradient)
for i in range(0, 6):
    print(f'e_{i+1}: {round(symmetry_adapted_strain[i], 5)}')

e_1: 0.00959
e_2: -0.0
e_3: 0.00016
e_4: -9e-05
e_5: 9e-05
e_6: 0.00061


In [23]:
# Store the symmetry-adapted Hencky strain of every calculated entry.
for entry in config_props:
    entry_F = entry['structure_mapping'].lattice_mapping().deformation_gradient()
    entry['symmetry_adapted_hencky_strain'] = hencky_strain_converter.from_F(entry_F)

Finally, we write the data to a JSON file for further processing.

In [24]:
# NOTE(review): the write is left disabled. As written, `json.dump` cannot
# serialize `config_props`: every entry holds a `pathlib.Path` under 'path'
# plus libcasm objects, and no `default=` conversion is supplied. The entries
# need to be converted to plain JSON types before this can be enabled.
# with open('config_props.json', 'w') as f:
#     json.dump(config_props, f)

# Plotting

In [25]:
import bokeh.plotting as plt
from bokeh.io import output_notebook
output_notebook()

The first component of the symmetry-adapted Hencky strain, $e_1$, corresponds to volume change.

In [26]:
# Gather e_1 (first symmetry-adapted Hencky strain component) and the Si
# atom fraction for every entry, then scatter one against the other.
e1 = []
atom_fraction_Si = []
for entry in config_props:
    e1.append(entry['symmetry_adapted_hencky_strain'][0])
    atom_fraction_Si.append(entry['atom_fraction_Si'])

p = plt.figure(
    title='e_1 strain',
    x_axis_label='x_Si',
    y_axis_label='e_1',
)
p.scatter(atom_fraction_Si, e1)
plt.show(p)

We can get further information about the deformation during relaxation by also including atomic deformation cost.

In [None]:
# NOTE(review): this cell is disabled. As written it computes `atom_cost` but
# never plots it: the figure title, y-axis label and scatter data below still
# refer to e_1, apparently copied from the previous cell. Plot `atom_cost`
# (with matching labels) once the atom cost is confirmed to be accessible.
# atom_cost = [i['structure_mapping'].atom_mapping().atom_cost() for i in config_props]
# atom_fraction_Si = [i['atom_fraction_Si'] for i in config_props]
# p = plt.figure(title='e_1 strain', x_axis_label='x_Si', y_axis_label='e_1')
# p.scatter(atom_fraction_Si, e1)
# plt.show(p)

We can compare the different lattice cost metrics.

In [37]:
# Compare the two lattice cost metrics against the Si atom fraction
# (`atom_fraction_Si` comes from the e_1 plotting cell above).
lattice_isotropic_strain_cost = [
    entry['lattice_isotropic_strain_cost'] for entry in config_props
]
lattice_symmetry_breaking_strain_cost = [
    entry['lattice_symmetry_breaking_strain_cost'] for entry in config_props
]

p = plt.figure(title='lattice cost', x_axis_label='x_Si', y_axis_label='cost')
# One scatter series per metric, drawn in the same order as before.
for series_costs, series_color, series_label in (
    (lattice_isotropic_strain_cost, 'blue', 'isotropic'),
    (lattice_symmetry_breaking_strain_cost, 'orange', 'symmetry_breaking'),
):
    p.scatter(
        atom_fraction_Si,
        series_costs,
        color=series_color,
        legend_label=series_label,
    )
plt.show(p)