Read data from completed DFT calculations back into CASM.

# Create working directory

In [1]:
import copy
import shutil
import os

In [2]:
# Copy the pristine inputs into a separate project directory and work from inside it.
project_dir = 'SiGe'
shutil.copytree('input', project_dir)
os.chdir(project_dir)

# Set up project

In [3]:
import json
import libcasm.composition as casmcomp
import libcasm.configuration as casmconfig
import libcasm.mapping.info as mapinfo
import libcasm.mapping.methods as mapmethods
import libcasm.xtal as xtal
import numpy as np

Read prim

In [4]:
# Load the JSON description of the prim, then build the xtal.Prim from it.
with open('prim.json', 'r') as prim_file:
    prim_data = json.load(prim_file)
prim = xtal.Prim.from_dict(prim_data)

# casmconfig.Prim built from the xtal prim; this is the object passed to the
# Configuration constructors in the cells below.
config_prim = casmconfig.Prim(prim)

# Read DFT data

The `training_data` folder contains the results of DFT calculations. Each directory labeled `SCELX_X_X_X_X_X_X/Y` corresponds to a configuration. The unrelaxed structure can be found in the file `POS` and the DFT data, including the relaxed structure, can be found in the file `calctype.default/properties.calc.json`.

In [5]:
from pathlib import Path

In [6]:
# Recursively collect every path under `training_data` that matches SCEL*/<index>/.
training_root = Path('training_data')
training_data = list(training_root.rglob('SCEL*/[0-9]*/'))

We will now read the training data into configurations with properties. For each folder in `training_data`, we will find a match with a configuration in `config_set` by reading the unrelaxed structure from `POS`. Then, we will use `properties.calc.json` to get the DFT energy and calculate the deformation from structural relaxation. Note that the training data does not contain calculations for every enumerated configuration in the `config_list`. Also, the name of each configuration (e.g. `SCEL1_1_1_1_0_0_0/1`) is not a unique identifier -- the "fingerprint" of a configuration is its occupation string. For simplicity, we ignore configurations which are not primitive. Results are written to a file called `config_props.json`.

This routine maps two structures assuming that the second (child) structure was produced directly from the first (parent) structure, for example via relaxation with VASP.

In [7]:
def map_relaxed_structure(ideal_prim, relaxed_structure_with_properties, ideal_transformation_matrix):
    """Map a relaxed structure onto the ideal prim and report the atom mapping cost.

    The relaxed (child) structure is assumed to have been produced directly
    from the ideal (parent) structure, for example via relaxation with VASP,
    so the lattices are mapped without reorientation.

    Parameters
    ----------
    ideal_prim: xtal.Prim
    relaxed_structure_with_properties: xtal.Structure
    ideal_transformation_matrix: np.array((3,3), dtype=int)
        transformation matrix from the ideal prim to the ideal (unrelaxed) structure

    Returns
    -------
    scored_structure_mapping: mapinfo.ScoredStructureMapping
        The structure mapping, with `atom_cost` taken from the atom mapping.
        `lattice_cost` and `total_cost` are not calculated here; both are set
        to the placeholder value -1.

    Raises
    ------
    ValueError
        If the number of atom mappings found is not exactly one.
    """
    # map lattices, assuming that the relaxed structure is relaxed directly from the ideal structure in VASP
    # the ideal structure S is related to the ideal prim P via the ideal transformation matrix T:
    # PT = S
    lattice_mapping = mapmethods.map_lattices_without_reorientation(
        lattice1=ideal_prim.lattice(),
        lattice2=relaxed_structure_with_properties.lattice(),
        transformation_matrix_to_super=ideal_transformation_matrix
    )

    # now map the atoms using the lattice mapping which was found -- there should only be one atom mapping
    atom_mappings = mapmethods.map_atoms(
        ideal_prim, relaxed_structure_with_properties,
        lattice_mapping=lattice_mapping,
        prim_factor_group=xtal.make_factor_group(ideal_prim)
    )
    # explicit check instead of `assert`, so it is not stripped when running with -O
    n_atom_mappings = len(atom_mappings)
    if n_atom_mappings != 1:
        raise ValueError(
            f'expected exactly one atom mapping, found {n_atom_mappings}'
        )
    atom_mapping = atom_mappings[0]

    structure_mapping = mapinfo.StructureMapping(
        ideal_prim, lattice_mapping, atom_mapping
    )

    # wrap the result in a scored structure mapping so that atom_cost is carried along;
    # lattice_cost and total_cost are placeholders (-1), not calculated values
    # NOTE(review): the original comment here said this "does not work" -- confirm
    # whether atom_cost is actually retrievable from the returned object downstream
    scored_structure_mapping = mapinfo.ScoredStructureMapping(
        structure_mapping=structure_mapping,
        lattice_cost=-1, atom_cost=atom_mapping.atom_cost(), total_cost=-1
    )
    return scored_structure_mapping

Now we map all structures in training data, assuming that the structures are all primitive.

In [8]:
config_props = []

for directory in training_data:
    # each configuration directory should hold the unrelaxed structure (POS)
    # and the DFT results (properties.calc.json); skip it if either is missing
    pos_path = directory / 'POS'
    properties_path = directory / 'calctype.default/properties.calc.json'
    if not (pos_path.exists() and properties_path.exists()):
        continue

    # ideal (unrelaxed) structure and the configuration it corresponds to
    ideal_structure = xtal.Structure.from_poscar(pos_path.as_posix())
    ideal_configuration = casmconfig.Configuration.from_structure(config_prim, ideal_structure)

    # relaxed structure, including its calculated properties
    with open(properties_path, 'r') as properties_file:
        props = json.load(properties_file)
    relaxed_structure_with_properties = xtal.Structure.from_dict(props)

    # map the relaxed structure using the supercell of the ideal configuration
    supercell = ideal_configuration.supercell
    structure_mapping = map_relaxed_structure(
        ideal_prim=prim,
        relaxed_structure_with_properties=relaxed_structure_with_properties,
        ideal_transformation_matrix=supercell.transformation_matrix_to_super,
    )

    # build the mapped structure and from it the configuration with properties
    mapped_structure = mapmethods.make_mapped_structure(
        structure_mapping, relaxed_structure_with_properties
    )
    configuration_with_properties = casmconfig.ConfigurationWithProperties.from_structure(
        config_prim, mapped_structure
    )

    entry = {
        'configuration_with_properties': configuration_with_properties,
        'structure_mapping': structure_mapping,
        'path': directory,
    }
    config_props.append(entry)

We can calculate compositions and formation energies. The calculator will return the fraction of each species in the configuration. We will order the species as [Si, Ge] so that querying the second index gives $x$ in Si$_{1-x}$Ge$_x$.

In [9]:
# order in which the composition is reported
components = ['Si', 'Ge']
# the allowed species on each site of the prim
# NOTE(review): only one site entry is listed here -- confirm this matches the prim
allowed_occs = [['Si', 'Ge']]
composition_calculator = casmcomp.CompositionCalculator(
    components=components,
    allowed_occs=allowed_occs,
)

# use the first mapped configuration as a worked example
calculated_config = config_props[0]['configuration_with_properties'].configuration
occupation = calculated_config.occupation
species_frac = composition_calculator.species_frac(occupation)
x = species_frac[1]  # index 1 is Ge in `components`

divider = '--------------------------'
print(calculated_config.to_dict())
print(divider)
print(calculated_config.to_structure().to_poscar_str())
print(divider)
print(f'atom fraction Ge = {x}')

{'dof': {'occ': [1, 1, 1, 0, 1, 0, 0, 1]}, 'supercell_name': 'SCEL4_4_1_1_0_0_2', 'transformation_matrix_to_supercell': [[0, 2, 0], [0, -1, 2], [1, -1, -1]]}
--------------------------
<title>
1.00000000
2.80000000 2.80000000 0.00000000
-5.60000000  2.80000000  2.80000000
 2.80000000 -2.80000000  5.60000000
Ge Si 
5 3 
Direct
0.00000000 0.00000000 0.00000000 Ge
0.75000000 0.50000000 0.25000000 Ge
0.50000000 0.00000000 0.50000000 Ge
0.56250000 0.12500000 0.18750000 Ge
0.81250000 0.62500000 0.93750000 Ge
0.25000000 0.50000000 0.75000000 Si
1.31250000 0.62500000 0.43750000 Si
1.06250000 0.12500000 0.68750000 Si


--------------------------
atom fraction Ge = 0.625


In [10]:
# Store the Ge atom fraction of every mapped configuration in its entry.
for entry in config_props:
    configuration = entry['configuration_with_properties'].configuration
    fractions = composition_calculator.species_frac(configuration.occupation)
    entry['atom_fraction_Ge'] = fractions[1]  # index 1 is Ge

The formation energy $\Delta E_f$ is given by $\Delta E_f = E - n_\mathrm{Ge}E_\mathrm{Ge} - n_\mathrm{Si}E_\mathrm{Si}$ where $E$ is the DFT energy, $n_\mathrm{Ge}$ and $n_\mathrm{Si}$ are the number of atoms of Ge and Si, and $E_\mathrm{Ge}$ and $E_\mathrm{Si}$ are the reference DFT energies (per atom) of the Ge and Si endstates. We report the formation energy $\Delta e_f$ normalized per primitive cell given by $\Delta e_f = e - x e_\mathrm{Ge} - (1-x) e_\mathrm{Si}$ where $e$ is the DFT energy per primitive cell, $x$ is the atom fraction of Ge, and $e_\mathrm{Ge}$ and $e_\mathrm{Si}$ are the reference DFT energies per primitive cell of the Ge and Si endstates.

In [11]:
def calculate_formation_energy(energy_per_prim, x, ref_Si, ref_Ge):
    """Return the formation energy relative to the Si and Ge reference states.

    Parameters
    ----------
    energy_per_prim:
        Energy of the configuration, normalized per primitive cell.
    x:
        Atom fraction of Ge; the Si reference is weighted by (1 - x).
    ref_Si, ref_Ge:
        Reference energies of the Si and Ge end states, in the same
        normalization as `energy_per_prim`.

    Returns
    -------
    energy_per_prim - (1 - x) * ref_Si - x * ref_Ge
    """
    si_contribution = (1 - x) * ref_Si
    ge_contribution = x * ref_Ge
    return energy_per_prim - si_contribution - ge_contribution

We will set the reference energies to the energies of the pure Si and Ge structures in the primitive cell, `SCEL1_1_1_1_0_0_0/0` (Si) and `SCEL1_1_1_1_0_0_0/2` (Ge).

In [12]:
# configurations ('<supercell name>/<index>') used as the Si and Ge reference states
ref_Si_config = 'SCEL1_1_1_1_0_0_0/0'
ref_Ge_config = 'SCEL1_1_1_1_0_0_0/2'

# label each entry by the last two components of its path, then look up the references
paths = ['/'.join(entry['path'].parts[-2:]) for entry in config_props]
si_index = paths.index(ref_Si_config)
ge_index = paths.index(ref_Ge_config)
ref_Si_data = config_props[si_index]
ref_Ge_data = config_props[ge_index]

# the reference energy is the 'energy' property of each reference configuration
ref_Si_energy = ref_Si_data['configuration_with_properties'].scalar_global_property_value('energy')
ref_Ge_energy = ref_Ge_data['configuration_with_properties'].scalar_global_property_value('energy')

print(f'atom fraction Ge in Si reference state: {ref_Si_data["atom_fraction_Ge"]}')
print(f'atom fraction Ge in Ge reference state: {ref_Ge_data["atom_fraction_Ge"]}')
print(f'DFT reference energy in Si reference state: {round(ref_Si_energy, 5)} eV / prim')
print(f'DFT reference energy in Ge reference state: {round(ref_Ge_energy, 5)} eV / prim')

atom fraction Ge in Si reference state: 0.0
atom fraction Ge in Ge reference state: 1.0
DFT reference energy in Si reference state: -10.84965 eV / prim
DFT reference energy in Ge reference state: -9.03553 eV / prim


Now we can calculate formation energies. The energies must be normalized per primitive cell. We do this by dividing by the number of primitive cells in the supercell.

In [13]:
# worked example for the first entry
example = config_props[0]
example_with_properties = example['configuration_with_properties']
calculated_config = example_with_properties.configuration
dft_energy = example_with_properties.scalar_global_property_value('energy')
atom_fraction_Ge = example['atom_fraction_Ge']

# normalize by the number of primitive cells in the supercell
supercell_size = calculated_config.supercell.n_unitcells
normalized_dft_energy = dft_energy / supercell_size

formation_energy = calculate_formation_energy(
    energy_per_prim=normalized_dft_energy,
    x=atom_fraction_Ge,
    ref_Si=ref_Si_energy,
    ref_Ge=ref_Ge_energy,
)

print(f'DFT Energy: {round(dft_energy, 5)}')
print(f'normalized by {supercell_size} supercells: {round(normalized_dft_energy, 5)}')
print(f'formation energy: {round(formation_energy, 5)} eV / primitive cell')

DFT Energy: -38.74168
normalized by 4 supercells: -9.68542
formation energy: 0.0304 eV / primitive cell


In [14]:
# Store the formation energy (per primitive cell) of every entry.
for entry in config_props:
    with_properties = entry['configuration_with_properties']
    n_unitcells = with_properties.configuration.supercell.n_unitcells
    # DFT energy normalized per primitive cell
    energy_per_prim = with_properties.scalar_global_property_value('energy') / n_unitcells
    entry['formation_energy'] = calculate_formation_energy(
        energy_per_prim=energy_per_prim,
        x=entry['atom_fraction_Ge'],
        ref_Si=ref_Si_energy,
        ref_Ge=ref_Ge_energy,
    )

We can get information about the child's lattice deformation from the structure mapping, specifically the lattice deformation gradient. A simple volume change has a cost of 0.

In [15]:
# worked example for the first entry
example = config_props[0]
calculated_config = example['configuration_with_properties'].configuration
structure_mapping = example['structure_mapping']

# both costs are evaluated from the lattice deformation gradient of the mapping
isotropic_cost = mapinfo.lattice_isotropic_strain_cost(
    structure_mapping.lattice_mapping().deformation_gradient()
)
# the symmetry-breaking cost additionally takes the point group of the superlattice
superlattice_point_group = xtal.make_point_group(calculated_config.supercell.superlattice)
symmetry_breaking_cost = mapinfo.lattice_symmetry_breaking_strain_cost(
    structure_mapping.lattice_mapping().deformation_gradient(),
    superlattice_point_group,
)

print(f'isotropic cost: {round(isotropic_cost, 5)}')
print(f'symmetry-breaking cost: {round(symmetry_breaking_cost, 5)}')

isotropic cost: 0.0
symmetry-breaking cost: 0.0


In [16]:
# Store both lattice strain costs of every entry.
for entry in config_props:
    configuration = entry['configuration_with_properties'].configuration
    mapping = entry['structure_mapping']

    entry['lattice_isotropic_strain_cost'] = mapinfo.lattice_isotropic_strain_cost(
        mapping.lattice_mapping().deformation_gradient()
    )
    # the symmetry-breaking cost uses the point group of the superlattice
    entry['lattice_symmetry_breaking_strain_cost'] = mapinfo.lattice_symmetry_breaking_strain_cost(
        mapping.lattice_mapping().deformation_gradient(),
        xtal.make_point_group(configuration.supercell.superlattice),
    )

We can calculate high-symmetry strain components from the deformation gradient as well. By default, the mapping info will contain Ustrain (the right stretch tensor) values for $E_{xx}$, $E_{yy}$, etc. Let's get $e_1$ through $e_6$ for Hencky strain.

In [17]:
# choose a symmetry-adapted strain basis
basis = xtal.make_symmetry_adapted_strain_basis()
# choose Hencky strain
hencky_strain_converter = xtal.StrainConverter(
    metric='Hstrain',
    basis=basis
)

# get the Hencky strain in the symmetry-adapted strain basis
structure_mapping = config_props[0]['structure_mapping']
deformation_gradient = structure_mapping.lattice_mapping().deformation_gradient()
symmetry_adapted_strain = hencky_strain_converter.from_F(deformation_gradient)
for i in range(0, 6):
    print(f'e_{i+1}: {round(symmetry_adapted_strain[i], 5)}')

e_1: 0.00959
e_2: -0.0
e_3: 0.00016
e_4: -9e-05
e_5: 9e-05
e_6: 0.00061


In [18]:
# Store the symmetry-adapted Hencky strain of every entry.
for entry in config_props:
    lattice_mapping = entry['structure_mapping'].lattice_mapping()
    entry['symmetry_adapted_hencky_strain'] = hencky_strain_converter.from_F(
        lattice_mapping.deformation_gradient()
    )

Finally, we write the data to a JSON file for further processing. Many objects must be converted to dictionaries for this to work.

In [19]:
# Show the type stored under each key of the first entry.
first_entry = config_props[0]
for key, item in first_entry.items():
    print(key, type(item))

configuration_with_properties <class 'libcasm.configuration._configuration.ConfigurationWithProperties'>
structure_mapping <class 'libcasm.mapping.info._mapping_info.ScoredStructureMapping'>
path <class 'pathlib.PosixPath'>
atom_fraction_Ge <class 'numpy.float64'>
formation_energy <class 'numpy.float64'>
lattice_isotropic_strain_cost <class 'float'>
lattice_symmetry_breaking_strain_cost <class 'float'>
symmetry_adapted_hencky_strain <class 'numpy.ndarray'>


In [20]:
# Build a JSON-friendly copy of each entry: objects become dicts, the path
# becomes a string, and the strain array becomes a list.
config_props_out = [
    {
        'configuration_with_properties': entry['configuration_with_properties'].to_dict(),
        'structure_mapping': entry['structure_mapping'].to_dict(),
        'path': entry['path'].as_posix(),
        'atom_fraction_Ge': entry['atom_fraction_Ge'],
        'formation_energy': entry['formation_energy'],
        'lattice_isotropic_strain_cost': entry['lattice_isotropic_strain_cost'],
        'lattice_symmetry_breaking_strain_cost': entry['lattice_symmetry_breaking_strain_cost'],
        'symmetry_adapted_hencky_strain': entry['symmetry_adapted_hencky_strain'].tolist(),
    }
    for entry in config_props
]

In [21]:
# Write the converted entries to config_props.json.
with open('config_props.json', 'w') as output_file:
    json.dump(config_props_out, output_file)

# Plotting

In [22]:
# bokeh.plotting is imported under the alias `plt`; the plotting cells below
# call plt.figure and plt.show through this alias.
import bokeh.plotting as plt
from bokeh.io import output_notebook
# called once before any figure is shown (presumably so bokeh renders its
# output inline in the notebook -- confirm against the bokeh documentation)
output_notebook()

We can plot the formation energies of the system with respect to the atomic fraction of Ge in order to see that we have a miscibility gap at 0 K.

In [23]:
# scatter plot: formation energy against Ge atom fraction, one point per entry
formation_energy = [entry['formation_energy'] for entry in config_props]
atom_fraction_Ge = [entry['atom_fraction_Ge'] for entry in config_props]
p = plt.figure(
    title='formation energy',
    x_axis_label='x_Ge',
    y_axis_label='formation energy (eV / primitive cell)',
)
p.scatter(atom_fraction_Ge, formation_energy)
plt.show(p)

The first component of the symmetry-adapted Hencky strain, $e_1$, corresponds to volume change.

In [24]:
# scatter plot: first symmetry-adapted Hencky strain component against Ge atom fraction
e1 = [entry['symmetry_adapted_hencky_strain'][0] for entry in config_props]
atom_fraction_Ge = [entry['atom_fraction_Ge'] for entry in config_props]
p = plt.figure(
    title='e_1 strain',
    x_axis_label='x_Ge',
    y_axis_label='e_1',
)
p.scatter(atom_fraction_Ge, e1)
plt.show(p)

We can get further information about the deformation during relaxation by also including atomic deformation cost.

In [25]:
# NOTE(review): this cell is disabled -- every line is commented out. As drafted
# it reads 'atom_fraction_Si', a key not stored in config_props by any cell shown
# here, and it plots e1 with 'e_1' labels although atom_cost is the quantity
# computed on its first line. Resolve these before re-enabling.
# atom_cost = [i['structure_mapping'].atom_mapping().atom_cost() for i in config_props]
# atom_fraction_Si = [i['atom_fraction_Si'] for i in config_props]
# p = plt.figure(title='e_1 strain', x_axis_label='x_Si', y_axis_label='e_1')
# p.scatter(atom_fraction_Si, e1)
# plt.show(p)

We can compare the different lattice cost metrics.

In [26]:
# scatter plot: both lattice strain costs on one figure
# (atom_fraction_Ge is reused from the preceding plotting cells)
lattice_isotropic_strain_cost = [
    entry['lattice_isotropic_strain_cost'] for entry in config_props
]
lattice_symmetry_breaking_strain_cost = [
    entry['lattice_symmetry_breaking_strain_cost'] for entry in config_props
]
p = plt.figure(
    title='lattice cost',
    x_axis_label='x_Ge',
    y_axis_label='cost',
)
p.scatter(
    atom_fraction_Ge, lattice_isotropic_strain_cost,
    color='blue', legend_label='isotropic',
)
p.scatter(
    atom_fraction_Ge, lattice_symmetry_breaking_strain_cost,
    color='orange', legend_label='symmetry_breaking',
)
plt.show(p)