# Prepare Defects

This Notebook is used to prepare the surface_energy, point_defect_formation, stacking_fault_multi, and dislocation_monopole calculation. These are being prepared from a Notebook for the following reasons

- The prepare functions are being called directly to save time.  All the defect calculations take calculation_system_relax records as parent records.  By using the prepare functions, the list of calculation_system_relax records only has to be built once as opposed to once per defect calculation. 
- Calculations are only being prepared for the preferred crystal structure(s) of elements, i.e. only fcc defects for Au and only bcc defects for W. This requires calling each prepare function multiple times, but reduces the total number of calculations that need to be prepared. 

## 1. Import Python packages

In [1]:
#Standard library imports
from __future__ import print_function, division
import os
import sys
import glob
import uuid
import shutil

#http://www.numpy.org/
import numpy as np

#http://pandas.pydata.org/
import pandas as pd

from DataModelDict import DataModelDict as DM

#https://github.com/usnistgov/atomman
import atomman as am
import atomman.lammps as lmp
import atomman.unitconvert as uc

#https://github.com/usnistgov/iprPy
import iprPy

## 2. Input parameters

- **database** (*str*) - The name of the database to interact with (defined [here](#databases)).

- **run_terms** (*dict*) - Defines the commands and directories to use for each number of processors on which this computer can run.
    
    - **lammps_command** (*str*) -The LAMMPS executable to use.
    - **mpi_command** (*str*) - The MPI command to call when LAMMPS is executed.
    - **run_directory** (*str*) - The run_directory to prepare the calculations in (defined with databases below).

In [2]:
database = 'iprhub'

run_terms = {}

# Define serial run terms
run_terms[1] = {}
run_terms[1]['lammps_command'] = 'lmp_mpi'
run_terms[1]['mpi_command'] = ''

# Define 2 core run terms
run_terms[2] = {}
run_terms[2]['lammps_command'] = 'lmp_mpi'
run_terms[2]['mpi_command'] = 'mpiexec -localonly 2'

# Define 3 core run terms
run_terms[3] = {}
run_terms[3]['lammps_command'] = 'lmp_mpi'
run_terms[3]['mpi_command'] = 'mpiexec -localonly 3'

# Define 4 core run terms
run_terms[4] = {}
run_terms[4]['lammps_command'] = 'lmp_mpi'
run_terms[4]['mpi_command'] = 'mpiexec -localonly 4'

### Define databases and run directories <a id="databases"></a>

These are specific to the computer this Notebook is running on and the databases that are being accessed.

In [3]:
# 'local' is a local directory
if database == 'local':
    dbase = iprPy.Database('local', 
                           host='C:\\Users\\lmh1\\Documents\\calculations\\ipr\\library')
    
    run_terms[1]['run_directory'] = 'C:\\Users\\lmh1\\Documents\\calculations\\ipr\\torun\\local\\1'
    run_terms[2]['run_directory'] = 'C:\\Users\\lmh1\\Documents\\calculations\\ipr\\torun\\local\\2'
    run_terms[3]['run_directory'] = 'C:\\Users\\lmh1\\Documents\\calculations\\ipr\\torun\\local\\3'
    run_terms[4]['run_directory'] = 'C:\\Users\\lmh1\\Documents\\calculations\\ipr\\torun\\local\\4'

# 'test' is a local directory for testing
elif database == 'test':
    dbase = iprPy.Database('local', 
                           host='C:\\Users\\lmh1\\Documents\\calculations\\ipr\\library_test')
    
    run_terms[1]['run_directory'] = 'C:\\Users\\lmh1\\Documents\\calculations\\ipr\\torun\\test\\1'
    run_terms[2]['run_directory'] = 'C:\\Users\\lmh1\\Documents\\calculations\\ipr\\torun\\test\\2'
    run_terms[3]['run_directory'] = 'C:\\Users\\lmh1\\Documents\\calculations\\ipr\\torun\\test\\3'
    run_terms[4]['run_directory'] = 'C:\\Users\\lmh1\\Documents\\calculations\\ipr\\torun\\test\\4'

# 'curator' is a local MDCS curator
elif database == 'curator':
    dbase = iprPy.Database('curator', 
                           host='https://iprhub.nist.gov/', 
                           user='admin', pswd='admin')
    
    run_terms[1]['run_directory'] = 'C:\\Users\\lmh1\\Documents\\calculations\\ipr\\torun\\curator\\1'
    run_terms[2]['run_directory'] = 'C:\\Users\\lmh1\\Documents\\calculations\\ipr\\torun\\curator\\2'
    run_terms[3]['run_directory'] = 'C:\\Users\\lmh1\\Documents\\calculations\\ipr\\torun\\curator\\3'
    run_terms[4]['run_directory'] = 'C:\\Users\\lmh1\\Documents\\calculations\\ipr\\torun\\curator\\4'

# 'iprhub' is the remote MDCS curator at iprhub
elif database == 'iprhub':
    dbase = iprPy.Database('curator', 
                           host='https://iprhub.nist.gov/', 
                           user='lmh1', 
                           pswd='C:/users/lmh1/documents/iprhub/iprhub_password.txt',
                           cert='C:/users/lmh1/documents/iprhub/iprhub-ca.pem')
    
    run_terms[1]['run_directory'] = 'C:\\Users\\lmh1\\Documents\\calculations\\ipr\\torun\\iprhub\\1'
    run_terms[2]['run_directory'] = 'C:\\Users\\lmh1\\Documents\\calculations\\ipr\\torun\\iprhub\\2'
    run_terms[3]['run_directory'] = 'C:\\Users\\lmh1\\Documents\\calculations\\ipr\\torun\\iprhub\\3'
    run_terms[4]['run_directory'] = 'C:\\Users\\lmh1\\Documents\\calculations\\ipr\\torun\\iprhub\\4'

## 3. Setup

### Sort element symbols by primary crystal prototype(s)

In [4]:
symbols = {}
symbols['A1--Cu--fcc'] = ['Ne', 'Al', 'AlS', 'Ar', 'Ca', 'Fe', 'FeS', 'Ni', 'Cu', 'CuS', 
                          'Kr', 'Sr', 'Rh', 'Pd', 'Ag', 'Xe', 'Ir', 'Pt', 'Au', 'Pb', 
                          'Ac', 'Ce', 'Yb', 'Th']
symbols['A2--W--bcc'] = ['Li', 'Na', 'K', 'V', 'Cr', 'Fe', 'FeS', 'Rb', 'Nb', 'Mo', 
                         'Cs', 'Ba', 'Ta', 'W', 'Eu']
symbols['A3--Mg--hcp'] = ['H', 'He', 'Be', 'Mg', 'MgS', 'Sc', 'Ti', 'Co', 'Zn', 'Y', 
                          'Zr', 'Tc', 'Ru', 'Cd', 'Hf', 'Re', 'Os' 'Tl', 'Gd', 'Tb', 
                          'Dy', 'Ho', 'Er', 'Tm', 'Lu']
symbols['A4--C--dc'] = ['C', 'Si', 'SiS', 'Ge', 'Sn']

### Build dataframes of records for each primary crystal prototype

In [5]:
calculation_system_relax_df = dbase.get_records_df(style='calculation_system_relax', full=False, flat=True)
print(len(calculation_system_relax_df), 'total calculation_system_relax records')

17532 total calculation_system_relax records


In [6]:
fcc_df = calculation_system_relax_df[calculation_system_relax_df.family=='A1--Cu--fcc']
fcc_df = fcc_df[fcc_df.symbols.isin(symbols['A1--Cu--fcc'])]

fcc_records = dbase.get_records(name=fcc_df.calc_key.tolist())
print(len(fcc_records), 'primary fcc records loaded')

320 primary fcc records loaded


In [7]:
bcc_df = calculation_system_relax_df[calculation_system_relax_df.family=='A2--W--bcc']
bcc_df = bcc_df[bcc_df.symbols.isin(symbols['A2--W--bcc'])]

bcc_records = dbase.get_records(name=bcc_df.calc_key.tolist())
print(len(bcc_records), 'primary bcc records loaded')

128 primary bcc records loaded


In [8]:
hcp_df = calculation_system_relax_df[calculation_system_relax_df.family=='A3--Mg--hcp']
hcp_df = hcp_df[hcp_df.symbols.isin(symbols['A3--Mg--hcp'])]

hcp_records = dbase.get_records(name=hcp_df.calc_key.tolist())
print(len(hcp_records), 'primary hcp records loaded')

104 primary hcp records loaded


In [9]:
dc_df = calculation_system_relax_df[calculation_system_relax_df.family=='A4--C--dc']
dc_df = dc_df[dc_df.symbols.isin(symbols['A4--C--dc'])]

dc_records = dbase.get_records(name=dc_df.calc_key.tolist())
print(len(dc_records), 'primary dc records loaded')

6 primary dc records loaded


In [10]:
primary_records = fcc_records + bcc_records + hcp_records + dc_records
print(len(primary_records), 'primary records loaded')

558 primary records loaded


## 4. surface_energy

### Define calculation-specific parameters

- **n** (*int*) - The number of processors to use for this style of calculation.
- **sizemults** (*str*) - Space-delimited string of size multipliers to use for the systems.

In [11]:
n = 1
sizemults = '5 5 10'

In [12]:
run_directory = run_terms[n]['run_directory']

input_dict = {}
input_dict['calculation_style'] = 'surface_energy'
input_dict['lammps_command'] = run_terms[n]['lammps_command']
input_dict['mpi_command'] = run_terms[n]['mpi_command']
input_dict['sizemults'] = sizemults
input_dict['parent_records'] = primary_records

Call prepare function

In [13]:
iprPy.highthroughput.prepare(dbase, run_directory, input_dict=input_dict)

  value[np.isclose(value/value.max(), 0.0, atol=1e-9)] = 0.0
  value[np.isclose(value/value.max(), 0.0, atol=1e-9)] = 0.0


## 5. stacking_fault_multi

### Define calculation-specific parameters

- **n** (*int*) - The number of processors to use for this style of calculation.
- **sizemults** (*str*) - Space-delimited string of size multipliers to use for the systems.
- **numshifts1** (*int*) - Number of steps to measure along the first shift vector.
- **numshifts2** (*int*) - Number of steps to measure along the second shift vector.

In [17]:
n = 1
sizemults = '5 5 10'
numshifts1 = 51 
numshifts2 = 51

In [18]:
run_directory = run_terms[n]['run_directory']

input_dict = {}
input_dict['calculation_style'] = 'stacking_fault_multi'
input_dict['lammps_command'] = run_terms[n]['lammps_command']
input_dict['mpi_command'] = run_terms[n]['mpi_command']
input_dict['sizemults'] = sizemults
input_dict['stackingfault_numshifts1'] = str(numshifts1)
input_dict['stackingfault_numshifts2'] = str(numshifts2)
input_dict['parent_records'] = primary_records

Call prepare function

In [19]:
iprPy.highthroughput.prepare(dbase, run_directory, input_dict=input_dict)

## 6. point_defect_static

### Define calculation-specific parameters

- **n** (*int*) - The number of processors to use for this style of calculation.
- **sizemults** (*str*) - Space-delimited string of size multipliers to use for the systems.

In [14]:
n = 1
sizemults = '10 10 10'

In [15]:
run_directory = run_terms[n]['run_directory']

input_dict = {}
input_dict['calculation_style'] = 'point_defect_static'
input_dict['lammps_command'] = run_terms[n]['lammps_command']
input_dict['mpi_command'] = run_terms[n]['mpi_command']
input_dict['sizemults'] = sizemults
input_dict['parent_records'] = primary_records

Call prepare function

In [16]:
iprPy.highthroughput.prepare(dbase, run_directory, input_dict=input_dict)

## 7. dislocation_monopole

### Define calculation-specific parameters

- **n** (*int*) - The number of processors to use for this style of calculation.
- **sizemults** (*dict*) - Space-delimited string of size multipliers to use for each dislocation type.
- **annealtemperature** (*float*) - Temperature at which to anneal the system.

In [20]:
n = 4

sizemults = {}
sizemults['A1--Cu--fcc--110-screw'] = '-28 28 -40 40 0 2'
sizemults['A1--Cu--fcc--111-edge'] = '-49 49 -40 40 0 2'
sizemults['A2--W--bcc--110-edge'] = '-40 40 -28 28 0 2'
sizemults['A2--W--bcc--111-screw'] = '-28 28 -49 49 0 2'
sizemults['A2--W--bcc--112-edge'] = '-40 40 -49 49 0 2'

annealtemperature = 50

Call prepare

In [21]:
run_directory = run_terms[n]['run_directory']

for dislocation_name in sizemults.keys():
    input_dict = {}
    input_dict['calculation_style'] = 'dislocation_monopole'
    input_dict['lammps_command'] = run_terms[n]['lammps_command']
    input_dict['mpi_command'] = run_terms[n]['mpi_command']
    input_dict['annealtemperature'] = str(annealtemperature)
    input_dict['parent_records'] = primary_records
    input_dict['dislocation_name'] = dislocation_name
    input_dict['sizemults'] = sizemults[dislocation_name]
    
    iprPy.highthroughput.prepare(dbase, run_directory, input_dict=input_dict)