# Check calculation_system_relax records

This notebook is designed for investigating the results stored in calculation_system_relax records.

#### Library imports

In [1]:
# Standard Python libraries
from __future__ import print_function
import glob
import os
from collections import OrderedDict
from copy import deepcopy

from IPython.core.display import display, HTML

# pandas.pydata.org
import pandas as pd

# http://www.numpy.org/
import numpy as np

# https://github.com/usnistgov/DataModelDict
from DataModelDict import DataModelDict as DM

# https://github.com/usnistgov/atomman
import atomman as am
import atomman.unitconvert as uc

# https://github.com/usnistgov/iprPy
import iprPy

import analysis

#### Plotting library imports

In [2]:
# https://bokeh.pydata.org/
import bokeh
from bokeh.plotting import figure, output_file, show
from bokeh.embed import components
from bokeh.resources import Resources, CDN
from bokeh.io import output_notebook
from bokeh.models import Range1d
# Report which bokeh version is installed, then call bokeh's output_notebook()
# so that subsequent bokeh output is set up for display inside the notebook.
print('bokeh version =', bokeh.__version__)
output_notebook()

bokeh version = 0.12.7


## 1. Read Calculation Data

This section reads in raw data from a database. 

## 1. Raw Data

This section reads in or generates the raw_data associated with the calculation. 

### 1.1 Initialize database

- __dbasename__ is used here to predefine different dbase settings
- __dbase__ is the iprPy.Database object to use for accessing a database

In [4]:
# Name of the predefined database settings to use; must match one of the
# branches below, otherwise a ValueError is raised.
dbasename = 'test2'

# NOTE: Windows paths are written with escaped backslashes throughout.
# Unescaped sequences such as '\U' are a syntax error on Python 3 and other
# letters after a backslash may silently turn into control characters.

# 'local' is a local directory
if   dbasename == 'local':
    dbase = iprPy.Database('local',   host='C:\\Users\\lmh1\\Documents\\calculations\\ipr\\library')

# 'test' is a local directory for testing 
elif dbasename == 'test':
    dbase = iprPy.Database('local',   host='C:\\Users\\lmh1\\Documents\\calculations\\ipr\\library_test')

# 'test2' is a local directory for testing 
elif dbasename == 'test2':
    dbase = iprPy.Database('local',   host='C:\\Users\\lmh1\\Documents\\calculations\\ipr\\test2')
    
# 'curator' is a local MDCS curator
# NOTE(review): user name and password are hard-coded here; consider reading
# them from the environment or a file before sharing this notebook.
elif dbasename == 'curator':
    dbase = iprPy.Database('curator', host='http://127.0.0.1:8000/', 
                                      user='admin', 
                                      pswd='admin')

# 'iprhub' is the remote MDCS curator at iprhub
elif dbasename == 'iprhub':
    dbase = iprPy.Database('curator', host='https://iprhub.nist.gov/', 
                                      user='lmh1',
                                      pswd='C:/users/lmh1/documents/iprhub/iprhub_password.txt',
                                      cert='C:/users/lmh1/documents/iprhub/iprhub-ca.pem')
else:
    raise ValueError('unknown dbasename ' + dbasename)

### 1.2 Access records

In [5]:
# Fetch all crystal_prototype records from the database as a DataFrame
# and report how many were found.
proto_df = dbase.get_records_df(style='crystal_prototype')
print('{} prototype records loaded'.format(len(proto_df)))

19 prototype records loaded


In [6]:
# Fetch all potential_LAMMPS records from the database as a DataFrame
# and report how many were found.
pot_df = dbase.get_records_df(style='potential_LAMMPS')
print('{} potential records loaded'.format(len(pot_df)))

166 potential records loaded


In [7]:
# Fetch all calculation_system_relax records from the database as a DataFrame
# and report how many were found.
raw_df = dbase.get_records_df(style='calculation_system_relax')
print('{} calculation records loaded'.format(len(raw_df)))

  value[np.isclose(value/value.max(), 0.0, atol=1e-9)] = 0.0
  value[np.isclose(value/value.max(), 0.0, atol=1e-9)] = 0.0


1200 calculation records loaded


### 1.3 Check errors

In [8]:
# Print every distinct (non-null) error message found in the records,
# each one followed by a blank line.
if 'error' in raw_df:
    reported_errors = raw_df.error[pd.notnull(raw_df.error)]
    for message in np.unique(reported_errors):
        print(message, end='\n\n')

Traceback (most recent call last):
  File "calc_LAMMPS_ELASTIC.py", line 450, in <module>
    main(*sys.argv[1:])
  File "calc_LAMMPS_ELASTIC.py", line 58, in main
    pressure_unit = input_dict['pressure_unit'])
  File "calc_LAMMPS_ELASTIC.py", line 170, in lammps_ELASTIC_refine
    dmax=dmax, pressure_unit=pressure_unit)
  File "calc_LAMMPS_ELASTIC.py", line 333, in lammps_ELASTIC
    output = lmp.run(lammps_command, 'in.elastic', mpi_command)
  File "c:\users\lmh1\documents\python-packages\atomman\atomman\lammpsun.py", line 98, in run
    raise ValueError('Invalid LAMMPS input: 
%s' % lines[-2])
ValueError: Invalid LAMMPS input: 
Last command: read_restart restart.equil


Traceback (most recent call last):
  File "calc_LAMMPS_ELASTIC.py", line 450, in <module>
    main(*sys.argv[1:])
  File "calc_LAMMPS_ELASTIC.py", line 58, in main
    pressure_unit = input_dict['pressure_unit'])
  File "calc_LAMMPS_ELASTIC.py", line 218, in lammps_ELASTIC_refine
    raise Runti

## 2. Process Data

This section processes and refines the data.

### 2.1 Identify composition

We need to identify the composition of each calculation so that we can collect duplicates and filter out artificial compounds.

- __counts__ is a dictionary counting the number of times each atype appears in a crystal prototype's unit cell (i.e. the number of symmetry equivalent sites)

In [9]:
# Map each prototype id to the array of occurrence counts that np.unique
# reports for the 'component' values found in that prototype's record.
counts = {}
for _, proto_row in proto_df.iterrows():
    proto_record = dbase.get_record(name=proto_row.id, style='crystal_prototype')
    components = DM(proto_record.content).finds('component')
    _, component_counts = np.unique(components, return_counts=True)
    counts[proto_row.id] = component_counts

- __comp_refine()__ takes a list of symbols and a count of how many times each symbol appears in a structure, and generates a composition string.

In [10]:
def comp_refine(symbols, counts):
    """
    Build a reduced composition string from symbols and their counts.

    Counts that belong to the same symbol are added together, all totals are
    divided by their greatest common divisor, and the symbols are joined in
    alphabetical order.  A reduced count of 1 is left implicit, and symbols
    whose total count is not positive are omitted.

    Parameters
    ----------
    symbols : sequence of str
        The symbols to include.  A symbol may be listed more than once.
    counts : sequence of int
        counts[i] is the number of times symbols[i] appears.  Must contain
        at least len(symbols) entries (an IndexError is raised otherwise).

    Returns
    -------
    str
        The reduced composition, e.g. 'AuCu3' for symbols ['Cu', 'Au'] and
        counts [3, 1].  An empty string is returned if no symbols are given.
    """
    # Total the counts per symbol; a repeated symbol adds to its earlier
    # entries instead of replacing them.
    sym_dict = {}
    for i in range(len(symbols)):
        sym_dict[symbols[i]] = sym_dict.get(symbols[i], 0) + counts[i]

    # Greatest common divisor of all totals (Euclid's algorithm).  This
    # removes every common factor, not just a fixed list of small primes.
    divisor = 0
    for value in sym_dict.values():
        remainder = value
        while remainder:
            divisor, remainder = remainder, divisor % remainder

    composition = ''
    for key in sorted(sym_dict):
        value = sym_dict[key]
        if value > 0:
            # Floor division keeps the counts integral so that no '.0'
            # suffix shows up in the string under true division.
            if divisor > 1:
                value //= divisor
            composition += key
            if value != 1:
                composition += str(value)

    return composition

In [11]:
# Build the composition string of every calculation from its symbols and the
# counts stored for its prototype family, then attach it as a new column.
compositions = [comp_refine(calc.symbols, counts[calc.family])
                for _, calc in raw_df.iterrows()]
raw_df = raw_df.assign(composition=compositions)

### 2.2 Identify current ipr potentials 

In [12]:
# Extract versionstyle and versionnumber from potential implementation ids.
# The version is the text after the last '--' in the id.  Its trailing digits
# (all of them, so 'ipr10' gives style 'ipr' and number 10) form the version
# number; whatever precedes them is the version style.  Versions without
# trailing digits get a version number of NaN.
versionstyle = []
versionnumber = []
for name in pot_df['id'].values:
    version = name.split('--')[-1]
    style = version.rstrip('0123456789')
    digits = version[len(style):]
    versionstyle.append(style)
    versionnumber.append(int(digits) if digits else np.nan)

pot_df['versionstyle'] = versionstyle
pot_df['versionnumber'] = versionnumber

# Loop through unique potential id's and, for each, keep the implementation
# with versionstyle 'ipr' that has the highest version number
includeid = []
for pot_id in np.unique(pot_df.pot_id.values):
    check_df = pot_df[pot_df.pot_id == pot_id]
    check_df = check_df[check_df.versionstyle == 'ipr']
    check_df = check_df[check_df.versionnumber == check_df.versionnumber.max()]
    if len(check_df) == 1:
        includeid.append(check_df['id'].values[0])
    elif len(check_df) > 1:
        # More than one implementation shares the highest version number
        raise ValueError('Bad currentIPR check for '+pot_id)

# Identify current IPR potentials
raw_df['currentIPR'] = raw_df.potential_LAMMPS_id.isin(includeid)

### 2.3 Identify crystals that have relaxed to a different crystal family

In [13]:
# Absolute tolerance passed as atol to np.isclose when the b/a and c/a
# lattice parameter ratios are compared against their ideal values.
tol = 1e-5

In [14]:
# Crystal family of every prototype, keyed by prototype id.
# The table is hard-coded here: a dictionary previously built from proto_df
# in this cell was overwritten right away by this literal, so that dead
# computation has been dropped.
crystal_families = {
    #elemental
    'A1--Cu--fcc':                'cubic',
    'A2--W--bcc':                 'cubic',
    'A3--Mg--hcp':                'hexagonal',
    'A3\'--alpha-La--double-hcp': 'hexagonal',
    'A4--C--dc':                  'cubic',
    'A5--beta-Sn':                'tetragonal',
    'A6--In--bct':                'tetragonal',
    'A7--alpha-As':               'hexagonal',
    'A15--beta-W':                'cubic',
    'Ah--alpha-Po--sc':           'cubic',
   #1:1
    'B1--NaCl--rock-salt':        'cubic',
    'B2--CsCl':                   'cubic',
    'B3--ZnS--cubic-zinc-blende': 'cubic',
    'L1_0--AuCu':                 'tetragonal',
   #1:2
    'C1--CaF2--fluorite':         'cubic',
   #1:3
    'A15--Cr3Si':                 'cubic',
    'D0_3--BiF3':                 'cubic',
    'L1_2--AuCu3':                'cubic',
   #1:1:2
    'L2_1--AlCu2Mn--heusler':     'cubic'
}

# Show the distinct crystal families.  list() is needed because on Python 3
# dict.values() is a view that np.unique would treat as one opaque object.
np.unique(list(crystal_families.values()))

array(['cubic', 'hexagonal', 'tetragonal'], 
      dtype='|S10')

In [15]:
def _ratio_is(numer, denom, target):
    """Return whether numer/denom equals target within the absolute tolerance tol."""
    return np.isclose(numer / denom, target, atol=tol, rtol=0.0)

# One flag per calculation: True when the final box dimensions no longer
# satisfy the ratios expected for the prototype's crystal family.
hasrelaxed = np.empty(len(raw_df), dtype=bool)
for i, row in enumerate(raw_df.itertuples()):

    # Extract data for comparison
    a, b, c = row.final_a, row.final_b, row.final_c
    family = row.family
    crystal_family = crystal_families[family]

    # Evaluate based on crystal_family
    if crystal_family == 'cubic':
        # Cubic requires a = b = c
        hasrelaxed[i] = not (_ratio_is(b, a, 1.0) and _ratio_is(c, a, 1.0))

    elif crystal_family == 'hexagonal':
        # Hexagonal check requires b/a = sqrt(3)
        hasrelaxed[i] = not _ratio_is(b, a, 3.**0.5)

    elif crystal_family == 'tetragonal':
        # Tetragonal requires a = b and c different from a
        hasrelaxed[i] = not (_ratio_is(b, a, 1.0) and not _ratio_is(c, a, 1.0))

    # Evaluate based on family: specific c/a ratios are also flagged
    if family == 'A6--In--bct' and _ratio_is(c, a, 2**(0.5)):
        hasrelaxed[i] = True
    elif family == 'L1_0--AuCu' and _ratio_is(c, a, 2**(0.5)/2):
        hasrelaxed[i] = True

raw_df['hasrelaxed'] = hasrelaxed

## 3. Investigate important structures 

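Select the fcc and bcc records that did not relax to a different crystal family, compute their averaged cubic lattice and elastic constants, and save the resulting tables to CSV files.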

In [21]:
# List the columns available in the processed records
raw_df.columns

Index([u'C', u'E_cohesive', u'LAMMPS_version', u'calc_key', u'calc_script',
       u'error', u'family', u'final_a', u'final_b', u'final_c', u'initial_a',
       u'initial_b', u'initial_c', u'iprPy_version', u'load_file',
       u'load_options', u'load_style', u'potential_LAMMPS_id',
       u'potential_LAMMPS_key', u'potential_id', u'potential_key',
       u'pressure_xx', u'pressure_yy', u'pressure_zz', u'sizemults', u'status',
       u'strainrange', u'symbols', u'temperature', u'composition',
       u'currentIPR', u'hasrelaxed'],
      dtype='object')

In [35]:
# Select the fcc and bcc calculations that were not flagged as relaxed.
# reset_index() gives each subset a fresh index starting at zero.
not_relaxed = raw_df.hasrelaxed == False
is_fcc = raw_df.family == 'A1--Cu--fcc'
is_bcc = raw_df.family == 'A2--W--bcc'
fcc_df = raw_df[is_fcc & not_relaxed].reset_index()
bcc_df = raw_df[is_bcc & not_relaxed].reset_index()
print('{} fcc records'.format(len(fcc_df)))
print('{} bcc records'.format(len(bcc_df)))

57 fcc records
52 bcc records


In [51]:
def add_cubic_properties(df):
    """
    Add averaged lattice and elastic constant columns to df in place.

    The new columns are
    - 'a (Angstrom)': mean of final_a, final_b and final_c,
    - 'C11 (eV/Angstrom^3)': mean of Cij[0,0], Cij[1,1], Cij[2,2],
    - 'C12 (eV/Angstrom^3)': mean of Cij[0,1], Cij[0,2], Cij[1,2],
    - 'C44 (eV/Angstrom^3)': mean of Cij[3,3], Cij[4,4], Cij[5,5],
    where Cij is read from each row's C.Cij and every average is passed
    through uc.get_in_units with 'eV/Angstrom^3'.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide final_a, final_b, final_c and C columns.  Modified in
        place; nothing is returned.
    """
    df['a (Angstrom)'] = (df.final_a + df.final_b + df.final_c) / 3

    unit = 'eV/Angstrom^3'
    c11_values, c12_values, c44_values = [], [], []
    for _, record in df.iterrows():
        cij = record.C.Cij

        # Average the three entries that are grouped together for each constant
        averages = (
            (cij[0,0] + cij[1,1] + cij[2,2]) / 3,
            (cij[0,1] + cij[0,2] + cij[1,2]) / 3,
            (cij[3,3] + cij[4,4] + cij[5,5]) / 3,
        )
        for values, average in zip((c11_values, c12_values, c44_values), averages):
            values.append(uc.get_in_units(average, unit))

    df['C11 (eV/Angstrom^3)'] = c11_values
    df['C12 (eV/Angstrom^3)'] = c12_values
    df['C44 (eV/Angstrom^3)'] = c44_values
    

In [52]:
add_cubic_properties(fcc_df)
add_cubic_properties(bcc_df)

In [54]:
# Columns written to the output tables, in output order
showkeys = [
    'potential_LAMMPS_id',
    'composition',
    'calc_script',
    'E_cohesive',
    'a (Angstrom)',
    'C11 (eV/Angstrom^3)',
    'C12 (eV/Angstrom^3)',
    'C44 (eV/Angstrom^3)',
]

# Columns used, in priority order, to sort the rows of the output tables
sortkeys = [
    'potential_LAMMPS_id',
    'composition',
    'calc_script',
]

In [49]:
# Save the selected fcc columns, sorted by sortkeys, to fcc.csv without the index
fcc_table = fcc_df[showkeys].sort_values(sortkeys)
fcc_table.to_csv('fcc.csv', index=False)

In [55]:
# Save the selected bcc columns, sorted by sortkeys, to bcc.csv without the index
bcc_table = bcc_df[showkeys].sort_values(sortkeys)
bcc_table.to_csv('bcc.csv', index=False)