# Reference atomic structures

This Notebook contains the workflow for fetching reference atomic crystal structures from various external sites, and comparing them to the defined crystal prototypes in the iprPy library.

__WARNING:__ The functions called here access the external sites and download every matching crystal structure.  To keep this manageable, limit the fetches to the compositions of interest and/or copy already-downloaded references between systems rather than downloading them again.

__Global workflow details:__

This Notebook is optional, but if used it should be executed prior to "3. Crystal relaxation".

**Library imports**

In [1]:
# Standard Python libraries
from __future__ import (print_function, division, absolute_import,
                        unicode_literals)
import os
import glob

# http://www.numpy.org/
import numpy as np

# https://pandas.pydata.org/
import pandas as pd

# https://github.com/usnistgov/atomman
import atomman as am
import atomman.lammps as lmp

# https://github.com/usnistgov/iprPy
import iprPy
print('iprPy version', iprPy.__version__)

iprPy version 0.8.3


## 1. Build unique sets of elements from included potentials

### Load database

In [2]:
# Load the iprPy database named 'master'; later cells query it and add records to it
database = iprPy.load_database('master')

### Option #1: Get all elements lists from potentials

In [3]:
# Gather one entry per distinct element combination found among the
# potential_LAMMPS records.  Each entry is the potential's element symbols,
# sorted and joined by single spaces (e.g. 'Cu Ni').
elements_set = set()
for record in database.get_records(style='potential_LAMMPS'):
    symbols = sorted(lmp.Potential(record.content).elements())
    elements_set.add(' '.join(symbols))

### Option #2: Only get elements lists for certain systems

In [3]:
#elements_set = set()
#elements_set.add('Fe')
#elements_set.add('Cu')
#elements_set.add('Ni')
#elements_set.add('Cu Ni')

## 2. Fetch Materials Project reference structures

### Specify personal Materials Project API key

In [4]:
# Path to a plain-text file that holds your personal Materials Project API key.
# NOTE: this is a machine-specific location; change it for your own system.
mp_api_key_location = 'C:\\users\\lmh1\\Documents\\Materials Project\\API key.txt'

# Read the key and strip surrounding whitespace: text files usually end with a
# newline, which would otherwise become part of the key that is sent.
with open(mp_api_key_location) as f:
    mp_api_key = f.read().strip()

### Get structures from Materials Project

In [5]:
# Fetch Materials Project structures for every element set.  Each entry of
# elements_set is a space-delimited string, so split() turns it back into a
# list of element symbols before it is passed on with the API key.
for elements in elements_set:
    iprPy.tools.get_mp_structures(elements.split(), api_key=mp_api_key)



Added mvc-11423
Added mvc-11600
Added mvc-12404
Added mvc-12466
Added mvc-13391
Added mvc-4715




Added mvc-5171
Added mvc-11155
Added mvc-11911
Added mvc-12893
Added mvc-13345
Added mvc-14043
Added mvc-14277
Added mvc-3776
Added mvc-3780
Added mvc-4622
Added mvc-12050
Added mvc-12574
Added mvc-12901
Added mvc-13696
Added mvc-5627
0 of 646 done 0.0%
500 of 646 done 77.4%
646 of 646 done 100.0%
Added mvc-13894
Added mvc-4415
Added mp-1008280
Added mp-1010071
Added mp-1025503
Added mp-1077706
Added mp-1087479
Added mp-1226209
Added mp-1226444
Added mp-1227307
Added mp-1012640
Added mp-1096923
Added mp-1183752
Added mp-1007852
Added mp-1080603
Added mp-1084832
Added mp-1096987
Added mp-1181589
Added mp-1225016
Added mp-1225083
Added mp-18695
Added mp-2090
Added mp-601820
Added mp-601842
Added mp-601848
Added mp-641526
Added mp-984711


## 3. Fetch OQMD reference structures

### Get structures from OQMD

In [6]:
# Fetch OQMD structures for every element set.  A failure for one set is
# reported and skipped so that the remaining sets are still processed.
for elements in elements_set:
    try:
        iprPy.tools.get_oqmd_structures(elements.split())
    except Exception:
        # Catch Exception rather than using a bare except so that a kernel
        # interrupt (KeyboardInterrupt) or SystemExit still stops the loop.
        print('Failed:', elements)

Failed: Al Ni O
Failed: Al Mg
Failed: Al H Ni
Failed: C H O
Failed: Cr Fe Ni
Failed: Cd Hg S Se Te Zn
Failed: C Si
Failed: Br Cl Cs F I K Li Na Rb
Failed: Al Mg Zn
Failed: Ag Au Cu Ni Pd Pt
Failed: O Ti
Failed: meta_TWIP
Failed: Cd Te Zn
Failed: Al Co Ni
Failed: C Fe Ti
Failed: Ag H Pd
Failed: Ag O Ta
Failed: Al Cu Fe Mg Si
Failed: Cd Se Te
Failed: Ni Ti V
Failed: Cu Fe Ni
Failed: O Si
Failed: Al Mn Pd
Failed: Cu Zr
Failed: Ag Au Cu
Failed: Al Cu H
Failed: Ni Zr
Failed: Cr Fe W
Failed: Al Nb Ti
Failed: B C N


## 4. Run crystal_space_group calculations on the reference structures

In [7]:
# Load the crystal_space_group calculation and the run directory named
# 'master_1'; both are passed to database.prepare() and the runner below
calculation = iprPy.load_calculation('crystal_space_group')
run_directory = iprPy.load_run_directory('master_1')

In [8]:
# Show the calculation's allkeys list, i.e. the key names used in the input script below
print(calculation.allkeys)

['length_unit', 'pressure_unit', 'energy_unit', 'force_unit', 'load_file', 'load_content', 'load_style', 'family', 'load_options', 'symbols', 'box_parameters', 'symmetryprecision', 'primitivecell', 'idealcell']


### Write input script

In [9]:
# Text of the input script used to prepare the crystal_space_group calculations.
# Two buildcombos lines request load_file combinations from crystalprototype and
# atomicreference; the unit and run-parameter keys are listed without values.
# The text is written verbatim to 'input_script.in' in the working directory.
input_script = """

# Build load information based on prototype records
buildcombos                 crystalprototype load_file proto

# Build load information based on reference structures
buildcombos                 atomicreference load_file ref

# Specify reference buildcombos limiters (only build for element sets listed)

# Units that input/output values are in
length_unit                 
pressure_unit               
energy_unit                 
force_unit                  

# Run parameters
symmetryprecision           
primitivecell               
idealcell                   
"""
with open('input_script.in', 'w') as f:
    f.write(input_script)

### Prepare calculations

In [10]:
# Parse the saved input script into a dictionary of terms, treating the
# calculation's singularkeys as single-valued
with open('input_script.in') as script_file:
    input_dict = iprPy.input.parse(script_file,
                                   singularkeys=calculation.singularkeys)

# Prepare the calculations in the run directory using the parsed terms
database.prepare(run_directory, calculation, **input_dict)

In [11]:
# Report how many records of this calculation's style are complete, still to run, or errored
database.check_records(calculation.record_style)

In database style local at C:\Users\lmh1\Documents\calculations\ipr\master :
- 38495 of style calculation_crystal_space_group
 - 37770 are complete
 - 725 still to run
 - 0 issued errors


### Run calculations

In [12]:
# Start a runner on the prepared calculations in run_directory
database.runner(run_directory)

Runner started with pid 7564
No simulations left to run


In [13]:
# Retrieve all records of this calculation style and keep those that errored
results_df = database.get_records_df(style=calculation.record_style)
error_df = results_df[results_df.status=='error']
print(len(error_df), 'calculations issued errors:')

# Reduce each error text to the part starting at its last 'Error:' line, so
# that identical failures collapse to a single message
errors = []
for error in error_df.error:
    lines = error.splitlines()
    err = ''
    # Search backwards so the last 'Error:' line is the one that is found
    for i in range(len(lines)-1, -1, -1):
        if 'Error:' in lines[i]:
            # The final line is excluded, as before.  If the 'Error:' line is
            # itself the final line the slice is empty, so fall back to that
            # line rather than reporting an empty message.
            err = '\n'.join(lines[i:-1]) or lines[i]
            break
        if i == 0:
            # No 'Error:' line anywhere: keep the full text
            err = error
    errors.append(err)

# Print each distinct message once
for error in np.unique(errors):
    print(error)

0 calculations issued errors:


## 5. Match prototypes to references

In [14]:
# Name of the CSV file that the reference-to-prototype matches are saved to
crystal_match_file = 'reference_prototype_match.csv'

### Retrieve finished calculation results

In [15]:
# Retrieve the crystal_space_group calculation records with status 'finished' as a DataFrame
spg_records = database.get_records_df(style='calculation_crystal_space_group', full=True, flat=False, status='finished')

### Identify compositions

In [16]:
# NOTE(review): the return value is not used and later cells read a
# 'composition' field from these records, so this presumably adds that
# column to spg_records in place - confirm against iprPy.analysis
iprPy.analysis.assign_composition(spg_records, database)

### Split all spg records into references, prototypes, and relaxation calculations

In [17]:
# Classify every record by the file it was loaded from:
#   "<family>.poscar" -> 'reference'
#   "<family>.json"   -> 'prototype'
#   anything else     -> 'calc'
loaded_from_poscar = spg_records.load_file == spg_records.family + '.poscar'
loaded_from_json = spg_records.load_file == spg_records.family + '.json'

spg_records['record_type'] = 'calc'
spg_records.loc[loaded_from_poscar, 'record_type'] = 'reference'
spg_records.loc[loaded_from_json, 'record_type'] = 'prototype'

# Subsets for each record type (family_records = prototypes + references)
prototype_records = spg_records[spg_records.record_type == 'prototype']
reference_records = spg_records[spg_records.record_type == 'reference']
family_records = spg_records[spg_records.record_type.isin(['prototype', 'reference'])]

# The calc subset gets a fresh 0..n-1 index
calc_records = spg_records[spg_records.record_type == 'calc'].reset_index(drop=True)

### Create crystal_match_file

In [18]:
# Signature of each crystal prototype: (prototype name, space group number,
# accepted Wyckoff fingerprints).  A reference structure matches a prototype
# when both its space group number and its fingerprint agree.
prototype_signatures = [
    ('A1--Cu--fcc',                225, ['a', 'b']),
    ('A2--W--bcc',                 229, ['a']),
    ("A3'--alpha-La--double-hcp",  194, ['ab', 'ac', 'ad']),
    ('A3--Mg--hcp',                194, ['b', 'c', 'd']),
    ('A4--C--dc',                  227, ['a', 'b']),
    ('A5--beta-Sn',                141, ['a', 'b']),
    ('A6--In--bct',                139, ['a', 'b']),
    ('A7--alpha-As',               166, ['c']),
    ('A15--beta-W',                223, ['ac', 'ad']),
    ('A15--Cr3Si',                 223, ['a c', 'c a', 'a d', 'd a']),
    ('Ah--alpha-Po--sc',           221, ['a', 'b']),
    ('B1--NaCl--rock-salt',        225, ['a b', 'b a']),
    ('B2--CsCl',                   221, ['a b', 'b a']),
    ('B3--ZnS--cubic-zinc-blende', 216, ['a c', 'c a', 'b d', 'd b',
                                         'a d', 'd a', 'b c', 'c b']),
    ('C1--CaF2--fluorite',         225, ['a c', 'c a', 'b c', 'c b']),
    ('D0_3--BiF3',                 225, ['a bc', 'bc a', 'b ac', 'ac b']),
    ('L1_0--AuCu',                 123, ['a d', 'd a', 'b c', 'c b']),
    ('L1_2--AuCu3',                221, ['a c', 'c a', 'b d', 'd b']),
    ('L2_1--AlCu2Mn--heusler',     225, ['a b c', 'b c a', 'c a b',
                                         'a c b', 'c b a', 'b a c']),
]

# Flatten into {(space group number, fingerprint): prototype name}.
# setdefault keeps the first listed prototype should a pair ever repeat.
prototype_lookup = {}
for proto_name, spg_number, fingerprints in prototype_signatures:
    for fingerprint in fingerprints:
        prototype_lookup.setdefault((spg_number, fingerprint), proto_name)

# Build one row per reference structure
ref_proto_match = []
for ref_record in reference_records.itertuples():
    # Reference names have the form "<site>-<number>", e.g. "mp-2090"
    site, number = ref_record.family.split('-')

    # Unmatched references get NaN as their prototype
    signature = (ref_record.spacegroup_number, ref_record.wykoff_fingerprint)
    ref_proto_match.append({
        'reference': ref_record.family,
        'composition': ref_record.composition,
        'site': site,
        'number': int(number),
        'prototype': prototype_lookup.get(signature, np.nan),
    })

# Order by site, then numerically by id, and save only the matching columns
ref_proto_match = (
    pd.DataFrame(ref_proto_match)
    .sort_values(['site', 'number'])
    .reset_index()
)[['reference', 'prototype', 'composition']]
ref_proto_match.to_csv(crystal_match_file, index=False)

In [19]:
# Print, for each prototype in alphabetical order, how many references matched it
for protoname in sorted(prototype_records.family):
    is_match = ref_proto_match.prototype == protoname
    print(protoname, is_match.sum())

A1--Cu--fcc 94
A15--Cr3Si 5
A15--beta-W 57
A2--W--bcc 85
A3'--alpha-La--double-hcp 71
A3--Mg--hcp 109
A4--C--dc 55
A5--beta-Sn 59
A6--In--bct 67
A7--alpha-As 17
Ah--alpha-Po--sc 19
B1--NaCl--rock-salt 99
B2--CsCl 108
B3--ZnS--cubic-zinc-blende 59
C1--CaF2--fluorite 15
D0_3--BiF3 174
L1_0--AuCu 50
L1_2--AuCu3 166
L2_1--AlCu2Mn--heusler 25
