# Structure Evaluation

**Library imports**

In [1]:
# Standard Python libraries
from __future__ import (absolute_import, print_function,
                        division, unicode_literals)
import os
import glob
from copy import deepcopy

# http://www.numpy.org/
import numpy as np

import pandas as pd

from DataModelDict import DataModelDict as DM

# https://github.com/usnistgov/atomman
import atomman as am
import atomman.unitconvert as uc

# https://github.com/usnistgov/iprPy
import iprPy
import iprPy.highthroughput as htp
print('iprPy version', iprPy.__version__)

iprPy version 0.8.a


## 1. Access information

**Load database**

In [2]:
# Fetch the 'master' database through iprPy.highthroughput (imported as htp);
# every record query in the cells below goes through this object
database = htp.get_database('master')

**Get reference and results information**

In [3]:
# Every query below requests the same record representation
records_kwargs = dict(full=True, flat=True)

# Interatomic potential records
potential_LAMMPS_df = database.get_records_df(style='potential_LAMMPS', **records_kwargs)

# Relaxation calculation records, one frame per relaxation style
relax_box_df = database.get_records_df(style='calculation_relax_box', **records_kwargs)
relax_static_df = database.get_records_df(style='calculation_relax_static', **records_kwargs)
relax_dynamic_df = database.get_records_df(style='calculation_relax_dynamic', **records_kwargs)

# Space group calculation records (split into reference and results later)
crystal_space_group_df = database.get_records_df(style='calculation_crystal_space_group', **records_kwargs)

**Separate crystal_space_group_df into reference and results**

In [4]:
# A record is a reference when its load_file is the family name followed by a
# '.poscar' or '.xml' extension; every other record is a calculation result.
is_reference = ((crystal_space_group_df.family + '.poscar' == crystal_space_group_df.load_file)
                | (crystal_space_group_df.family + '.xml' == crystal_space_group_df.load_file))

# Split with the boolean mask and its negation.  The previous version passed
# index *labels* (from index.difference) to the positional .iloc indexer, which
# only selects the right rows while the frame has a default 0..n-1 index.
# reset_index() without drop=True is kept so the original row label stays
# available in the 'index' column.
reference_df = crystal_space_group_df[is_reference].reset_index()
csp_results_df = crystal_space_group_df[~is_reference].reset_index()

## 2. Identify crystal_space_group's parents

In [5]:
def identify_parents(database, record):
    """
    Recursively collect the names of the records that a record was loaded from.

    The record's content is searched for a 'system-info' element.  Each 'file'
    entry found there is turned into a parent name: the directory part of the
    path if it has one, otherwise the file name without its extension.  Every
    parent that can be retrieved from the database is then searched the same
    way, so the list is built depth-first (a parent is followed by its own
    ancestors before the next 'file' entry is handled).

    Parameters
    ----------
    database : object
        Must provide get_record(name=...) returning a record.
    record : object
        Must provide a content attribute that DataModelDict can parse.

    Returns
    -------
    list
        The parent names.  Empty if the content cannot be read or holds no
        'system-info' element.
    """
    parents = []

    # Records without readable content or a 'system-info' element have no
    # parents.  Exception (rather than a bare except) keeps this best-effort
    # while letting KeyboardInterrupt/SystemExit stop a long database crawl.
    try:
        content = record.content
        model = DM(content).find('system-info')
    except Exception:
        return parents

    for load_file in model.finds('file'):
        d, b = os.path.split(os.path.normpath(load_file))

        # Files inside a directory are named after that directory,
        # bare files after their base name without extension
        if d != '':
            parent = d
        else:
            parent = os.path.splitext(b)[0]
        parents.append(parent)

        # Parents that are not records in the database end the recursion
        try:
            precord = database.get_record(name=parent)
        except Exception:
            continue
        parents.extend(identify_parents(database, precord))

    return parents

In [6]:
# For every space group result, record its chain of parents and the potential
# id reported by the first parent record that carries one
parent_sets = []
potentials = []
for rname in csp_results_df.key:
    record = database.get_record(name=rname)
    parents = identify_parents(database, record)
    parent_sets.append(parents)

    # Stays NaN when no parent record names a potential
    potential = np.nan
    for parent in parents:
        # Parents that cannot be fetched or converted are skipped.  Exception
        # (rather than a bare except) keeps the lookup best-effort without
        # swallowing KeyboardInterrupt/SystemExit.
        try:
            precord = database.get_record(name=parent).todict()
        except Exception:
            continue

        # Parents without a potential id are skipped as well
        try:
            potential = precord['potential_LAMMPS_id']
        except Exception:
            continue
        break
    potentials.append(potential)

csp_results_df['parents'] = parent_sets
csp_results_df['potential_LAMMPS_id'] = potentials

## 4. Build output

In [8]:
# Only the potential with this id is processed
selected_potentials = potential_LAMMPS_df[potential_LAMMPS_df.id == '2004--Zhou-X-W--Al--LAMMPS--ipr2']

for potential in selected_potentials.itertuples():
    print(potential.id)

    # Rows of each results table that were computed with this potential
    box_df = relax_box_df[relax_box_df.potential_LAMMPS_id == potential.id]
    static_df = relax_static_df[relax_static_df.potential_LAMMPS_id == potential.id]
    dynamic_df = relax_dynamic_df[relax_dynamic_df.potential_LAMMPS_id == potential.id]
    csp_df = csp_results_df[csp_results_df.potential_LAMMPS_id == potential.id]
    

2004--Zhou-X-W--Al--LAMMPS--ipr2


In [9]:
# Show the column names available in csp_df
csp_df.keys()

Index(['index', 'a', 'alpha', 'b', 'beta', 'c', 'error', 'family', 'gamma',
       'idealcell', 'iprPy_version', 'key', 'load_file', 'load_options',
       'load_style', 'natoms', 'pearson_symbol', 'primitivecell', 'script',
       'spacegroup_Schoenflies', 'spacegroup_international',
       'spacegroup_number', 'status', 'symmetryprecision', 'wykoff_letters',
       'parents', 'potential_LAMMPS_id'],
      dtype='object')

In [10]:
# Keys of every relaxation record, gathered once so the parent lookups inside
# the loop are set membership tests instead of a fresh list scan per parent
box_keys = set(relax_box_df.key)
dynamic_keys = set(relax_dynamic_df.key)
static_keys = set(relax_static_df.key)

# One dictionary per space group result; converted to a DataFrame at the end
results_records = []
for series in csp_df.itertuples():
    results_dict = {}
    for name in ('key', 'family', 'a', 'b', 'c', 'alpha', 'beta', 'gamma'):
        results_dict[name] = getattr(series, name)

    # The structure counts as transformed when its space group number or
    # Pearson symbol differs from the first reference record of its family
    family_series = reference_df[reference_df.family == series.family].iloc[0]
    results_dict['transformed'] = not (family_series.spacegroup_number == series.spacegroup_number
                                       and family_series.pearson_symbol == series.pearson_symbol)

    # Walk the parents to find the relaxation method and the cohesive energy
    for parent in series.parents:
        if parent in box_keys:
            results_dict['method'] = 'box'
            results_dict['E_cohesive'] = relax_box_df[relax_box_df.key==parent].iloc[0].E_cohesive
        elif parent in dynamic_keys:
            results_dict['method'] = 'dynamic'
        elif parent in static_keys:
            # A static relaxation names the method only when no other parent
            # has done so already; its energy is always taken.  The guard used
            # to test for a 'relaxation' key that is never written, which made
            # it always true.
            if 'method' not in results_dict:
                results_dict['method'] = 'static'
            results_dict['E_cohesive'] = relax_static_df[relax_static_df.key==parent].iloc[0].E_cohesive

    results_records.append(results_dict)

# Lowest cohesive energy first
results_df = pd.DataFrame(results_records).sort_values('E_cohesive')

In [11]:
# Display the results (already sorted by E_cohesive) with the columns in a fixed order
results_df[['key','family', 'method', 'E_cohesive', 'a', 'b', 'c', 'alpha', 'beta', 'gamma', 'transformed']]

Unnamed: 0,key,family,method,E_cohesive,a,b,c,alpha,beta,gamma,transformed
33,edcc7b05-88fa-441d-b720-0c50ca576fad,A1--Cu--fcc,dynamic,-3.580002,4.0502,4.0502,4.0502,90.0,90.0,90.0,False
17,6339becf-5446-4b1f-86ed-5559c1096c27,oqmd-8100,box,-3.580002,4.0502,4.0502,4.0502,90.0,90.0,90.0,False
27,ae4f0bdf-0a1b-41d7-ad34-0191d462b0ff,mp-134,box,-3.580002,4.0502,4.0502,4.0502,90.0,90.0,90.0,False
15,5c114e38-2695-4661-9a57-0e4686548982,mp-134,static,-3.580002,4.0502,4.0502,4.0502,90.0,90.0,90.0,False
12,47c9f142-e075-43c6-aa34-d5d6766ac922,oqmd-8100,static,-3.580002,4.0502,4.0502,4.0502,90.0,90.0,90.0,False
28,c485d024-38f6-4314-88d0-224fbc520426,A6--In--bct,static,-3.579995,4.050216,4.050216,4.050216,90.0,90.0,90.0,True
14,566f8579-8ea6-4841-b8a4-ef5314aa7180,mp-998860,dynamic,-3.579987,4.050232,4.050232,4.050232,90.0,90.0,90.0,True
9,31d9862b-6420-4fd6-8d77-4979b5c994bb,A6--In--bct,dynamic,-3.579987,4.050232,4.050232,4.050232,90.0,90.0,90.0,True
6,20a0554e-9d95-4a5f-a261-301413a94369,oqmd-8100,dynamic,-3.579979,4.050247,4.050247,4.050247,90.0,90.0,90.0,False
0,0162e074-9b30-484a-8997-9cc66c0e4a43,mp-134,dynamic,-3.579979,4.050247,4.050247,4.050247,90.0,90.0,90.0,False


## 5. Filter transformed and duplicates

In [12]:
# Keep only the rows whose 'transformed' flag is False, then renumber them;
# reset_index() stores the previous row labels in an 'index' column
untransformed = results_df['transformed'] == False
filtered_df = results_df[untransformed].reset_index()

In [13]:
# Lookup table read from the working directory; the cell below uses its
# 'reference' and 'prototype' columns
ref2proto_df = pd.read_csv('reference_prototype_match.csv')

In [14]:
# Replace a family name by its prototype name wherever the lookup table holds
# exactly one matching reference; all other families are left unchanged
for family in np.unique(filtered_df.family):
    matches = ref2proto_df[ref2proto_df.reference == family]
    if len(matches) != 1:
        continue
    same_family = filtered_df.family == family
    filtered_df.loc[same_family, 'family'] = matches.iloc[0].prototype

In [15]:
# Two rows are duplicates when their cohesive energy and a, b, c lattice
# constants all agree within this absolute tolerance
atol = 0.001
compared = ('E_cohesive', 'a', 'b', 'c')

# Row positions (not index labels) of the duplicates to discard
skipindex = set()
nresults = len(filtered_df)
for i in range(nresults):
    iseries = filtered_df.iloc[i]
    for j in range(i+1, nresults):
        jseries = filtered_df.iloc[j]
        if not all(np.isclose(iseries[name], jseries[name], rtol=0.0, atol=atol)
                   for name in compared):
            continue

        # Of a duplicate pair keep the 'dynamic' row, otherwise the 'static'
        # one; rows with the same method keep the earlier row
        if iseries.method == 'dynamic' or iseries.method == jseries.method:
            skipindex.add(j)
        elif jseries.method == 'dynamic':
            skipindex.add(i)
        elif iseries.method == 'static':
            skipindex.add(j)
        elif jseries.method == 'static':
            skipindex.add(i)
        else:
            raise ValueError('oops!')

# skipindex holds positions, so the survivors are selected by position too.
# The previous version passed index.difference(skipindex), a set of *labels*,
# to .iloc, which picks wrong rows as soon as the index is not 0..n-1
# (for example when this cell is run a second time).
keep = [position for position in range(nresults) if position not in skipindex]
filtered_df = filtered_df.iloc[keep]

In [16]:
# Display the rows that survived the transformed and duplicate filters
filtered_df[['key','family', 'method', 'E_cohesive', 'a', 'b', 'c', 'alpha', 'beta', 'gamma']]

Unnamed: 0,key,family,method,E_cohesive,a,b,c,alpha,beta,gamma
0,edcc7b05-88fa-441d-b720-0c50ca576fad,A1--Cu--fcc,dynamic,-3.580002,4.0502,4.0502,4.0502,90.0,90.0,90.0
9,27923e80-e3ab-4602-a421-174c08cc7e4c,A3--Mg--hcp,dynamic,-3.578845,2.8312,2.8312,4.893291,90.0,90.0,120.0
12,ad34f286-1864-44eb-b77f-1167da5ea82b,A3'--alpha-La--double-hcp,dynamic,-3.577534,2.839117,2.839117,9.658255,90.0,90.0,120.0
15,338db902-276b-412d-90c8-95ecaf9f9b93,A15--beta-W,dynamic,-3.563638,5.173408,5.173408,5.173408,90.0,90.0,90.0
19,0c6b9902-e649-4419-a878-85102597e33c,A2--W--bcc,static,-3.5463,3.309992,3.309992,3.309992,90.0,90.0,90.0
23,60af2f8c-e58a-4cc5-ba71-df1045900afa,A5--beta-Sn,static,-3.465649,5.273533,5.273533,2.807452,90.0,90.0,90.0
24,88fb3357-0e49-40d5-95a1-c50d5bdef57c,Ah--alpha-Po--sc,static,-3.435405,2.716499,2.716499,2.716499,90.0,90.0,90.0
27,efe34d28-7a02-471d-89f2-e55a78ee6d8e,A4--C--dc,static,-3.025113,5.662937,5.662937,5.662937,90.0,90.0,90.0
