# Process data for E_vs_r_scan calculation

This Notebook is designed for reading and converting results from the E_vs_r_scan calculation

Library imports

In [1]:
from __future__ import print_function

import glob
import os
from collections import OrderedDict
from datetime import date
from math import floor

import pandas as pd

import numpy as np

from DataModelDict import DataModelDict as DM

import atomman as am
import atomman.lammps as lmp
import atomman.unitconvert as uc

import iprPy

In [2]:
from bokeh.plotting import figure, output_file, show
from bokeh.embed import components
from bokeh.resources import Resources
from bokeh.io import output_notebook
output_notebook()

## 1. Read Raw Data

This section reads in raw data from a database 

### 1.1 Initialize database

- __dbasename__ is used here to predefine different dbase settings
- __dbase__ is the iprPy.Database object to use for accessing a database

In [3]:
# Name of the predefined database configuration to use
dbasename = 'iprhub'

# 'local' is a local directory
if dbasename == 'local':
    # Raw string keeps the Windows backslashes literal; in a plain literal the
    # '\U' of '\Users' is read as a unicode escape by Python 3 and fails to parse.
    dbase = iprPy.Database('local', host=r'C:\Users\lmh1\Documents\calculations\ipr\library')

# 'curator' is a local MDCS curator
elif dbasename == 'curator':
    # NOTE(review): user name and password are hard-coded here -- consider
    # reading them from the environment or a file instead.
    dbase = iprPy.Database('curator', host='http://127.0.0.1:8000/',
                                      user='admin',
                                      pswd='admin')

# 'iprhub' is the remote MDCS curator at iprhub
elif dbasename == 'iprhub':
    dbase = iprPy.Database('curator', host='https://iprhub.nist.gov/',
                                      user='lmh1',
                                      pswd='C:/users/lmh1/documents/iprhub/iprhub_password.txt',
                                      cert='C:/users/lmh1/documents/iprhub/iprhub-ca.pem')

# Any other name is a configuration mistake
else:
    raise ValueError('unknown dbasename ' + dbasename)

### 1.2 Access records

- __E_vs_r_scan_recordstyle__ is the iprPy.Record style associated with the E_vs_r_scan simulations

- __prototype_recordstyle__ is the iprPy.Record style associated with the crystal prototype definitions

In [4]:
# Record style names used when querying the database
prototype_recordstyle = "crystal-prototype"
E_vs_r_scan_recordstyle = "calculation-cohesive-energy-relation"

Access the prototype records

- __proto_df__ is a DataFrame of the prototype records

In [5]:
# Convert every crystal prototype record to a dict and collect them in one DataFrame
proto_df = pd.DataFrame([record.todict()
                         for record in dbase.iget_records(style=prototype_recordstyle)])
print('%d records loaded' % len(proto_df))

19 records loaded


Access the calculation records

- __raw_df__ is a DataFrame of the raw calculation records

In [6]:
# Number of download attempts before giving up
max_attempts = 10

records = None
for attempt in range(1, max_attempts + 1):
    try:
        records = dbase.get_records(style=E_vs_r_scan_recordstyle)
    except Exception:
        # Report the failure and try again; Exception (not a bare except)
        # lets KeyboardInterrupt still stop the cell.
        print('failed ' + str(attempt) + ' times')
    else:
        # Stop at the first successful download instead of repeating it
        break

if records is None:
    raise ValueError('Failed to load')

# Convert every calculation record to a dict and collect them in one DataFrame
raw_df = pd.DataFrame([record.todict() for record in records])
print(str(len(raw_df)) + ' records loaded')

5942 records loaded


### 1.3 Check errors

In [7]:
# Print every distinct error message once, each followed by a blank line
if 'error' in raw_df:
    error_messages = raw_df.error[raw_df.error.notnull()]
    for message in np.unique(error_messages):
        print(message, end='\n\n')

Traceback (most recent call last):
  File "calc_E_vs_r_scan.py", line 276, in <module>
    main(*sys.argv[1:])    
  File "calc_E_vs_r_scan.py", line 45, in main
    rsteps = input_dict['number_of_steps_r'])
  File "calc_E_vs_r_scan.py", line 125, in e_vs_r
    output = lmp.run(lammps_command, lammps_script, mpi_command)
  File "c:\users\lmh1\documents\python-packages\atomman\atomman\lammps\run.py", line 46, in run
    raise ValueError('Invalid LAMMPS input: \n%s' % lines[-2])
ValueError: Invalid LAMMPS input: 
ERROR on proc 0: Invalid number of spline knots in MEAM potential file (../pair_meam_spline.cpp:577)

Traceback (most recent call last):
  File "calc_E_vs_r_scan.py", line 276, in <module>
    main(*sys.argv[1:])    
  File "calc_E_vs_r_scan.py", line 45, in main
    rsteps = input_dict['number_of_steps_r'])
  File "calc_E_vs_r_scan.py", line 125, in e_vs_r
    output = lmp.run(lammps_command, lammps_script, mpi_command)
  File "c:\users\lmh1\documents\python-packages\atomman\at

## 2. Process Data

This section processes and refines the data

### 2.1 Identify composition

- __counts__ is a dictionary counting the number of times each atype appears in a crystal prototype's unit cell (i.e. the number of symmetry equivalent sites)

In [8]:
# For each prototype record, count how often each distinct 'component' value occurs
counts = {}
for proto_id in proto_df['id']:
    proto_model = DM(dbase.get_record(name=proto_id).content)
    _, component_counts = np.unique(proto_model.finds('component'), return_counts=True)
    counts[proto_id] = component_counts

- __comp_refine()__ takes a list of symbols and a count of how many times each symbol appears in a structure, and generates a composition string.

In [9]:
def comp_refine(symbols, counts):
    """
    Build a reduced composition string from symbols and their counts.

    The counts are divided by their greatest common divisor, then the symbols
    are written in alphabetical order, each followed by its reduced count when
    that count is not 1.  Symbols whose count is not positive are left out.

    Parameters
    ----------
    symbols : sequence of str
        The symbols in the structure.
    counts : sequence of int
        counts[i] is the number of times symbols[i] appears.  Must be at least
        as long as symbols.

    Returns
    -------
    str
        The composition string, e.g. 'Ag3Pd' for symbols ['Ag', 'Pd'] and
        counts [3, 1].  An empty string is returned for empty input.
    """
    sym_dict = {}
    for i in range(len(symbols)):
        sym_dict[symbols[i]] = counts[i]

    # Greatest common divisor of all counts (Euclid's algorithm, written out so
    # that it does not depend on where a given Python version keeps gcd)
    divisor = 0
    for value in sym_dict.values():
        a, b = divisor, value
        while b:
            a, b = b, a % b
        divisor = a

    # Floor division keeps the counts integral on both Python 2 and 3
    if divisor > 1:
        for key in sym_dict:
            sym_dict[key] //= divisor

    composition = ''
    for key in sorted(sym_dict):
        if sym_dict[key] > 0:
            composition += key
            if sym_dict[key] != 1:
                composition += str(sym_dict[key])

    return composition

In [10]:
# Derive the composition string of every calculation record and store it as a new column
compositions = [comp_refine(calc.symbols, counts[calc.prototype])
                for _, calc in raw_df.iterrows()]
raw_df = raw_df.assign(composition=compositions)

### 2.2 Remove unwanted entries

- __df__ is the dataframe during/after processing and refining

In [11]:
# Ignore unfinished or error calculations (only records with no status value are kept)
df = raw_df[pd.isnull(raw_df.status)]

# Ignore any that don't use the standard run parameters
standard_run = (np.isclose(df.minimum_r, 2.0)
                & np.isclose(df.maximum_r, 6.0)
                & (df.number_of_steps_r == 200))
df = df[standard_run]

# Ignore false compounds (ones where number of unique symbols != number of symbols)
all_symbols_unique = df.symbols.apply(lambda symbols: len(np.unique(symbols)) == len(symbols))
df = df[all_symbols_unique.astype(bool)]

# Ignore duplicate compounds: keep only the first record for every
# (potential_id, prototype, composition) combination.  A single pass with
# drop_duplicates replaces comparing each row against all rows after it.
df = df.drop_duplicates(subset=['potential_id', 'prototype', 'composition'])

# Renumber the remaining rows; assignment avoids modifying a filtered slice in place
df = df.reset_index(drop=True)
print(str(len(df)) + ' records after filtering')

3060 records after filtering


### 2.3 Filter data

- __headers__ gives the list of data columns from raw_df to keep in df, and how each of them should be renamed.

In [12]:
# Ordered map of raw column name -> new column name
headers = OrderedDict()
headers['calc_key'] = 'key'
headers['potential_id'] = 'potential'
headers['prototype'] = 'prototype'
headers['composition'] = 'composition'
headers['e_vs_r_plot'] = 'e_vs_r_plot'

# Keep only the listed columns, in the listed order, under their new names
df = pd.DataFrame(df, columns=list(headers)).rename(columns=headers)
df

Unnamed: 0,key,potential,prototype,composition,e_vs_r_plot
0,1ec63265-96e2-4931-b90a-37743696adb3,2013--Zhou-X-W--Zn-Cd-Hg-S-Se-Te,L2_1--AlCu2Mn--heusler,CdHgSe2,E_coh a r 0 109....
1,13d551cb-0c82-41f4-978a-886246e1fd1e,2007--Mendelev-M-I--Zr-1,A7--alpha-As,Zr,E_coh a r 0 2.74523...
2,973a9aac-d070-4df1-ad37-6a7444633a03,2013--Zhou-X-W--Zn-Cd-Hg-S-Se-Te,A3--Mg--hcp,Se,E_coh a r 0 266.5...
3,36da1519-3bb5-4c96-a062-d23818e57c42,2013--Hale-L-M--Pd-Ag-H-Morse,L1_2--AuCu3,Ag3Pd,E_coh a r 0 15.426...
4,4712fad7-1c74-48da-98bf-907a04a9d1ce,2014--Liyanage-L-S-I--Fe-C,C1--CaF2--fluorite,C2Fe,E_coh a r 0 -5.5054...
5,5b373b20-1417-4bc3-96f8-6ee141be566b,2014--Liyanage-L-S-I--Fe-C,L1_0--AuCu,CFe,E_coh a r 0 -3.82810...
6,62d3dc11-3a79-4cc8-be63-e0ad74979419,2013--Bonny-G--Fe-Ni-Cr,L1_2--AuCu3,FeNi3,E_coh a r 0 7.46211...
7,12a166ef-8061-4e13-a42c-fefbf93fd4a2,2013--Hale-L-M--Pd-Ag-H-Morse,B1--NaCl--rock-salt,HPd,E_coh a r 0 -2.8908...
8,ce0f684d-f2b7-434c-add0-99cacd6fb980,2004--Zhou-X-W--Ti,A5--beta-Sn,Ti,E_coh a r 0 5.1906...
9,36ffbbe3-6a85-42c8-8c5a-0cd53bf002b5,2005--Mendelev-M-I--Al-Fe,B3--ZnS--cubic-zinc-blende,AlFe,E_coh a r 0 -1.2658...


## 3. Generate page content

This section generates the html content based on the processed data.

### 3.1 Define parameters

#### HTML settings

- __per_potential_directory__ is the directory where the per potential content is saved.

- __html_info_file__ contains the html description for the calculation.

In [13]:
# Directory that receives one sub-directory of generated content per potential
per_potential_directory = r'C:\Users\lmh1\Documents\website\per_potential'

# File holding the html description of the calculation
html_info_file = "E_vs_r_plots/html info.html"

#### Data table settings

- __table_header_file__ contains the header description for the data table file.

- __string_format__ is the format to use for printing the string prototype names.

- __float_format__ is the format to use for printing the floating point data.

- __max_string_length__ is the maximum length allowed for the string prototype names.

In [14]:
# File holding the header text placed at the top of every data table
table_header_file = "E_vs_r_plots/table header.txt"

# Prototype names longer than this are shortened before being used as column names
max_string_length = 16

# printf-style formats for the column names and for the numeric values
string_format = "%-16s"
float_format = "%16.10f"

#### Plot settings

- __line_color__ dictionary defines specific line colors for each crystal prototype

- __line_dash__ dictionary defines specific line styles for each crystal prototype

In [15]:
# Line color of each crystal prototype, filled in one stoichiometry group at a time
line_color = {}

# elemental
line_color.update({
    'A1--Cu--fcc':      'black',
    'A2--W--bcc':       'blue',
    'A3--Mg--hcp':      'red',
    "A3'--alpha-La":    'cyan',
    'A4--C--dc':        'magenta',
    'A5--beta-Sn':      '#EAC117',
    'A6--In--bct':      'orange',
    'A7--alpha-As':     'gray',
    'A15--beta-W':      'green',
    'Ah--alpha-Po--sc': 'brown',
})

# 1:1
line_color.update({
    'B1--NaCl':   'black',
    'B2--CsCl':   'blue',
    'B3--ZnS':    'red',
    'L1_0--AuCu': 'cyan',
})

# 1:2
line_color['C1--CaF2'] = 'black'

# 1:3
line_color.update({
    'A15--Cr3Si':  'black',
    'D0_3--BiF3':  'blue',
    'L1_2--AuCu3': 'red',
})

# 1:1:2
line_color['L2_1--AlCu2Mn'] = 'black'

In [16]:
# Line style of each crystal prototype: solid everywhere except the overrides below
line_dash = dict.fromkeys([
    # elemental
    'A1--Cu--fcc', 'A2--W--bcc', 'A3--Mg--hcp', "A3'--alpha-La", 'A4--C--dc',
    'A5--beta-Sn', 'A6--In--bct', 'A7--alpha-As', 'A15--beta-W', 'Ah--alpha-Po--sc',
    # 1:1
    'B1--NaCl', 'B2--CsCl', 'B3--ZnS', 'L1_0--AuCu',
    # 1:2
    'C1--CaF2',
    # 1:3
    'A15--Cr3Si', 'D0_3--BiF3', 'L1_2--AuCu3',
    # 1:1:2
    'L2_1--AlCu2Mn',
], 'solid')

# Prototypes that get a non-solid line
line_dash['A3--Mg--hcp'] = 'dashed'
line_dash["A3'--alpha-La"] = 'dashdot'

### 3.2 Read text files

In [32]:
# Read the html description text
with open(html_info_file) as info_handle:
    html_info = info_handle.read()

# Read the data table header text
with open(table_header_file) as header_handle:
    table_header = header_handle.read()

### 3.3 Table generation

In [18]:
def table_gen(df, header, header_terms, str_fmt, float_fmt):
    """
    Generate a data table from a df (assuming the same composition and
    potential for each entry).

    Parameters
    ----------
    df : pandas.DataFrame
        The values to tabulate, one table line per row.
    header : str
        Header text, passed through iprPy.tools.filltemplate with '<' and '>'
        as the delimiters.
    header_terms : dict
        The terms handed to iprPy.tools.filltemplate for filling in header.
    str_fmt : str
        printf-style format applied to each column name.
    float_fmt : str
        printf-style format applied to each value.

    Returns
    -------
    str
        The filled-in header, one line of column names, then one line per row
        of df, joined by newlines.
    """
    n_columns = len(df.columns)

    # One format per column, built once instead of once per row
    name_fmt = ' '.join([str_fmt] * n_columns)
    row_fmt = ' '.join([float_fmt] * n_columns)

    lines = [iprPy.tools.filltemplate(header, header_terms, '<', '>')]

    # The column-name line carries a three-space prefix
    lines.append('   ' + name_fmt % tuple(df.columns))

    for _, row in df.iterrows():
        lines.append(row_fmt % tuple(row.tolist()))

    return '\n'.join(lines)

### 3.4 Plot generation

In [27]:
def plot_gen(df, composition, colors, dashes):
    """
    Create the cohesive energy vs. interatomic spacing plot for one composition.

    Parameters
    ----------
    df : pandas.DataFrame
        First column 'r' holds the x values; every other column is drawn as
        one line and is named after its prototype.
    composition : str
        Composition name shown in the plot title.
    colors : dict
        Line color for each prototype name.
    dashes : dict
        Line dash style for each prototype name.

    Returns
    -------
    The figure object with one line per prototype column.
    """
    # Lower y limit: the smallest value anywhere in df (the r column included),
    # rounded down, but never lower than -10
    lowest = floor(df.min().min())
    ymin = max(lowest, -10)

    plot = figure(title='Cohesive Energy vs. Interatomic Spacing for ' + composition,
                  plot_width=800,
                  plot_height=600,
                  x_range=[2, 6],
                  y_range=[ymin, 0],
                  x_axis_label='r (A)',
                  y_axis_label='Cohesive Energy (eV/atom)')

    # Every column after the first is one prototype's curve
    for prototype in df.columns[1:]:
        plot.line(df.r, df[prototype],
                  legend=prototype,
                  line_color=colors[prototype],
                  line_dash=dashes[prototype],
                  line_width=2)

    plot.legend.location = "bottom_right"
    return plot

In [33]:
# Generate the data tables and the html content of every potential
for potential in np.unique(df.potential):
    print(potential)
    potential_df = df[df.potential == potential]

    # Make sure the potential has an output directory
    potential_directory = os.path.join(per_potential_directory, potential)
    if not os.path.isdir(potential_directory):
        os.makedirs(potential_directory)

    # The html starts with html_info; the plot scripts are appended after all plots
    html_parts = [html_info]
    script_parts = []

    # One data table and one plot per composition
    for composition in np.unique(potential_df.composition):
        composition_df = potential_df[potential_df.composition == composition]

        # Extract plot values: r comes from the first record, one energy column per prototype
        plot_values = {'r': uc.get_in_units(composition_df.iloc[0].e_vs_r_plot.r, 'angstrom')}
        names = []
        for _, calc in composition_df.iterrows():
            name = calc.prototype
            if len(name) > max_string_length:
                # Too long for the table column: drop the last '--' separated part
                name = '--'.join(name.split('--')[:-1])
            plot_values[name] = uc.get_in_units(calc.e_vs_r_plot.E_coh, 'eV')
            names.append(name)
        plot_df = pd.DataFrame(plot_values, columns=['r'] + sorted(names))

        # Construct and save the data table
        table_header_terms = {'potential': potential,
                              'composition': composition,
                              'day': str(date.today())}
        table = table_gen(plot_df, table_header, table_header_terms, string_format, float_format)
        table_file = os.path.join(potential_directory, 'EvsR.' + composition + '.txt')
        with open(table_file, 'w') as f:
            f.write(table)

        # Construct the plot and split it into its script and div parts
        plot = plot_gen(plot_df, composition, line_color, line_dash)
        script, div = components(plot)

        # Add the heading, the data table link and the plot
        html_parts.append('<div><h3>Cohesive Energy vs. Interatomic Spacing for ' + composition + '</h3>\n')
        html_parts.append('<a href="./' + potential + '/EvsR.' + composition + '.txt" target="_blank">Download data</a></div>')
        html_parts.append(div.strip())
        script_parts.append(script)

    html = ''.join(html_parts) + ''.join(script_parts)

    # Save html
    with open(os.path.join(potential_directory, 'EvsR.info'), 'w') as html_file:
        html_file.write(html)


1985--Foiles-S-M--Ni-Cu
1987--Ackland-G-J--Ag
1987--Ackland-G-J--Au
1987--Ackland-G-J--Cu
1987--Ackland-G-J--Mo
1987--Ackland-G-J--Ni
1989--Adams-J-B--Ag
1989--Adams-J-B--Au
1989--Adams-J-B--Cu
1989--Adams-J-B--Ni
1989--Adams-J-B--Pd
1989--Adams-J-B--Pt
1992--Ackland-G-J--Ti
1995--Angelo-J-E--Ni-Al-H
1996--Farkas-D--Nb-Ti-Al
1997--Ackland-G-J--Fe
1997--Liu-X-Y--Al-Mg
1998--Liu-X-Y--Al-Mg
1999--Liu-X-Y--Al-Cu
1999--Mishin-Y--Al
1999--Mishin-Y--Ni
2000--Landa-A--Al-Pb
2000--Sturgeon-J-B--Al
2001--Mishin-Y--Cu-1
2002--Mishin-Y--Ni-Al
2003--Han-S--Cs
2003--Han-S--K
2003--Han-S--Li
2003--Han-S--Na
2003--Han-S--Rb
2003--Han-S--W
2003--Hoyt-J-J--Cu-Pb
2003--Li-Y-H--Ta
2003--Mendelev-M-I--Fe-2
2003--Mendelev-M-I--Fe-5
2003--Zope-R-R--Al
2003--Zope-R-R--Ti-Al
2004--Ackland-G-J--Fe-P
2004--Liu-X-Y--Al
2004--Mishin-Y--Ni-Al
2004--Zhou-X-W--Ag
2004--Zhou-X-W--Al
2004--Zhou-X-W--Au
2004--Zhou-X-W--Co
2004--Zhou-X-W--Cu
2004--Zhou-X-W--Fe
2004--Zhou-X-W--Mg
2004--Zhou-X-W--Mo
2004--Zhou-X-W--Ni
2004