In [56]:
import numpy as np
import pandas as pd
from pymatgen.core import Structure
import matbench
import os
from matminer.datasets import load_dataset
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
from pymatgen.analysis.structure_matcher import StructureMatcher
from pymatgen.analysis.structure_matcher import ElementComparator
from pymatgen.ext.matproj import MPRester
import shutil

In [121]:
# Run selection: which compound and which optimizer's results to evaluate.
pretty_formula = 'MgO'
optimizer = 'tpe'

# Results for one compound live under path_to_crystal, one sub-folder per optimizer.
path_to_crystal = '../results/MgO/'
path_to_data = os.path.join(path_to_crystal, optimizer)

In [122]:
# Fetch the Materials Project reference entries for `pretty_formula`.
#
# The API key is read from the MP_API_KEY environment variable so the credential is
# never stored in (or shared with) the notebook. A key that was previously committed
# here should be treated as leaked and regenerated.
MP_API_KEY = os.environ.get('MP_API_KEY')
if not MP_API_KEY:
    raise RuntimeError(
        'Materials Project API key not found: set the MP_API_KEY environment variable.'
    )

# Fields requested for every entry: the conventional-cell CIF and the formation energy.
properties = ["cifs.conventional_standard", "formation_energy_per_atom"]
# Only entries of this formula that actually carry a formation energy.
criteria = {"formation_energy_per_atom": {"$exists": True}, "pretty_formula": pretty_formula}

# The context manager closes the underlying HTTP session when the query is done.
with MPRester(MP_API_KEY) as mpr:
    ground_truth = mpr.query(criteria, properties)

In [123]:
#Structure.from_str(ground_truth[0]['cifs.conventional_standard'], fmt = 'cif')

In [124]:
# One sub-directory per optimisation run. os.listdir also returns plain files
# (e.g. .DS_Store, logs), which are not runs, so keep directories only.
all_directories = sorted(
    entry for entry in os.listdir(path_to_data)
    if os.path.isdir(os.path.join(path_to_data, entry))
)

In [125]:
# Accumulator for the matching results, keyed by run directory name.
final_matching = dict()

In [126]:
# For every optimisation run: take the lowest-energy step, copy its structure to
# best_cif.cif, and compare it with the Materials Project reference entries.
# For the first reference that matches, store
#     final_matching[run] = [True, |formation-energy error|, lattice MAPE in %]
# Runs for which no reference matches are left out of final_matching.

# The matcher depends on neither the run nor the reference, so build it once.
sm = StructureMatcher(comparator=ElementComparator(), primitive_cell=False)

# Parse every reference CIF once instead of once per run.
reference_structures = [
    Structure.from_str(entry['cifs.conventional_standard'], fmt='cif')
    for entry in ground_truth
]

for directory in all_directories:
    results_dir = os.path.join(path_to_data, directory, 'results')
    structures_dir = os.path.join(results_dir, 'structures')

    energy_csv = pd.read_csv(os.path.join(results_dir, 'energy_data.csv'))
    # idxmin yields exactly one row label, even when several steps tie for the minimum;
    # converting a boolean-filtered Series with int() fails on ties and is deprecated
    # in recent pandas versions.
    best_row = energy_csv['energy'].idxmin()
    min_energy = energy_csv.loc[best_row, 'energy']
    optimal_step = int(energy_csv.loc[best_row, 'step'])

    # Structure files carry the step number as the second-to-last '_'-separated field.
    step_tag = str(optimal_step)
    best_cif = []
    for file_name in sorted(os.listdir(structures_dir)):
        parts = file_name.split('_')
        # Names without an underscore have no such field; skip them instead of raising.
        if len(parts) >= 2 and parts[-2] == step_tag:
            best_cif.append(file_name)
    if not best_cif:
        raise FileNotFoundError(
            'No structure file for step {} in {}'.format(optimal_step, structures_dir)
        )

    best_cif_path = os.path.join(structures_dir, 'best_cif.cif')
    shutil.copy(os.path.join(structures_dir, best_cif[0]), best_cif_path)

    pred = Structure.from_file(best_cif_path)
    lattice_pred = np.array(pred.lattice.abc)
    for entry, true in zip(ground_truth, reference_structures):
        if sm.fit(pred, true):
            true_form = entry['formation_energy_per_atom']
            form_e_error = abs(true_form - min_energy)
            # NOTE(review): a, b, c are compared position by position, which assumes the
            # predicted and reference cells use the same axis ordering - confirm.
            lattice_true = np.array(true.lattice.abc)
            mape_lattice = np.mean((np.abs(lattice_true - lattice_pred) / lattice_true) * 100)
            final_matching[directory] = [True, form_e_error, mape_lattice]
            break



In [127]:
# Show the per-run results: run name -> [matched, formation-energy error, lattice MAPE (%)].
final_matching

{'MgO_train1-230_perturbedepoch1000_tpe_space75-230': [True,
  0.04812109155904132,
  6.10984300657539],
 'MgO_train75-230_unperturbedepoch1000_tpe_space75-230': [True,
  0.23072325868855792,
  7.0296089448474435]}

In [128]:
#Structure.from_str(ground_truth[i]['cifs.conventional_standard'], fmt = 'cif').as_dict()['lattice']['a']

In [129]:
# Average each error over the runs recorded in final_matching
# (index 1: formation-energy error, index 2: lattice percentage error).
form_errors = [entry[1] for entry in final_matching.values()]
lattice_percent_errors = [entry[2] for entry in final_matching.values()]
mean_form_error = np.mean(form_errors)
mean_lattice_percent_error = np.mean(lattice_percent_errors)

In [130]:
# Report the two aggregate errors, rounded for readability.
rounded_form_error = round(mean_form_error, 4)
rounded_lattice_error = round(mean_lattice_percent_error, 3)
print('Mean Absolute Error in Prediction of Formation Energy is {} eV/atom'.format(rounded_form_error))
print('Mean Absolute Percentage Error in Prediction of Lattice Constant is {}%'.format(rounded_lattice_error))

Mean Absolute Error in Prediction of Formation Energy is 0.1394 eV/atom
Mean Absolute Percentage Error in Prediction of Lattice Constant is 6.57%
