In [23]:
import yaml
import json

import pandas as pd
import numpy as np
import tensorflow as tf

from pathlib import Path
from pymatgen.core import Structure
from sklearn.model_selection import train_test_split
from megnet.models import MEGNetModel
from megnet.data.crystal import CrystalGraph

In [3]:
def read_pymatgen_dict(file):
    """Read a JSON file and rebuild a pymatgen ``Structure`` from it.

    Args:
        file: Path of a JSON file holding a dict accepted by
            ``Structure.from_dict``.

    Returns:
        The ``Structure`` built from the decoded JSON dict.
    """
    with open(file, "r") as handle:
        payload = json.load(handle)
    structure = Structure.from_dict(payload)
    return structure

In [4]:
def energy_within_threshold(prediction, target):
    """Fraction of entries whose absolute error is below 0.02.

    Args:
        prediction: Tensor of predicted values.
        target: Tensor of reference values.

    Returns:
        Number of entries with ``|target - prediction| < 0.02`` divided by
        the total number of entries in ``target``.
    """
    tolerance = 0.02
    abs_error = tf.math.abs(target - prediction)
    within_tolerance = abs_error < tolerance

    # Both operands are int64, so the division below matches the original
    # expression exactly.
    n_hits = tf.math.count_nonzero(within_tolerance)
    n_total = tf.cast(tf.size(target), tf.int64)
    return n_hits / n_total

In [57]:
def prepare_dataset(dataset_path, test_size=0.25, random_state=666):
    """Load structures and targets from disk and split them into train/test.

    Expects ``<dataset_path>/targets.csv`` (its first column is used as the
    index) and a ``<dataset_path>/train`` directory of ``*.json`` files
    readable by ``read_pymatgen_dict``.

    Args:
        dataset_path: Root directory of the dataset (str or Path).
        test_size: Forwarded to ``train_test_split``; defaults to the
            previously hard-coded 0.25.
        random_state: Forwarded to ``train_test_split``; defaults to the
            previously hard-coded 666.

    Returns:
        ``(train, test)``: two DataFrames indexed by file stem, with a
        ``structures`` column and a ``targets`` column.
    """
    dataset_path = Path(dataset_path)
    targets = pd.read_csv(dataset_path / "targets.csv", index_col=0)

    # Only *.json files are read, so stray files in the directory no longer
    # break json.load. Sorting fixes the row order: directory iteration order
    # is filesystem-dependent, which would otherwise make the seeded split
    # differ between machines.
    structure_files = sorted((dataset_path / "train").glob("*.json"))

    # `.stem` removes just the extension. The previous `.strip(".json")`
    # removed any of the characters '.', 'j', 's', 'o', 'n' from BOTH ends of
    # the file name and could silently corrupt ids.
    struct = {item.stem: read_pymatgen_dict(item) for item in structure_files}

    data = pd.DataFrame(columns=["structures"], index=struct.keys())
    # `targets` is aligned on the index by pandas; presumably the first CSV
    # column holds the same ids as the file stems -- confirm against the data.
    data = data.assign(structures=struct.values(), targets=targets)

    train, test = train_test_split(
        data, test_size=test_size, random_state=random_state
    )

    return train, test

In [50]:
# Smoke-test call: the returned (train, test) tuple is only displayed here,
# it is not bound to any name.
prepare_dataset(r'C:\Users\KIIT\Downloads\IDAO')

  result = np.asarray(values, dtype=dtype)


(                                                                 structures  \
 6142710b4e27a1844a5f07f4  [[1.27612629e-07 1.84192955e+00 3.71975100e+00...   
 6141d01431cf3ef3d4a9edec  [[1.27612629e-07 1.84192955e+00 3.71975100e+00...   
 6141d38dee0a3fd43fb47b49  [[1.27612629e-07 1.84192955e+00 3.71975100e+00...   
 6141e2d4ee0a3fd43fb47cc5  [[1.27612629e-07 1.84192955e+00 3.71975100e+00...   
 61422bfc4e27a1844a5f0682  [[1.27612629e-07 1.84192955e+00 3.71975100e+00...   
 ...                                                                     ...   
 6141cf2031cf3ef3d4a9ed56  [[1.27612629e-07 1.84192955e+00 3.71975100e+00...   
 614415764e27a1844a5f0aa0  [[1.27612629e-07 1.84192955e+00 3.71975100e+00...   
 61421fb831cf3ef3d4a9f32c  [[1.27612629e-07 1.84192955e+00 3.71975100e+00...   
 61421c9c31cf3ef3d4a9f30e  [[1.27612629e-07 1.84192955e+00 3.71975100e+00...   
 61425a159cbada84a8676d25  [[1.27612629e-07 1.84192955e+00 3.71975100e+00...   
 
                           targets  
 

In [53]:
# Display the training split.
# NOTE(review): the recorded output is a NameError -- by the execution counts
# this cell ran (In [53]) before `train` was assigned (In [58]); it only works
# when the assignment cell has been run first.
train

NameError: name 'train' is not defined

In [58]:
# Bind the two splits returned by prepare_dataset to module-level names.
train, test = prepare_dataset(r'C:\Users\KIIT\Downloads\IDAO')

In [56]:
# NOTE(review): no cell above this one defines a module-level `data` (inside
# prepare_dataset it is a local); the only top-level assignment is the
# json.load cell further down. Confirm what this cell is meant to display.
data

KeyboardInterrupt: 

Structure Summary
Lattice
    abc : 25.5225256 25.5225256 14.879004
 angles : 90.0 90.0 119.99999999999999
 volume : 8393.668021812642
      A : 25.5225256 0.0 1.5628039641098191e-15
      B : -12.761262799999994 22.10315553833868 1.5628039641098191e-15
      C : 0.0 0.0 14.879004
PeriodicSite: Mo (0.0000, 1.8419, 3.7198) [0.0417, 0.0833, 0.2500]
PeriodicSite: Mo (-3.1903, 7.3677, 3.7198) [0.0417, 0.3333, 0.2500]
PeriodicSite: Mo (-4.7855, 10.1306, 3.7198) [0.0417, 0.4583, 0.2500]
PeriodicSite: Mo (-6.3806, 12.8935, 3.7198) [0.0417, 0.5833, 0.2500]
PeriodicSite: Mo (-7.9758, 15.6564, 3.7198) [0.0417, 0.7083, 0.2500]
PeriodicSite: Mo (-9.5709, 18.4193, 3.7198) [0.0417, 0.8333, 0.2500]
PeriodicSite: Mo (-11.1661, 21.1822, 3.7198) [0.0417, 0.9583, 0.2500]
PeriodicSite: Mo (3.1903, 1.8419, 3.7198) [0.1667, 0.0833, 0.2500]
PeriodicSite: Mo (1.5952, 4.6048, 3.7198) [0.1667, 0.2083, 0.2500]
PeriodicSite: Mo (0.0000, 7.3677, 3.7198) [0.1667, 0.3333, 0.2500]
PeriodicSite: Mo (-1.5952, 10.1306, 

In [9]:
def prepare_model(cutoff, lr):
    """Construct the MEGNetModel used for training.

    Args:
        cutoff: Passed to ``CrystalGraph`` as ``cutoff``; also sets the upper
            end (``cutoff + 1``) of the evenly spaced centers.
        lr: Passed to ``MEGNetModel`` as ``lr``.

    Returns:
        A ``MEGNetModel`` configured with 10 centers on ``[0, cutoff + 1]``,
        width 0.8, ``["MAE"]`` loss, ``npass=2`` and
        ``energy_within_threshold`` as its metric.
    """
    n_centers = 10
    centers = np.linspace(0, cutoff + 1, n_centers)
    converter = CrystalGraph(cutoff=cutoff)

    model_kwargs = dict(
        graph_converter=converter,
        centers=centers,
        width=0.8,
        loss=["MAE"],
        npass=2,
        lr=lr,
        metrics=energy_within_threshold,
    )
    return MEGNetModel(**model_kwargs)

In [13]:
def main(
    dataset_path=r'C:\Users\KIIT\Downloads\IDAO',
    cutoff=4,
    lr=2e-4,
    epochs=800,
    batch_size=128,
):
    """Load the dataset, build the model and run training with validation.

    Every argument defaults to the value that used to be hard-coded, so a
    bare ``main()`` call behaves as before; pass arguments to run on another
    machine or with other hyperparameters.

    Args:
        dataset_path: Dataset root handed to ``prepare_dataset``.
        cutoff: First argument of ``prepare_model``.
        lr: Second argument of ``prepare_model``.
        epochs: Forwarded to ``model.train``.
        batch_size: Forwarded to ``model.train``.
    """
    train, test = prepare_dataset(dataset_path)
    model = prepare_model(cutoff, lr)
    model.train(
        train.structures,
        train.targets,
        validation_structures=test.structures,
        validation_targets=test.targets,
        epochs=epochs,
        batch_size=batch_size,
    )

In [14]:
# Run the training entry point. The recorded run below ended with a
# KeyboardInterrupt at epoch 15 of 800.
main()

  result = np.asarray(values, dtype=dtype)
  super(Adam, self).__init__(name, **kwargs)


Epoch 1/800


INFO:megnet.callbacks:
Epoch 00001: val_mae improved from inf to 0.59915, saving model to callback\val_mae_00001_0.599147.hdf5


Epoch 2/800

INFO:megnet.callbacks:
Epoch 00002: val_mae improved from 0.59915 to 0.45744, saving model to callback\val_mae_00002_0.457436.hdf5


Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 9/800
Epoch 10/800
Epoch 11/800
Epoch 12/800
Epoch 13/800
Epoch 14/800
Epoch 15/800

KeyboardInterrupt: 

In [24]:
# Load the JSON file in one step. The `with` block closes the handle as soon
# as parsing finishes; previously the file was opened in one cell and never
# closed.
with open(r'C:\Users\KIIT\Downloads\mp.2018.6.1.json\mp.2018.6.1.json') as f:
    data = json.load(f)

In [38]:
# Flatten the loaded JSON records into a DataFrame; nested keys become dotted
# column names (e.g. `graph.index1` in the preview below).
# NOTE(review): the NameError recorded under this cell mentions `train`, which
# this statement does not use -- the output looks stale.
a = pd.json_normalize(data)

NameError: name 'train' is not defined

In [39]:
# Preview the normalized frame.
a

Unnamed: 0,material_id,structure,formation_energy_per_atom,band_gap,graph.index1,graph.index2,graph.atom,graph.bond,graph.state,G,K
0,mvc-8151,# generated using pymatgen\ndata_MgSn(GeO3)2\n...,-2.166565,2.2274,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 13...","[12, 12, 50, 50, 32, 32, 32, 32, 8, 8, 8, 8, 8...","[3.888033601337422, 3.136339708154554, 3.13633...","[[0, 0]]",,
1,mvc-8154,#\#CIF1.1\n###################################...,-1.336841,0.0000,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[2, 2, 5, 5, 6, 7, 9, 10, 10, 10, 13, 13, 13, ...","[24, 24, 24, 24, 24, 24, 8, 8, 8, 8, 8, 8, 8, ...","[2.9174146214289935, 2.9217589665784702, 2.917...","[[0, 0]]",,
2,mvc-1148,#\#CIF1.1\n###################################...,-2.264626,0.0606,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 3, 3, 3, 3, 4, 4, 4, 4, 6, 6, 7, ...","[56, 56, 13, 26, 26, 26, 8, 8, 8, 8, 8, 8, 8, 8]","[3.8660262401400005, 3.866026240140001, 3.8660...","[[0, 0]]",,
3,mvc-14313,#\#CIF1.1\n###################################...,-2.301683,1.3002,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, ...","[12, 12, 42, 42, 42, 8, 8, 8, 8, 8, 8, 8, 8]","[3.579005750881316, 3.579005922381625, 3.76512...","[[0, 0]]",,
4,mvc-13470,#\#CIF1.1\n###################################...,-2.935452,0.0546,"[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, ...","[2, 3, 4, 5, 6, 7, 2, 3, 4, 5, 6, 7, 0, 1, 3, ...","[12, 83, 9, 9, 9, 9, 9, 9]","[2.00749321866107, 2.0074934686054213, 2.00749...","[[0, 0]]",,
...,...,...,...,...,...,...,...,...,...,...,...
69234,mvc-3636,#\#CIF1.1\n###################################...,-2.633844,3.4641,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[2, 3, 5, 11, 13, 16, 19, 20, 22, 23, 25, 28, ...","[12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 1...","[3.8215929109579503, 3.821592810112047, 3.8215...","[[0, 0]]",,
69235,mp-510584,# generated using pymatgen\ndata_MoO3\n_symmet...,-1.992217,1.9418,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0, 0, 0, 0, 1, 1, 4, 8, 9, 9, 12, 12, 14, 14,...","[42, 42, 42, 42, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,...","[3.7549969, 3.92140897, 3.9214089699999994, 3....","[[0, 0]]",26.0,31.0
69236,mp-756515,# generated using pymatgen\ndata_Li4Nb(TeO4)3\...,-2.015558,0.0000,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 4, 5, 5, 6, 6, 7, 7, 7, 7, 9, 9, 10, 11...","[3, 3, 3, 3, 41, 52, 52, 52, 8, 8, 8, 8, 8, 8,...","[2.8243003402268427, 2.8243003402268427, 3.688...","[[0, 0]]",,
69237,mp-774245,# generated using pymatgen\ndata_Li5Mn5(NiO6)2...,-1.938301,0.5302,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[3, 3, 3, 4, 5, 7, 7, 7, 8, 9, 10, 11, 13, 13,...","[3, 3, 3, 3, 3, 25, 25, 25, 25, 25, 28, 28, 8,...","[2.975668849594103, 2.998639655426787, 2.99301...","[[0, 0]]",,


In [47]:
# Inspect one raw `structure` entry (row label 50000); per the recorded
# output it is a CIF-formatted string.
a['structure'][50000]

"# generated using pymatgen\ndata_TiCoO3\n_symmetry_space_group_name_H-M   'P 1'\n_cell_length_a   5.53301526\n_cell_length_b   5.53301525\n_cell_length_c   5.53301525\n_cell_angle_alpha   54.94958439\n_cell_angle_beta   54.94958438\n_cell_angle_gamma   54.94958432\n_symmetry_Int_Tables_number   1\n_chemical_formula_structural   TiCoO3\n_chemical_formula_sum   'Ti2 Co2 O6'\n_cell_volume   105.698662274\n_cell_formula_units_Z   2\nloop_\n _symmetry_equiv_pos_site_id\n _symmetry_equiv_pos_as_xyz\n  1  'x, y, z'\nloop_\n _atom_site_type_symbol\n _atom_site_label\n _atom_site_symmetry_multiplicity\n _atom_site_fract_x\n _atom_site_fract_y\n _atom_site_fract_z\n _atom_site_occupancy\n  O  O1  1  0.561927  0.949694  0.226768  1\n  O  O2  1  0.226768  0.561927  0.949694  1\n  O  O3  1  0.949694  0.226768  0.561927  1\n  O  O4  1  0.438073  0.050306  0.773232  1\n  O  O5  1  0.773232  0.438073  0.050306  1\n  O  O6  1  0.050306  0.773232  0.438073  1\n  Ti  Ti7  1  0.855859  0.855859  0.855859

In [46]:
# Scratch arithmetic. NOTE(review): purpose is not evident from the notebook.
190*6

1140