In [1]:
from __future__ import annotations

import os
import shutil
import warnings

import numpy as np
import pytorch_lightning as pl
from dgl.data.utils import split_dataset
from mp_api.client import MPRester
from pytorch_lightning.loggers import CSVLogger

import matgl
from matgl.ext.pymatgen import Structure2Graph, get_element_list
from matgl.graph.data import MGLDataset, MGLDataLoader, collate_fn_efs
from matgl.models import M3GNet
from matgl.utils.training import PotentialLightningModule

# To suppress warnings for clearer output
warnings.simplefilter("ignore")


  from .autonotebook import tqdm as notebook_tqdm


No module named 'phonopy'
No module named 'phonopy'


In [2]:
# Obtain your API key here: https://next-gen.materialsproject.org/api
# mpr = MPRester(api_key="YOUR_API_KEY")
mpr = MPRester("FwTXcju8unkI2VbInEgZDTN8coDB6S6U")
entries = mpr.get_entries_in_chemsys(["Si", "O"])
structures = [e.structure for e in entries]
energies = [e.energy for e in entries]
forces = [np.zeros((len(s), 3)).tolist() for s in structures]
stresses = [np.zeros((3, 3)).tolist() for s in structures]
labels = {
    "energies": energies,
    "forces": forces,
    "stresses": stresses,
}

print(f"{len(structures)} downloaded from MP.")


Retrieving ThermoDoc documents: 100%|██████████| 407/407 [00:00<00:00, 5560526.80it/s]


407 downloaded from MP.


In [3]:
element_types = get_element_list(structures)
converter = Structure2Graph(element_types=element_types, cutoff=5.0)
dataset = MGLDataset(
    threebody_cutoff=4.0,
    structures=structures,
    converter=converter,
    labels=labels,
)
train_data, val_data, test_data = split_dataset(
    dataset,
    frac_list=[0.8, 0.1, 0.1],
    shuffle=True,
    random_state=42,
)
train_loader, val_loader, test_loader = MGLDataLoader(
    train_data=train_data,
    val_data=val_data,
    test_data=test_data,
    collate_fn=collate_fn_efs,
    batch_size=2,
    num_workers=0,
)
model = M3GNet(
    element_types=element_types,
    is_intensive=False,
)
lit_module = PotentialLightningModule(model=model)


In [4]:
print(element_types)
print(type(structures[0]))
print(type(energies[0]))
print(type(forces[0]))
print(stresses)


('O', 'Si')
<class 'pymatgen.core.structure.Structure'>
<class 'float'>
<class 'list'>
[[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0,

In [None]:
# If you wish to disable GPU or MPS (M1 mac) training, use the accelerator="cpu" kwarg.
logger = CSVLogger("logs", name="M3GNet_training")
# Inference mode = False is required for calculating forces, stress in test mode and prediction mode
trainer = pl.Trainer(max_epochs=10, accelerator="cpu", logger=logger, inference_mode=False)
trainer.fit(model=lit_module, train_dataloaders=train_loader, val_dataloaders=val_loader)


In [None]:
# test the model, remember to set inference_mode=False in trainer (see above)
trainer.test(dataloaders=test_loader)

In [None]:
# save trained model
model_export_path = "./trained_model/"
model.save(model_export_path)

# load trained model
model = matgl.load_model(path=model_export_path)


In [5]:
# download a pre-trained M3GNet
m3gnet_nnp = matgl.load_model("M3GNet-MP-2021.2.8-DIRECT-PES")
model_pretrained = m3gnet_nnp.model
lit_module_finetune = PotentialLightningModule(model=model_pretrained, lr=1e-4)


In [6]:
# If you wish to disable GPU or MPS (M1 mac) training, use the accelerator="cpu" kwarg.
logger = CSVLogger("logs", name="M3GNet_finetuning")
trainer = pl.Trainer(max_epochs=5, accelerator="cpu", logger=logger, inference_mode=False)
trainer.fit(model=lit_module_finetune, train_dataloaders=train_loader, val_dataloaders=val_loader)


GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type              | Params
--------------------------------------------
0 | mae   | MeanAbsoluteError | 0     
1 | rmse  | MeanSquaredError  | 0     
2 | model | Potential         | 1.1 M 
--------------------------------------------
1.1 M     Trainable params
0         Non-trainable params
1.1 M     Total params
4.468     Total estimated model params size (MB)


Epoch 4: 100%|██████████| 163/163 [01:07<00:00,  2.43it/s, v_num=1, val_Total_Loss=4.410, val_Energy_MAE=1.030, val_Force_MAE=0.297, val_Stress_MAE=0.000, val_Site_Wise_MAE=0.000, val_Energy_RMSE=1.350, val_Force_RMSE=0.433, val_Stress_RMSE=0.000, val_Site_Wise_RMSE=0.000, train_Total_Loss=4.020, train_Energy_MAE=0.891, train_Force_MAE=0.382, train_Stress_MAE=0.000, train_Site_Wise_MAE=0.000, train_Energy_RMSE=1.130, train_Force_RMSE=0.613, train_Stress_RMSE=0.000, train_Site_Wise_RMSE=0.000]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 163/163 [01:07<00:00,  2.43it/s, v_num=1, val_Total_Loss=4.410, val_Energy_MAE=1.030, val_Force_MAE=0.297, val_Stress_MAE=0.000, val_Site_Wise_MAE=0.000, val_Energy_RMSE=1.350, val_Force_RMSE=0.433, val_Stress_RMSE=0.000, val_Site_Wise_RMSE=0.000, train_Total_Loss=4.020, train_Energy_MAE=0.891, train_Force_MAE=0.382, train_Stress_MAE=0.000, train_Site_Wise_MAE=0.000, train_Energy_RMSE=1.130, train_Force_RMSE=0.613, train_Stress_RMSE=0.000, train_Site_Wise_RMSE=0.000]


In [7]:
# save trained model
model_save_path = "./finetuned_model/"
model_pretrained.save(model_save_path)
# load trained model
trained_model = matgl.load_model(path=model_save_path)