In [1]:
from __future__ import annotations

import os
import shutil
import warnings

import numpy as np
import pytorch_lightning as pl
from dgl.data.utils import split_dataset
from mp_api.client import MPRester
from pytorch_lightning.loggers import CSVLogger
from pytorch_lightning.loggers import TensorBoardLogger

import matgl
from matgl.ext.pymatgen import Structure2Graph, get_element_list
from matgl.graph.data import MGLDataset, MGLDataLoader, collate_fn_efs
from matgl.models import M3GNet
from matgl.utils.training import PotentialLightningModule

# To suppress warnings for clearer output
warnings.simplefilter("ignore")
logger = TensorBoardLogger('tb_logs', name='my_model')

  from .autonotebook import tqdm as notebook_tqdm


No module named 'phonopy'
No module named 'phonopy'


In [2]:
# Obtain your API key here: https://next-gen.materialsproject.org/api
# mpr = MPRester(api_key="YOUR_API_KEY")
mpr = MPRester("FwTXcju8unkI2VbInEgZDTN8coDB6S6U")
entries = mpr.get_entries_in_chemsys(["Si", "O"])
structures = [e.structure for e in entries]
energies = [e.energy for e in entries]
forces = [np.zeros((len(s), 3)).tolist() for s in structures]
stresses = [np.zeros((3, 3)).tolist() for s in structures]
labels = {
    "energies": energies,
    "forces": forces,
    "stresses": stresses,
}

print(f"{len(structures)} downloaded from MP.")


Retrieving ThermoDoc documents: 100%|██████████| 407/407 [00:00<00:00, 4728758.25it/s]


407 downloaded from MP.


In [3]:
element_types = get_element_list(structures)
converter = Structure2Graph(element_types=element_types, cutoff=5.0)
dataset = MGLDataset(
    threebody_cutoff=4.0,
    structures=structures,
    converter=converter,
    labels=labels,
)
train_data, val_data, test_data = split_dataset(
    dataset,
    frac_list=[0.8, 0.1, 0.1],
    shuffle=True,
    random_state=42,
)
train_loader, val_loader, test_loader = MGLDataLoader(
    train_data=train_data,
    val_data=val_data,
    test_data=test_data,
    collate_fn=collate_fn_efs,
    batch_size=2,
    num_workers=0,
)
model = M3GNet(
    element_types=element_types,
    is_intensive=False,
)
lit_module = PotentialLightningModule(model=model)


In [4]:
print(element_types)
print(type(structures[0]))
print(type(energies[0]))
print(type(forces[0]))
print(stresses)


('O', 'Si')
<class 'pymatgen.core.structure.Structure'>
<class 'float'>
<class 'list'>
[[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], [[0.0, 0.0,

In [5]:
# If you wish to disable GPU or MPS (M1 mac) training, use the accelerator="cpu" kwarg.
logger = CSVLogger("logs", name="M3GNet_training")
logger = TensorBoardLogger('tb_logs', name='M3GNet_training')
# Inference mode = False is required for calculating forces, stress in test mode and prediction mode
trainer = pl.Trainer(max_epochs=10, accelerator="auto", logger=logger, inference_mode=False,)
trainer.fit(model=lit_module, train_dataloaders=train_loader, val_dataloaders=val_loader)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3060') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Missing logger folder: tb_logs/M3GNet_training
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type              | Params
--------------------------------------------
0 | mae   | MeanAbsoluteError | 0     
1 | rmse  | MeanSquaredError  | 0     
2 | model | Potential         | 282 K 
--------------------------------------------
282 K     Trainable params
0         Non-trainable params
282 K     Total params
1.130     Total estimated model params size (MB)


Epoch 9: 100%|██████████| 163/163 [00:21<00:00,  7.53it/s, v_num=0, val_Total_Loss=0.629, val_Energy_MAE=0.431, val_Force_MAE=0.118, val_Stress_MAE=0.000, val_Site_Wise_MAE=0.000, val_Energy_RMSE=0.481, val_Force_RMSE=0.170, val_Stress_RMSE=0.000, val_Site_Wise_RMSE=0.000, train_Total_Loss=0.218, train_Energy_MAE=0.289, train_Force_MAE=0.112, train_Stress_MAE=0.000, train_Site_Wise_MAE=0.000, train_Energy_RMSE=0.316, train_Force_RMSE=0.171, train_Stress_RMSE=0.000, train_Site_Wise_RMSE=0.000] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 163/163 [00:21<00:00,  7.51it/s, v_num=0, val_Total_Loss=0.629, val_Energy_MAE=0.431, val_Force_MAE=0.118, val_Stress_MAE=0.000, val_Site_Wise_MAE=0.000, val_Energy_RMSE=0.481, val_Force_RMSE=0.170, val_Stress_RMSE=0.000, val_Site_Wise_RMSE=0.000, train_Total_Loss=0.218, train_Energy_MAE=0.289, train_Force_MAE=0.112, train_Stress_MAE=0.000, train_Site_Wise_MAE=0.000, train_Energy_RMSE=0.316, train_Force_RMSE=0.171, train_Stress_RMSE=0.000, train_Site_Wise_RMSE=0.000]


In [6]:
# test the model, remember to set inference_mode=False in trainer (see above)
trainer.test(dataloaders=test_loader)

Restoring states from the checkpoint path at tb_logs/M3GNet_training/version_0/checkpoints/epoch=9-step=1630.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at tb_logs/M3GNet_training/version_0/checkpoints/epoch=9-step=1630.ckpt


Testing DataLoader 0: 100%|██████████| 21/21 [00:01<00:00, 16.01it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     test_Energy_MAE        0.3399966061115265
    test_Energy_RMSE        0.35500723123550415
     test_Force_MAE         0.12434855848550797
     test_Force_RMSE        0.2099882960319519
   test_Site_Wise_MAE               0.0
   test_Site_Wise_RMSE              0.0
     test_Stress_MAE                0.0
    test_Stress_RMSE                0.0
     test_Total_Loss        0.2173028290271759
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_Total_Loss': 0.2173028290271759,
  'test_Energy_MAE': 0.3399966061115265,
  'test_Force_MAE': 0.12434855848550797,
  'test_Stress_MAE': 0.0,
  'test_Site_Wise_MAE': 0.0,
  'test_Energy_RMSE': 0.35500723123550415,
  'test_Force_RMSE': 0.2099882960319519,
  'test_Stress_RMSE': 0.0,
  'test_Site_Wise_RMSE': 0.0}]

In [7]:
# save trained model
model_export_path = "./trained_model/"
model.save(model_export_path)

# load trained model
model = matgl.load_model(path=model_export_path)


In [8]:
# download a pre-trained M3GNet
m3gnet_nnp = matgl.load_model("M3GNet-MP-2021.2.8-DIRECT-PES")
model_pretrained = m3gnet_nnp.model
lit_module_finetune = PotentialLightningModule(model=model_pretrained, lr=1e-4)


In [9]:
# If you wish to disable GPU or MPS (M1 mac) training, use the accelerator="cpu" kwarg.
logger = CSVLogger("logs", name="M3GNet_finetuning")
logger = TensorBoardLogger('tb_logs', name='M3GNet_finetuning')
trainer = pl.Trainer(max_epochs=5, accelerator="auto", logger=logger, inference_mode=False)
trainer.fit(model=lit_module_finetune, train_dataloaders=train_loader, val_dataloaders=val_loader)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: tb_logs/M3GNet_finetuning
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type              | Params
--------------------------------------------
0 | mae   | MeanAbsoluteError | 0     
1 | rmse  | MeanSquaredError  | 0     
2 | model | Potential         | 1.1 M 
--------------------------------------------
1.1 M     Trainable params
0         Non-trainable params
1.1 M     Total params
4.468     Total estimated model params size (MB)


Epoch 4: 100%|██████████| 163/163 [00:23<00:00,  6.82it/s, v_num=0, val_Total_Loss=4.690, val_Energy_MAE=1.060, val_Force_MAE=0.312, val_Stress_MAE=0.000, val_Site_Wise_MAE=0.000, val_Energy_RMSE=1.380, val_Force_RMSE=0.440, val_Stress_RMSE=0.000, val_Site_Wise_RMSE=0.000, train_Total_Loss=3.830, train_Energy_MAE=0.861, train_Force_MAE=0.316, train_Stress_MAE=0.000, train_Site_Wise_MAE=0.000, train_Energy_RMSE=1.090, train_Force_RMSE=0.502, train_Stress_RMSE=0.000, train_Site_Wise_RMSE=0.000]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 163/163 [00:23<00:00,  6.81it/s, v_num=0, val_Total_Loss=4.690, val_Energy_MAE=1.060, val_Force_MAE=0.312, val_Stress_MAE=0.000, val_Site_Wise_MAE=0.000, val_Energy_RMSE=1.380, val_Force_RMSE=0.440, val_Stress_RMSE=0.000, val_Site_Wise_RMSE=0.000, train_Total_Loss=3.830, train_Energy_MAE=0.861, train_Force_MAE=0.316, train_Stress_MAE=0.000, train_Site_Wise_MAE=0.000, train_Energy_RMSE=1.090, train_Force_RMSE=0.502, train_Stress_RMSE=0.000, train_Site_Wise_RMSE=0.000]


In [10]:
# save trained model
model_save_path = "./finetuned_model/"
model_pretrained.save(model_save_path)
# load trained model
trained_model = matgl.load_model(path=model_save_path)

In [11]:
trainer.test(dataloaders=test_loader)

Restoring states from the checkpoint path at tb_logs/M3GNet_finetuning/version_0/checkpoints/epoch=4-step=815.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at tb_logs/M3GNet_finetuning/version_0/checkpoints/epoch=4-step=815.ckpt


Testing DataLoader 0: 100%|██████████| 21/21 [00:01<00:00, 12.53it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     test_Energy_MAE        0.6983882784843445
    test_Energy_RMSE        0.8734222650527954
     test_Force_MAE         0.2976267337799072
     test_Force_RMSE        0.48178237676620483
   test_Site_Wise_MAE               0.0
   test_Site_Wise_RMSE              0.0
     test_Stress_MAE                0.0
    test_Stress_RMSE                0.0
     test_Total_Loss         3.277029037475586
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_Total_Loss': 3.277029037475586,
  'test_Energy_MAE': 0.6983882784843445,
  'test_Force_MAE': 0.2976267337799072,
  'test_Stress_MAE': 0.0,
  'test_Site_Wise_MAE': 0.0,
  'test_Energy_RMSE': 0.8734222650527954,
  'test_Force_RMSE': 0.48178237676620483,
  'test_Stress_RMSE': 0.0,
  'test_Site_Wise_RMSE': 0.0}]