In [1]:
from torch import nn
from mlip.pes import PotentialNeuralNet
from mlip.reann import REANN, compress_symbols

# --- Run configuration -------------------------------------------------
# Presumably atomic numbers (29 is copper); the same values are looked up
# in `encode` when the training structures are read.
species = [29]

device = 'cpu'

# Settings forwarded unchanged to the REANN descriptor constructor.
lmax = 2
nmax = 15
loop = 2

# --- Descriptor --------------------------------------------------------
# `compress_symbols` yields an encode map, a decode map and the compressed
# species numbers; from here on `species` holds the unique compressed values.
encode, decode, numbers = compress_symbols(species)
species = list(set(numbers))
reann = REANN(species, nmax=nmax, lmax=lmax, loop=loop)
desc = reann

# --- Per-species networks ----------------------------------------------
# Each species gets its own small network: desc.NO inputs, one hidden layer
# 1.3x as wide (truncated to an int), and a single scalar output.
hidden = int(desc.NO * 1.3)
moduledict = nn.ModuleDict()
for spe in species:
    layers = [
        nn.Linear(desc.NO, hidden),
        nn.SiLU(),
        nn.Linear(hidden, 1),
    ]
    moduledict[str(spe)] = nn.Sequential(*layers)

# Cast the per-species networks to float64 and place them on the device.
moduledict = moduledict.double().to(device=device)

model = PotentialNeuralNet(desc, moduledict, species)


  from .autonotebook import tqdm as notebook_tqdm


# Load data

In [2]:
import numpy as np
from pymatgen.core import Structure
from monty.serialization import loadfn
location = "../data/Cu/"
data = loadfn(location + 'training.json')

# Fields read from every record `d` of the training file:
#   d['structure'].atomic_numbers, .cart_coords, .lattice.matrix, .lattice.pbc
#   d['outputs']['energy'], d['outputs']['forces']
#   d['num_atoms']
structures = [d['structure'] for d in data]
outputs = [d['outputs'] for d in data]

# One entry per crystal in each of the lists below.
symbols = [[encode[z] for z in s.atomic_numbers] for s in structures]
positions = [s.cart_coords for s in structures]
energies = [o['energy'] for o in outputs]
cells = [s.lattice.matrix for s in structures]
# Gradients are stored as the negated forces.
gradients = [-np.array(o['forces']) for o in outputs]

# For every atom, the index (within `data`) of the crystal it belongs to.
crystalidx = [[idx] * d['num_atoms'] for idx, d in enumerate(data)]
pbcs = [np.array(s.lattice.pbc) for s in structures]
           

In [6]:
# Nomenclature
# SPECG-CriP(symbols, positions, energies, cells, gradients, crystalindex, pbcs)

import torch as tc
from torch.utils.data import Dataset, DataLoader

class BPTypeDataset(Dataset):
    """Behler-Parrinello type dataset.

    Indexing is done in units of a crystal, i.e. the set of atoms used in one
    calculation: item ``idx`` is the tuple of the ``idx``-th entry of each of
    the seven sequences given to the constructor.

    Parameters
    ----------
    symbols : sequence
        Per-crystal species labels.
    positions : sequence
        Per-crystal atomic positions.
    energies : sequence
        Per-crystal energies; its length defines ``len(dataset)``.
    cells : sequence
        Per-crystal cell matrices.
    gradients : sequence
        Per-crystal energy gradients.
    crystalidx : sequence
        Per-crystal list of crystal indices.
    pbcs : sequence
        Per-crystal periodic-boundary flags.

    Raises
    ------
    ValueError
        If the seven sequences do not all have the same length.
    """

    def __init__(self, symbols, positions, energies, cells, gradients, crystalidx, pbcs):
        # Fail early on inconsistent inputs: otherwise a mismatch would only
        # show up later as an IndexError in __getitem__, or not at all when
        # the extra entries are simply never reached.
        lengths = {
            'symbols': len(symbols),
            'positions': len(positions),
            'energies': len(energies),
            'cells': len(cells),
            'gradients': len(gradients),
            'crystalidx': len(crystalidx),
            'pbcs': len(pbcs),
        }
        if len(set(lengths.values())) > 1:
            raise ValueError(
                'All inputs must contain one entry per crystal, got lengths %r' % lengths
            )

        self.symbols = symbols
        self.positions = positions
        self.energies = energies
        self.cells = cells
        self.gradients = gradients
        self.crystalidx = crystalidx
        self.pbcs = pbcs

    def __len__(self):
        """Return the number of crystals (the number of stored energies)."""
        return len(self.energies)

    def __getitem__(self, idx):
        """Return ``(symbols, positions, energy, cell, gradients, crystalidx, pbc)`` for crystal ``idx``."""
        return (
            self.symbols[idx],
            self.positions[idx],
            self.energies[idx],
            self.cells[idx],
            self.gradients[idx],
            self.crystalidx[idx],
            self.pbcs[idx],
        )

    
def concate(batch, device='cpu'):
    """Collate a list of dataset items into one flat batch.

    Each item of ``batch`` is a 7-tuple
    ``(symbols, positions, energy, cell, gradients, crystalidx, pbc)``.
    Per-atom fields are concatenated along the first axis; ``cell`` and
    ``pbc`` get a new leading axis first, so they end up with one row per
    item.

    Returns
    -------
    tuple
        ``(symbols, positions, energies, cells, gradients, crystalidx, pbcs)``.
        Everything is a tensor except ``energies``, which stays a plain
        Python list. ``positions`` and ``cells`` are moved to ``device`` and
        flagged ``requires_grad``; the remaining tensors are left where
        ``torch.from_numpy`` created them.

    Notes
    -----
    ``crystalidx`` values are passed through as given (no renumbering
    relative to the batch). NOTE(review): confirm the model expects that.
    """
    # Transpose the list of items into one list per field.
    symbols, positions, energies, cells, gradients, crystalidx, pbcs = map(list, zip(*batch))

    def _joined(chunks):
        # Concatenate along axis 0 and wrap the result as a tensor.
        return tc.from_numpy(np.concatenate(chunks))

    symbol_t = _joined(symbols)
    position_t = _joined(positions).to(device=device).requires_grad_(True)
    cell_t = _joined([cell[None] for cell in cells]).to(device=device).requires_grad_(True)
    gradient_t = _joined(gradients)
    crystal_t = _joined(crystalidx)
    pbc_t = _joined([pbc[None] for pbc in pbcs])

    return (symbol_t, position_t, energies, cell_t, gradient_t, crystal_t, pbc_t)

# One dataset item per crystal; `concate` merges the items of each
# shuffled batch of 10 into flat tensors.
imgdataset = BPTypeDataset(
    symbols, positions, energies, cells, gradients, crystalidx, pbcs
)
dataloader = DataLoader(
    imgdataset,
    batch_size=10,
    shuffle=True,
    collate_fn=concate,
)


In [7]:
class MSEFLoss:
    """Sum of an energy term and a gradient term, both squared-error based.

    After each call the two terms are available separately as ``lossE`` and
    ``lossG``.
    """

    def __call__(self, predE, predF, y, dy):
        """Return ``lossE + lossG`` for one batch.

        ``lossE`` is the summed squared difference between ``y`` and
        ``predE`` divided by ``len(y)``; ``lossG`` is the summed squared
        difference between ``predF`` and ``dy`` divided by ``len(dy)``
        (the length of the first axis, not the number of elements).

        NOTE(review): the subtractions broadcast, so ``predE``/``y`` and
        ``predF``/``dy`` are assumed to have matching shapes. Confirm
        against the model output.
        """
        energy_error = y - predE
        gradient_error = predF - dy

        self.lossE = (energy_error ** 2).sum() / len(y)
        self.lossG = (gradient_error ** 2).sum() / len(dy)
        return self.lossE + self.lossG


class Normalizer(object):
    """Standardise tensors with a fixed mean/std and undo it again."""

    def __init__(self, tensor, device='cpu'):
        """Take the mean and standard deviation of ``tensor`` as the statistics.

        Both statistics are stored on ``device``.
        """
        self.mean = tensor.mean().to(device=device)
        self.std = tensor.std().to(device=device)

    def norm(self, tensor):
        """Shift by the stored mean, then scale by the stored std."""
        centered = tensor - self.mean
        return centered / self.std

    def denorm(self, normed_tensor):
        """Invert :meth:`norm`: rescale by the std, then add the mean back."""
        rescaled = normed_tensor * self.std
        return rescaled + self.mean

    def state_dict(self):
        """Return the statistics as a dict with keys ``'mean'`` and ``'std'``."""
        return dict(mean=self.mean, std=self.std)

    def load_state_dict(self, state_dict):
        """Overwrite the statistics from a dict produced by :meth:`state_dict`."""
        for key in ('mean', 'std'):
            setattr(self, key, state_dict[key])

In [None]:
# Atomic numbers for reference: 'Ge': 32, 'Li': 3, 'Mo': 42, 'Ni': 28, 'Si': 14

In [None]:
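# [REDACTED] A GitHub personal access token was pasted into this cell. Revoke it and never store credentials in a notebook; read them from the environment or a secrets manager instead.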

In [8]:
import torch as tc
from torch.autograd import grad

def train(dataloader, model, loss_fn, optimizer, normalizer, device='cpu'):
    """Run one optimisation pass over ``dataloader``.

    Parameters
    ----------
    dataloader : iterable
        Yields batches of the form
        ``(symbols, positions, energies, cells, gradients, crystalidx, pbcs)``.
    model : torch.nn.Module
        Called as ``model(symbols, positions, cells, crystalidx, pbcs)`` and
        expected to return a 3-tuple whose second and third entries are the
        predicted energies and predicted gradients.
    loss_fn : callable
        Called as ``loss_fn(pred_energy, pred_gradient, energies, gradients)``;
        must expose the two loss terms as ``lossE`` and ``lossG`` afterwards.
    optimizer : torch.optim.Optimizer
        Optimizer stepping the model parameters.
    normalizer
        Currently unused: the reference energies are passed to the loss
        un-normalised. Kept for interface compatibility.
    device : str
        Currently unused. Kept for interface compatibility.

    Returns
    -------
    tuple of torch.Tensor
        ``(lossE, lossG)`` of the LAST batch, detached from the autograd
        graph so that logging them does not keep the graph alive.

    Raises
    ------
    ValueError
        If ``dataloader`` yields no batches.
    """
    model.train()

    last_losses = None
    for batch in dataloader:
        symbols, positions, energies, cells, gradients, crystalidx, pbcs = batch

        optimizer.zero_grad()

        _, pred_energy, pred_gradient = model(symbols, positions, cells, crystalidx, pbcs)

        # `energies` arrives as a plain Python list, hence the conversion.
        loss = loss_fn(pred_energy, pred_gradient, tc.tensor(energies), gradients)

        # No-op when the loss is already part of the autograd graph.
        # NOTE(review): if this line is ever what makes backward() succeed,
        # the loss is disconnected from the parameters and nothing is trained.
        loss.requires_grad_(True)

        loss.backward()
        optimizer.step()

        # Detach before logging/returning (replaces the deprecated `.data`).
        last_losses = (loss_fn.lossE.detach(), loss_fn.lossG.detach())
        print(*last_losses)

    if last_losses is None:
        raise ValueError('dataloader yielded no batches; nothing to train on')

    return last_losses

from torch.utils.tensorboard import SummaryWriter

# Run configuration (previously repeated inline as literals).
RUN_DIR = './20220727'
N_EPOCHS = 5000
CHECKPOINT_EVERY = 10

writer = SummaryWriter(log_dir=RUN_DIR + '/copper_log')

normalizer = Normalizer(tc.tensor(imgdataset.energies).double())

# Build the loss and the optimizer ONCE, outside the epoch loop. Creating a
# new Adam instance every epoch throws away its running moment estimates,
# so each epoch would restart the optimizer from scratch.
loss_fn = MSEFLoss()
optimizer = tc.optim.Adam(model.parameters(), lr=1e-2)

try:
    for t in range(N_EPOCHS):
        lossE, lossG = train(dataloader, model, loss_fn, optimizer, normalizer)

        writer.add_scalar('Loss / MSE energy (eV)', lossE, t)
        writer.add_scalar('Loss / MSE grad (eV/A)', lossG, t)
        if t % CHECKPOINT_EVERY == 0:
            tc.save(model.state_dict(), RUN_DIR + '/weights_%d.pt' % t)
finally:
    # Also runs when the loop is interrupted (e.g. KeyboardInterrupt), so
    # the scalars logged so far are written out and the file is closed.
    writer.flush()
    writer.close()

tensor(827949.6567, dtype=torch.float64, grad_fn=<AddBackward0>)
tensor(47408.4957, dtype=torch.float64, grad_fn=<AddBackward0>)
tensor(16957.3239, dtype=torch.float64, grad_fn=<AddBackward0>)
tensor(4.2451e+15, dtype=torch.float64, grad_fn=<AddBackward0>)
tensor(42683.7181, dtype=torch.float64, grad_fn=<AddBackward0>)
tensor(100323.4514, dtype=torch.float64, grad_fn=<AddBackward0>)
tensor(3.3221e+15, dtype=torch.float64, grad_fn=<AddBackward0>)
tensor(1.0363e+08, dtype=torch.float64, grad_fn=<AddBackward0>)
tensor(1.1238e+09, dtype=torch.float64, grad_fn=<AddBackward0>)
tensor(1.3722e+11, dtype=torch.float64, grad_fn=<AddBackward0>)
tensor(212449.2332, dtype=torch.float64, grad_fn=<AddBackward0>)
tensor(234602.6862, dtype=torch.float64, grad_fn=<AddBackward0>)
tensor(5.1062e+15, dtype=torch.float64, grad_fn=<AddBackward0>)
tensor(202908.3980, dtype=torch.float64, grad_fn=<AddBackward0>)
tensor(199802.7297, dtype=torch.float64, grad_fn=<AddBackward0>)
tensor(187960.6382, dtype=torch.fl

KeyboardInterrupt: 

127810 cas_v100_ Serial_g x2419a03 PD       0:00      1 (Priority)
127809 cas_v100_ Serial_g x2419a03 PD       0:00      1 (Priority)
127808

# [REDACTED] An unidentified credential-like string was pasted here; if it is a password or key, rotate it.