In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
import mbuild as mb
import parmed
import torch_geometric as tg
from torch_geometric.loader import DataLoader
import shutil
import rdkit 
import torch
from torch import nn, optim

import sys
sys.path.append("/raid6/homes/kierannp/projects/multi-egnn")
from megnn.datasets import *
from megnn.megnn import *
from megnn.utils import *

# try:
#     shutil.rmtree('./processed')
# except:
#     pass

  from .autonotebook import tqdm as notebook_tqdm
2023-02-07 13:47:51.839745: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-07 13:47:52.746412: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/mpi/gcc/openmpi-4.1.2/lib
2023-02-07 13:47:52.746511: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/mpi/gcc/openmpi-4.1.2/lib


In [4]:
# hyperparameters
n_epochs  = 30
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
dtype = torch.float32
batch_size = 32

# dataset
dat = COF_Dataset(root='.')
dat.shuffle()
train_dataset = dat[:int(len(dat)*.8)]
test_dataset = dat[int(len(dat)*.8):]
train_loader = DataLoader(train_dataset, batch_size=batch_size, follow_batch=['x_s', 'x_t', 'positions_s', 'positions_t'], shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, follow_batch=['x_s', 'x_t', 'positions_s', 'positions_t'], shuffle=False)

# model
model = MEGNN(n_graphs=2, in_node_nf=110, in_edge_nf=0, hidden_nf=64, device=device, n_layers=7, coords_weight=1.0,
             attention=True, node_attr=1)

# optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

In [5]:
def train(epoch, loader):
    epoch_loss = 0
    model.train()
    for i, data in enumerate(loader):
        conversion = convert_to_dense(data, device, dtype)

        one_hot_s = conversion[0]
        one_hot_t = conversion[1]
        edges_s = conversion[2]
        edges_t = conversion[3]
        atom_mask_s = conversion[4]
        atom_mask_t = conversion[5]
        edge_mask_s = conversion[6]
        edge_mask_t = conversion[7]
        n_nodes_s = conversion[8]
        n_nodes_t = conversion[9]
        atom_positions_s = conversion[10]
        atom_positions_t = conversion[11]
        batch_size_s = conversion[12]
        label = conversion[13]

        pred = model(
            h0 = [one_hot_s, one_hot_t], 
            all_edges = [edges_s, edges_t], 
            all_edge_attr = [None, None], 
            node_masks = [atom_mask_s, atom_mask_t], 
            edge_masks = [edge_mask_s, edge_mask_t],
            n_nodes = [n_nodes_s, n_nodes_t], 
            x = [atom_positions_s, atom_positions_t]
        )
        loss = criterion(pred, label)  # Compute the loss.
        epoch_loss += loss.item() * batch_size_s
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        optimizer.zero_grad()  # Clear gradients.
        if i % 10 == 0:
            print("Epoch %d \t Iteration %d \t loss %.4f" % (epoch, i, loss.item()))
    return epoch_loss/len(loader)

def test(loader):
    model.eval()
    epoch_loss = 0
    for data in loader:  # Iterate in batches over the training/test dataset.
        conversion = convert_to_dense(data, device, dtype)

        one_hot_s = conversion[0]
        one_hot_t = conversion[1]
        edges_s = conversion[2]
        edges_t = conversion[3]
        atom_mask_s = conversion[4]
        atom_mask_t = conversion[5]
        edge_mask_s = conversion[6]
        edge_mask_t = conversion[7]
        n_nodes_s = conversion[8]
        n_nodes_t = conversion[9]
        atom_positions_s = conversion[10]
        atom_positions_t = conversion[11]
        batch_size_s = conversion[12]
        label = conversion[13]

        pred = model(
            h0 = [one_hot_s, one_hot_t], 
            all_edges = [edges_s, edges_t], 
            all_edge_attr = [None, None], 
            node_masks = [atom_mask_s, atom_mask_t], 
            edge_masks = [edge_mask_s, edge_mask_t],
            n_nodes = [n_nodes_s, n_nodes_t], 
            x = [atom_positions_s, atom_positions_t]
        )

        # epoch_loss += criterion(pred, (label - prop_mean) / prop_mad).item()*batch_size
        epoch_loss += criterion(pred, label).item()*batch_size_s
    return epoch_loss /len(loader)


res = {'epochs': [], 'train_loss': [],'test_loss': [], 'best_val': 1e10, 'best_test': 1e10, 'best_epoch': 0}
for epoch in range(0, n_epochs):
    train_loss = train(epoch, train_loader)
    res['train_loss'].append(train_loss)
    if epoch % 1 == 0:
        test_loss = test(test_loader)
        res['epochs'].append(epoch)
        res['test_loss'].append(test_loss)
        if test_loss < res['best_val']:
            res['best_val'] = test_loss
            res['best_test'] = test_loss
            res['best_epoch'] = epoch
        print("test loss: %.4f \t epoch %d" % (test_loss, epoch))
        print("Best: val loss: %.4f \t test loss: %.4f \t epoch %d" % (res['best_val'], res['best_test'], res['best_epoch']))

Epoch 0 	 Iteration 0 	 loss 2130.7080
Epoch 0 	 Iteration 10 	 loss 55.3238
Epoch 0 	 Iteration 20 	 loss 1162.7804
Epoch 0 	 Iteration 30 	 loss 290.5831
Epoch 0 	 Iteration 40 	 loss 23.4714
Epoch 0 	 Iteration 50 	 loss 19.8606
test loss: 304.5676 	 epoch 0
Best: val loss: 304.5676 	 test loss: 304.5676 	 epoch 0
Epoch 1 	 Iteration 0 	 loss 10.1445
Epoch 1 	 Iteration 10 	 loss 13.3259
Epoch 1 	 Iteration 20 	 loss 8.8286
Epoch 1 	 Iteration 30 	 loss 1.4462
Epoch 1 	 Iteration 40 	 loss 0.6428
Epoch 1 	 Iteration 50 	 loss 0.9357
test loss: 69.1354 	 epoch 1
Best: val loss: 69.1354 	 test loss: 69.1354 	 epoch 1
Epoch 2 	 Iteration 0 	 loss 0.5056
Epoch 2 	 Iteration 10 	 loss 0.2776
Epoch 2 	 Iteration 20 	 loss 0.5622
Epoch 2 	 Iteration 30 	 loss 0.9675
Epoch 2 	 Iteration 40 	 loss 1.5211
Epoch 2 	 Iteration 50 	 loss 0.1066
test loss: 75.0765 	 epoch 2
Best: val loss: 69.1354 	 test loss: 69.1354 	 epoch 1
Epoch 3 	 Iteration 0 	 loss 0.4877
Epoch 3 	 Iteration 10 	 loss 0.4

In [None]:
def train(epoch, loader):
    epoch_loss = 0
    model.train()
    for i, data in enumerate(loader):
        conversion = convert_to_dense(data, device, dtype)

        one_hot_s = conversion[0]
        one_hot_t = conversion[1]
        edges_s = conversion[2]
        edges_t = conversion[3]
        atom_mask_s = conversion[4]
        atom_mask_t = conversion[5]
        edge_mask_s = conversion[6]
        edge_mask_t = conversion[7]
        n_nodes_s = conversion[8]
        n_nodes_t = conversion[9]
        atom_positions_s = conversion[10]
        atom_positions_t = conversion[11]
        batch_size_s = conversion[12]
        label = conversion[13]

        pred = model(
            h0 = [one_hot_s, one_hot_t], 
            all_edges = [edges_s, edges_t], 
            all_edge_attr = [None, None], 
            node_masks = [atom_mask_s, atom_mask_t], 
            edge_masks = [edge_mask_s, edge_mask_t],
            n_nodes = [n_nodes_s, n_nodes_t], 
            x = [atom_positions_s, atom_positions_t]
        )
        loss = criterion(pred, label)  # Compute the loss.
        epoch_loss += loss.item() * batch_size_s
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        optimizer.zero_grad()  # Clear gradients.
        if i % 10 == 0:
            print("Epoch %d \t Iteration %d \t loss %.4f" % (epoch, i, loss.item()))
    return epoch_loss/len(loader)

res = {'epochs': [], 'train_loss': [],'test_loss': [], 'best_val': 1e10, 'best_test': 1e10, 'best_epoch': 0}
for epoch in range(0, n_epochs):
    train_loss = train(epoch, train_loader)
    res['train_loss'].append(train_loss)
    if epoch % 1 == 0:
        test_loss = test(test_loader)
        res['epochs'].append(epoch)
        res['test_loss'].append(test_loss)
        if test_loss < res['best_val']:
            res['best_val'] = test_loss
            res['best_test'] = test_loss
            res['best_epoch'] = epoch
        print("test loss: %.4f \t epoch %d" % (test_loss, epoch))
        print("Best: val loss: %.4f \t test loss: %.4f \t epoch %d" % (res['best_val'], res['best_test'], res['best_epoch']))

In [6]:
plt.plot(predictions, actuals, '.',alpha=.2)
plt.plot(np.arange(-15,0),np.arange(-15,0),'-')

NameError: name 'predictions' is not defined

In [17]:
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
params

48519

In [15]:
device = torch.device("cpu")
dtype = torch.float32
df = pd.read_excel('./cloud_point.xlsx')
df = df.drop_duplicates().dropna()
dat = Cloud_Point_Dataset(root='.',dataframe=df)

batch_size = 16
loader = DataLoader(dat, batch_size=batch_size, follow_batch=['x_s', 'x_t', 'positions_s', 'positions_t'], shuffle=True)

model = MEGNN(n_graphs=2, in_node_nf=7, in_edge_nf=0, hidden_nf=64, device=device, n_layers=7, coords_weight=1.0,
             attention=True, node_attr=1, n_enviro=4)
model.load_state_dict(torch.load('model.pth'))
model = model.to(device)