In [1]:
from model import Autoencoder, GNN3D
from dataset import LogPDataset
import torch

# Read the LogP molecules from their folder and print the dataset summary
logp_data_dir = "../data/logp"
dataset = LogPDataset(logp_data_dir)
print(dataset)

Loading dataset from folder
Initializing Molecular Representation Generator
Dataset Name: LogP
Number of Molecules Loaded: 4197


In [2]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
device

device(type='cuda')

In [3]:
# Mean-squared-error criterion
mse_loss_fn = torch.nn.MSELoss()

# Atom-level autoencoder and its Adam optimizer (default hyperparameters).
# NOTE(review): (80, 10) presumably means 80 input features -> 10 latent dims; confirm in model.py
atom_autoencoder = Autoencoder(80, 10).to(device)
atom_autoencoder_optimizer = torch.optim.Adam(params=atom_autoencoder.parameters())

# Bond-level autoencoder and its Adam optimizer (default hyperparameters).
# NOTE(review): (10, 3) presumably means 10 input features -> 3 latent dims; confirm in model.py
bond_autoencoder = Autoencoder(10, 3).to(device)
bond_autoencoder_optimizer = torch.optim.Adam(params=bond_autoencoder.parameters())

In [4]:
# Restore the saved autoencoder weights.
# map_location=device remaps tensors that were saved on another device
# (e.g. a CUDA checkpoint opened on a CPU-only machine), so loading agrees
# with the CPU fallback chosen when `device` was created.
atom_autoencoder.load_state_dict(
    torch.load("./models/logp_atom_autoencoder.pth", map_location=device)
)
bond_autoencoder.load_state_dict(
    torch.load("./models/logp_bond_autoencoder.pth", map_location=device)
)

<All keys matched successfully>

In [5]:
# Testing atomic and bond reconstruction

test_i = 0
# Fetch the molecule once instead of indexing the dataset separately for each field
test_molecule = dataset[test_i]
test_atomic_vectors = test_molecule[0].to(device)
test_bond_vectors = test_molecule[1].to(device)

# Inspection only: no backward pass follows, so skip autograd bookkeeping
with torch.no_grad():
    reconstructed_atomic_vectors = atom_autoencoder(test_atomic_vectors)
    reconstructed_bond_vectors = bond_autoencoder(test_bond_vectors)

print(f"Test Atomic Vector:\n{test_atomic_vectors}\n\nReconstruction:\n{reconstructed_atomic_vectors}")
print(f"\n\n\nTest Bond Vector:\n{test_bond_vectors}\n\nReconstruction:\n{reconstructed_bond_vectors}")

Test Atomic Vector:
tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [0., 1., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0')

Reconstruction:
tensor([[ 9.9633e-01,  3.5745e-03, -1.0154e-03,  ...,  7.7394e-04,
          5.7995e-04, -2.0181e-03],
        [ 2.9183e-02,  9.8448e-01,  3.5630e-03,  ...,  2.7819e-04,
         -1.9600e-03,  1.0222e-03],
        [ 1.0032e+00, -6.7498e-03,  1.2391e-04,  ..., -4.2486e-04,
         -1.3446e-03, -8.2866e-05],
        ...,
        [-1.2934e-03, -2.5709e-03, -2.2012e-04,  ...,  3.7370e-04,
         -3.3575e-04,  9.6865e-05],
        [-1.2934e-03, -2.5709e-03, -2.2012e-04,  ...,  3.7370e-04,
         -3.3575e-04,  9.6865e-05],
        [-1.2934e-03, -2.5709e-03, -2.2012e-04,  ...,  3.7370e-04,
         -3.3575e-04,  9.6865e-05]], device='cuda:0', grad_fn=<GeluBackward0>)



Test Bond Vecto

# Running GEM with Latent Embeddings

In [6]:
# Loading GNN3D
from model import GNN3D

# Build the model on the selected device. The atom/bond vector sizes (10 and 3)
# match the second argument given to the atom and bond Autoencoder constructors.
gnn3d = GNN3D(
    atomic_vector_size=10,
    bond_vector_size=3,
    number_of_molecular_features=200,
    number_of_targets=1,
).to(device)

# Adam with default hyperparameters over all GNN3D parameters
gnn3d_optimizer = torch.optim.Adam(params=gnn3d.parameters())

# Display the module structure
gnn3d

GNN3D(
  (atom_bond_operator): DMPNNLayer(
    (message_generation_network): Sequential(
      (0): Linear(in_features=23, out_features=11, bias=True)
      (1): GELU(approximate='none')
      (2): Linear(in_features=11, out_features=10, bias=True)
      (3): GELU(approximate='none')
    )
    (combination_network): Sequential(
      (0): Linear(in_features=20, out_features=15, bias=True)
      (1): GELU(approximate='none')
      (2): Linear(in_features=15, out_features=10, bias=True)
      (3): GELU(approximate='none')
    )
  )
  (bond_angle_operator): DMPNNLayer(
    (message_generation_network): Sequential(
      (0): Linear(in_features=7, out_features=3, bias=True)
      (1): GELU(approximate='none')
      (2): Linear(in_features=3, out_features=3, bias=True)
      (3): GELU(approximate='none')
    )
    (combination_network): Sequential(
      (0): Linear(in_features=6, out_features=4, bias=True)
      (1): GELU(approximate='none')
      (2): Linear(in_features=4, out_features=3,

In [11]:
# Restore previously saved GNN3D weights.
# map_location=device remaps tensors saved on another device (e.g. a CUDA
# checkpoint opened on a CPU-only machine) onto the device selected earlier.
gnn3d.load_state_dict(
    torch.load("./models/gem_with_latent/geml_save2.pth", map_location=device)
)

<All keys matched successfully>

In [12]:
# Number of passes over the dataset
n_epochs = 50
# Progress is printed (and the running average recorded) every `printstep` molecules
printstep = 20
# Running-average RMSE values collected at each progress print
avg_rmse_list = list()

In [13]:
# Train GNN3D for n_epochs passes over the dataset, taking one optimizer step per molecule.
for epoch_i in range(n_epochs):
    avg_rmse = 0       # running mean of the per-molecule RMSE over this epoch
    rolling_rmse = 0   # running mean of the RMSE since the last progress print
    rolling_avg_i = 0  # number of molecules in the current rolling window
    for i, molecule in enumerate(dataset):
        target = molecule[8].to(device)

        # Only gnn3d_optimizer steps in this loop, so the autoencoders are frozen.
        # Encoding without autograd avoids backpropagating through them (and
        # accumulating unused .grad on their parameters) on every step.
        with torch.no_grad():
            atom_latents = atom_autoencoder.encode(molecule[0].to(device))
            bond_latents = bond_autoencoder.encode(molecule[1].to(device))

        # Latent atom/bond vectors followed by molecule fields 2..7, unchanged
        input_representation = [atom_latents, bond_latents] + [
            molecule[field_i].to(device) for field_i in range(2, 8)
        ]

        # Making prediction
        prediction = gnn3d(input_representation)

        # Computing losses (MSELoss takes (input, target); the value is symmetric)
        loss = mse_loss_fn(prediction, target)

        # Taking optimization step
        gnn3d_optimizer.zero_grad()
        loss.backward()
        gnn3d_optimizer.step()

        # Updating average losses; the per-molecule RMSE is computed once and reused
        rmse = torch.sqrt(loss.detach()).item()
        avg_rmse = (avg_rmse * i + rmse) / (i + 1)
        rolling_rmse = (rolling_rmse * rolling_avg_i + rmse) / (rolling_avg_i + 1)
        rolling_avg_i += 1

        if i % printstep == 0:
            avg_rmse_list.append(avg_rmse)
            print(f"Ep. {epoch_i}/{n_epochs}, Ex. {i}, avg rmse: {avg_rmse}, rolling rmse: {rolling_rmse}, immediate mse: {loss.item()}, target: {target.item()}, pred: {prediction.item()}")
            # Start a fresh rolling window after each progress print
            rolling_rmse = 0
            rolling_avg_i = 0

    # Checkpoint the weights at the end of every epoch
    torch.save(gnn3d.state_dict(), f"./models/gem_with_latent/geml_{epoch_i}.pth")

Ep. 0/50, Ex. 0, avg rmse: 0.3008460998535156, rolling rmse: 0.3008460998535156, immediate mse: 0.09050837904214859, target: 3.5399999618530273, pred: 3.2391538619995117
Ep. 0/50, Ex. 20, avg rmse: 0.804264729931241, rolling rmse: 0.8294356614351273, immediate mse: 0.18751674890518188, target: 2.9200000762939453, pred: 2.4869680404663086
Ep. 0/50, Ex. 40, avg rmse: 0.7201822429168515, rolling rmse: 0.6318956315517426, immediate mse: 0.23824910819530487, target: 1.399999976158142, pred: 1.8881076574325562
Ep. 0/50, Ex. 60, avg rmse: 0.7569935654030472, rolling rmse: 0.8324567764997483, immediate mse: 1.593755841255188, target: 1.190000057220459, pred: 2.4524405002593994
Ep. 0/50, Ex. 80, avg rmse: 0.6752253432332733, rolling rmse: 0.4258322656154633, immediate mse: 0.024847470223903656, target: 2.0, pred: 1.8423691987991333
Ep. 0/50, Ex. 100, avg rmse: 0.686307432037769, rolling rmse: 0.7311898916959764, immediate mse: 0.09583746641874313, target: 1.4900000095367432, pred: 1.18042373657

KeyboardInterrupt: 

In [14]:
# Persist the current GNN3D weights
gnn3d_weights = gnn3d.state_dict()
torch.save(gnn3d_weights, "./models/gem_with_latent/geml_save3.pth")

# Running GEM with Latent Embeddings (with attention)

In [2]:
from model import Autoencoder, GNN3D
from dataset import LogPDataset
import torch

# Read the LogP molecules from their folder and print the dataset summary
logp_data_dir = "../data/logp"
dataset = LogPDataset(logp_data_dir)
print(dataset)

Loading dataset from folder
Initializing Molecular Representation Generator
Dataset Name: LogP
Number of Molecules Loaded: 4197


In [3]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
device

device(type='cuda')

In [4]:
# Mean-squared-error criterion
mse_loss_fn = torch.nn.MSELoss()

# Atom-level autoencoder and its Adam optimizer (default hyperparameters).
# NOTE(review): (80, 10) presumably means 80 input features -> 10 latent dims; confirm in model.py
atom_autoencoder = Autoencoder(80, 10).to(device)
atom_autoencoder_optimizer = torch.optim.Adam(params=atom_autoencoder.parameters())

# Bond-level autoencoder and its Adam optimizer (default hyperparameters).
# NOTE(review): (10, 3) presumably means 10 input features -> 3 latent dims; confirm in model.py
bond_autoencoder = Autoencoder(10, 3).to(device)
bond_autoencoder_optimizer = torch.optim.Adam(params=bond_autoencoder.parameters())

In [5]:
# Restore the saved autoencoder weights.
# map_location=device remaps tensors that were saved on another device
# (e.g. a CUDA checkpoint opened on a CPU-only machine), so loading agrees
# with the CPU fallback chosen when `device` was created.
atom_autoencoder.load_state_dict(
    torch.load("./models/logp_atom_autoencoder.pth", map_location=device)
)
bond_autoencoder.load_state_dict(
    torch.load("./models/logp_bond_autoencoder.pth", map_location=device)
)

<All keys matched successfully>

In [6]:
# Loading GNN3D
from model import GNN3DAttention

# Build the attention variant on the selected device. The atom/bond vector sizes
# (10 and 3) match the second argument given to the atom and bond Autoencoder constructors.
gnn3d = GNN3DAttention(
    atomic_vector_size=10,
    bond_vector_size=3,
    number_of_molecular_features=200,
    number_of_targets=1,
).to(device)

# Adam with default hyperparameters over all model parameters
gnn3d_optimizer = torch.optim.Adam(params=gnn3d.parameters())

# Display the module structure
gnn3d

GNN3DAttention(
  (atom_bond_operator): DMPNNLayer(
    (message_generation_network): Sequential(
      (0): Linear(in_features=23, out_features=11, bias=True)
      (1): GELU(approximate='none')
      (2): Linear(in_features=11, out_features=10, bias=True)
      (3): GELU(approximate='none')
    )
    (combination_network): Sequential(
      (0): Linear(in_features=20, out_features=15, bias=True)
      (1): GELU(approximate='none')
      (2): Linear(in_features=15, out_features=10, bias=True)
      (3): GELU(approximate='none')
    )
  )
  (bond_angle_operator): DMPNNLayer(
    (message_generation_network): Sequential(
      (0): Linear(in_features=7, out_features=3, bias=True)
      (1): GELU(approximate='none')
      (2): Linear(in_features=3, out_features=3, bias=True)
      (3): GELU(approximate='none')
    )
    (combination_network): Sequential(
      (0): Linear(in_features=6, out_features=4, bias=True)
      (1): GELU(approximate='none')
      (2): Linear(in_features=4, out_fe

In [7]:
# Number of passes over the dataset
n_epochs = 50
# Progress is printed (and the running average recorded) every `printstep` molecules
printstep = 20
# Running-average RMSE values collected at each progress print
avg_rmse_list = list()

In [8]:
# Train the attention model for n_epochs passes over the dataset, taking one optimizer step per molecule.
for epoch_i in range(n_epochs):
    avg_rmse = 0       # running mean of the per-molecule RMSE over this epoch
    rolling_rmse = 0   # running mean of the RMSE since the last progress print
    rolling_avg_i = 0  # number of molecules in the current rolling window
    for i, molecule in enumerate(dataset):
        target = molecule[8].to(device)

        # Only gnn3d_optimizer steps in this loop, so the autoencoders are frozen.
        # Encoding without autograd avoids backpropagating through them (and
        # accumulating unused .grad on their parameters) on every step.
        with torch.no_grad():
            atom_latents = atom_autoencoder.encode(molecule[0].to(device))
            bond_latents = bond_autoencoder.encode(molecule[1].to(device))

        # Latent atom/bond vectors followed by molecule fields 2..7, unchanged
        input_representation = [atom_latents, bond_latents] + [
            molecule[field_i].to(device) for field_i in range(2, 8)
        ]

        # Making prediction
        prediction = gnn3d(input_representation)

        # Computing losses (MSELoss takes (input, target); the value is symmetric)
        loss = mse_loss_fn(prediction, target)

        # Taking optimization step
        gnn3d_optimizer.zero_grad()
        loss.backward()
        gnn3d_optimizer.step()

        # Updating average losses; the per-molecule RMSE is computed once and reused
        rmse = torch.sqrt(loss.detach()).item()
        avg_rmse = (avg_rmse * i + rmse) / (i + 1)
        rolling_rmse = (rolling_rmse * rolling_avg_i + rmse) / (rolling_avg_i + 1)
        rolling_avg_i += 1

        if i % printstep == 0:
            avg_rmse_list.append(avg_rmse)
            print(f"Ep. {epoch_i}/{n_epochs}, Ex. {i}, avg rmse: {avg_rmse}, rolling rmse: {rolling_rmse}, immediate mse: {loss.item()}, target: {target.item()}, pred: {prediction.item()}")
            # Start a fresh rolling window after each progress print
            rolling_rmse = 0
            rolling_avg_i = 0

    # Checkpoint the weights at the end of every epoch
    torch.save(gnn3d.state_dict(), f"./models/gem_with_latent/geml_with_attention_{epoch_i}.pth")

  attention_weights = torch.nn.functional.softmax(dot_product)


Ep. 0/50, Ex. 0, avg rmse: 3.656558036804199, rolling rmse: 3.656558036804199, immediate mse: 13.370416641235352, target: 3.5399999618530273, pred: -0.11655809730291367
Ep. 0/50, Ex. 20, avg rmse: 1.917193354595275, rolling rmse: 1.830225120484829, immediate mse: 2.8146939277648926, target: 2.9200000762939453, pred: 1.2422951459884644
Ep. 0/50, Ex. 40, avg rmse: 1.4328305176118525, rolling rmse: 0.9242495387792588, immediate mse: 0.05772458389401436, target: 1.399999976158142, pred: 1.6402593851089478
Ep. 0/50, Ex. 60, avg rmse: 1.4033410436794407, rolling rmse: 1.3428876221179962, immediate mse: 0.3666732609272003, target: 1.190000057220459, pred: 1.7955355644226074
Ep. 0/50, Ex. 80, avg rmse: 1.3324784176585116, rolling rmse: 1.1163474082946778, immediate mse: 0.12314682453870773, target: 2.0, pred: 2.3509228229522705
Ep. 0/50, Ex. 100, avg rmse: 1.2362480591429346, rolling rmse: 0.8465151071548462, immediate mse: 0.7227681875228882, target: 1.4900000095367432, pred: 2.34015774726867

KeyboardInterrupt: 

In [9]:
# Persist the current attention-model weights
gnn3d_weights = gnn3d.state_dict()
torch.save(gnn3d_weights, "./models/gem_with_latent/geml_with_attention.pth")

# Dihedral Angles

In [1]:
from model import Autoencoder, GNN3D
from dataset import LogPDataset
import torch

# Read the LogP molecules from their folder and print the dataset summary
logp_data_dir = "../data/logp"
dataset = LogPDataset(logp_data_dir)
print(dataset)

Loading dataset from folder
Initializing Molecular Representation Generator
Dataset Name: LogP
Number of Molecules Loaded: 4197


In [2]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
device

device(type='cuda')

In [3]:
# Mean-squared-error criterion
mse_loss_fn = torch.nn.MSELoss()

# Atom-level autoencoder and its Adam optimizer (default hyperparameters).
# NOTE(review): (80, 10) presumably means 80 input features -> 10 latent dims; confirm in model.py
atom_autoencoder = Autoencoder(80, 10).to(device)
atom_autoencoder_optimizer = torch.optim.Adam(params=atom_autoencoder.parameters())

# Bond-level autoencoder and its Adam optimizer (default hyperparameters).
# NOTE(review): (10, 3) presumably means 10 input features -> 3 latent dims; confirm in model.py
bond_autoencoder = Autoencoder(10, 3).to(device)
bond_autoencoder_optimizer = torch.optim.Adam(params=bond_autoencoder.parameters())

In [4]:
# Restore the saved autoencoder weights.
# map_location=device remaps tensors that were saved on another device
# (e.g. a CUDA checkpoint opened on a CPU-only machine), so loading agrees
# with the CPU fallback chosen when `device` was created.
atom_autoencoder.load_state_dict(
    torch.load("./models/logp_atom_autoencoder.pth", map_location=device)
)
bond_autoencoder.load_state_dict(
    torch.load("./models/logp_bond_autoencoder.pth", map_location=device)
)

<All keys matched successfully>

In [None]:
# Loading GNN3D
from model import GNN3DFull

# Making an instance of the model and an optimizer.
# GNN3DFull is the class imported for this section, so it is the one instantiated.
# NOTE(review): assumes GNN3DFull accepts the same constructor keywords as
# GNN3D / GNN3DAttention — confirm against model.py.
gnn3d = GNN3DFull(
    atomic_vector_size=10,
    bond_vector_size=3,
    number_of_molecular_features=200,
    number_of_targets=1,
).to(device)
gnn3d_optimizer = torch.optim.Adam(gnn3d.parameters())
gnn3d