In [1]:
import sys
from pathlib import Path
from dispatch_jobs import DB, KEY_PREFIX
import pickle
import logging
import numpy as np

logging.info(f'key prefix is {KEY_PREFIX}')
keys = DB.keys(KEY_PREFIX + '*')
sys.path.append('/home/longyuxi/Documents/tnet2017-new/ph')
sys.path.append('/home/longyuxi/Documents/tnet2017-new/ml')

INFO:root:Database connection successful
INFO:root:key prefix is svmph_


In [2]:
with open('observations.npy', 'rb') as f:
    observations = np.load(f)
    print(observations.shape)

with open('binding_affinities.npy', 'rb') as f:
    binding_affinities = np.load(f)
    print(binding_affinities.shape)


(912, 40, 1, 3, 100, 100)
(912,)


In [3]:
# Change data according to GWW algorithm

observations = [o[36:] for o in observations]
observations = np.array(observations)
print(observations.shape)

(912, 4, 1, 3, 100, 100)


In [4]:
# Convert to float
observations = observations.astype(np.float32)
observations = observations.reshape(observations.shape[0], -1, 100)
binding_affinities = binding_affinities.astype(np.float32)

# Convert to torch tensors
import torch

observations = torch.from_numpy(observations)
binding_affinities = torch.from_numpy(binding_affinities)


In [5]:
import torch
from torch.utils.data import Dataset, DataLoader

class BindingAffinityDataset(Dataset):
    def __init__(self, observations, binding_affinities):
        self.observations = observations
        self.binding_affinities = binding_affinities

    def __len__(self):
        return len(self.observations)

    def __getitem__(self, idx):
        return self.observations[idx], self.binding_affinities[idx]

In [6]:
from torch.utils.data import random_split

ds = BindingAffinityDataset(observations, binding_affinities)
train_set, val_set = random_split(ds, [int(0.8 * len(ds)), len(ds) - int(0.8 * len(ds))])
dl = DataLoader(ds, batch_size=1, shuffle=True)

import torch.nn as nn
import torch.nn.functional as F


# A resnet-like model
class BindingAffinityModel(nn.Module):
    def __init__(self):
        super(BindingAffinityModel, self).__init__()
        self.conv1 = nn.Conv1d(1200, 100, 3, padding=1)
        self.conv2 = nn.Conv1d(100, 100, 3, padding=1)
        self.conv3 = nn.Conv1d(100, 100, 3, padding=1)
        self.conv4 = nn.Conv1d(100, 100, 3, padding=1)
        self.conv5 = nn.Conv1d(100, 100, 3, padding=1)
        self.conv6 = nn.Conv1d(100, 100, 3, padding=1)

        self.fc1 = nn.Linear(100, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 1)

        self.fc4 = nn.Linear(100, 1)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x)) + x
        x = F.relu(self.conv3(x)) + x
        x = F.relu(self.conv4(x)) + x
        x = F.relu(self.conv5(x)) + x
        x = F.relu(self.conv6(x)) + x


        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        x = torch.flatten(x, 1)
        x = self.fc4(x)

        return x


In [7]:
model = BindingAffinityModel()
# Train on GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

from torchsummary import summary
summary(model, (1200, 100), device="cuda")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1             [-1, 100, 100]         360,100
            Conv1d-2             [-1, 100, 100]          30,100
            Conv1d-3             [-1, 100, 100]          30,100
            Conv1d-4             [-1, 100, 100]          30,100
            Conv1d-5             [-1, 100, 100]          30,100
            Conv1d-6             [-1, 100, 100]          30,100
            Linear-7             [-1, 100, 100]          10,100
            Linear-8             [-1, 100, 100]          10,100
            Linear-9               [-1, 100, 1]             101
           Linear-10                    [-1, 1]             101
Total params: 531,002
Trainable params: 531,002
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.46
Forward/backward pass size (MB): 0.61
Params size (MB): 2.03
Estimated T

In [8]:
# Training loop
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(30):
    losses = []

    for i, data in enumerate(dl):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = F.mse_loss(outputs, labels)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())

    print(f'Epoch {epoch} loss: {np.mean(losses)}')


  loss = F.mse_loss(outputs, labels)


Epoch 0 loss: 196.13947041307813
Epoch 1 loss: 20.455534437180464
Epoch 2 loss: 656.2799259458644
Epoch 3 loss: 121.34490379406944
Epoch 4 loss: 7.070663582769979
Epoch 5 loss: 3.998700051774559
Epoch 6 loss: 3.899423975409726
Epoch 7 loss: 4.079771538020847
Epoch 8 loss: 4.57666938146258
Epoch 9 loss: 4.391543429048919
Epoch 10 loss: 4.036520822789859
Epoch 11 loss: 3.9146721765691836
Epoch 12 loss: 6.251743684706215
Epoch 13 loss: 4.469218464688164
Epoch 14 loss: 3.772547534580306
Epoch 15 loss: 4.0425982934765265
Epoch 16 loss: 4.070659196549674
Epoch 17 loss: 3.602765840646778
Epoch 18 loss: 6.5110725660807205
Epoch 19 loss: 3.492819641209364
Epoch 20 loss: 3.650001921177561
Epoch 21 loss: 3.5349936572908573
Epoch 22 loss: 3.621591133460801
Epoch 23 loss: 3.782189676145647
Epoch 24 loss: 4.214362877600435
Epoch 25 loss: 3.639530483201579
Epoch 26 loss: 3.837753319798977
Epoch 27 loss: 3.628155620313682
Epoch 28 loss: 3.8110472920561334
Epoch 29 loss: 3.6693801219988322


In [9]:
# Plot test results

import plotly.express as px
import plotly.graph_objects as go

y_pred = []

# Predict from val_set
test_dataloader = DataLoader(val_set, batch_size=1, shuffle=True)
for i, data in enumerate(test_dataloader):
    inputs, labels = data
    inputs, labels = inputs.to(device), labels.to(device)
    outputs = model(inputs)
    y_pred.append(outputs.item())

y_pred = np.array(y_pred)
y_true = np.array([l.item() for l in val_set.dataset.binding_affinities])

fig = go.Figure()
fig.add_trace(go.Scatter(x=y_true, y=y_pred, mode='markers'))
fig.add_trace(go.Scatter(x=y_true, y=y_true, mode='lines'))
fig.update_layout(title='Predicted vs. True Binding Affinities', xaxis_title='True Binding Affinities', yaxis_title='Predicted Binding Affinities')
fig.show()