In [1]:
import torch
from torch.autograd import Variable
import torch.nn.functional as F
from torch.utils import data
from torch import nn 
import copy

from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from time import time
from sklearn.metrics import roc_auc_score, average_precision_score, f1_score, roc_curve, confusion_matrix, precision_score, recall_score, auc
from sklearn.model_selection import KFold
# Seed both NumPy's and PyTorch's global random generators with the same
# value so that repeated runs of the notebook draw identical random numbers.
SEED = 1
np.random.seed(SEED)
torch.manual_seed(SEED)

from config import BIN_config_DBPE
from models import BIN_Interaction_Flat
from stream import BIN_Data_Encoder

# Run on the first CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Select which model to use

In [2]:
import os

# Directory that contains the trained checkpoint ("max-model.ckpt" is read
# from it when the weights are loaded). The default is the original author's
# local training run; set the MOLTRANS_MODEL_DIR environment variable to point
# the notebook at a different run without editing this cell.
model_dir = os.environ.get("MOLTRANS_MODEL_DIR") or (
    r"/home/ken/projects/MolTrans/results/davis/20220519_124222/train/model_dir"
)

# Load the configurations

In [3]:
# Build the hyperparameter configuration returned by BIN_config_DBPE(); it is
# unpacked as keyword arguments into BIN_Interaction_Flat when the model is built.
config = BIN_config_DBPE()

In [4]:
# Display the configuration values (batch size, sequence lengths, embedding size, ...).
config

{'batch_size': 16,
 'input_dim_drug': 23532,
 'input_dim_target': 16693,
 'train_epoch': 50,
 'max_drug_seq': 50,
 'max_protein_seq': 545,
 'emb_size': 384,
 'dropout_rate': 0.1,
 'scale_down_ratio': 0.25,
 'growth_rate': 20,
 'transition_rate': 0.5,
 'num_dense_blocks': 4,
 'kernal_dense_size': 3,
 'intermediate_size': 1536,
 'num_attention_heads': 12,
 'attention_probs_dropout_prob': 0.1,
 'hidden_dropout_prob': 0.1,
 'flat_dim': 78192}

In [5]:
# Build the network from the configuration and place it on the selected
# device. Using `.to(device)` instead of an unconditional `.cuda()` keeps the
# notebook runnable on machines without a GPU (device falls back to CPU).
model = BIN_Interaction_Flat(**config)
model = model.to(device)

# Load the model

In [6]:
from collections import OrderedDict

# The checkpoint was saved from a DataParallel-wrapped model, so its parameter
# names carry a leading "module." that the bare model does not expect.
# map_location remaps the stored tensors onto the device chosen for this
# session, so loading also works when the saving GPU is not available.
# NOTE(review): torch.load unpickles the file — only load checkpoints you trust.
DATA_PARALLEL_PREFIX = "module."
state_dict = torch.load(os.path.join(model_dir, "max-model.ckpt"), map_location=device)

# Strip the prefix only from keys that actually have it; keys saved without
# DataParallel are passed through unchanged instead of losing 7 characters.
new_state_dict = OrderedDict(
    (k[len(DATA_PARALLEL_PREFIX):] if k.startswith(DATA_PARALLEL_PREFIX) else k, v)
    for k, v in state_dict.items()
)

# Load the parameters; as the last expression, the key-matching report is displayed.
model.load_state_dict(new_state_dict)

<All keys matched successfully>

# Protein (FASTA) Embedding and Drug Embedding

In [7]:
from stream import protein2emb_encoder, drug2emb_encoder

In [8]:
# Keyword arguments forwarded to the DataLoader for the test set:
# batches of 4, shuffled each pass, loaded in the main process
# (num_workers=0), with any final incomplete batch discarded.
params = dict(
    batch_size=4,
    shuffle=True,
    num_workers=0,
    drop_last=True,
)

In [9]:
# Read the DAVIS test split from disk and expose it as a batched DataLoader.
dataFolder = './dataset/DAVIS'
df_test = pd.read_csv(f"{dataFolder}/test.csv")

# The encoder dataset receives the row index values, the Label column and the
# full frame.
testing_set = BIN_Data_Encoder(
    df_test.index.values,
    df_test['Label'].values,
    df_test,
)
testing_generator = data.DataLoader(testing_set, **params)

In [13]:
# Determinism check: run the same batch through the model twice and compare
# the two score tensors element-wise.
# NOTE(review): the recorded output shows the two passes differ even in eval
# mode — possibly a stochastic layer that is not gated on training mode, or
# floating-point non-determinism on the GPU; TODO confirm.
model.eval()  # switch train-only behaviour (e.g. dropout modules) to inference mode
with torch.no_grad():  # pure inference: do not build autograd graphs
    for d, p, d_mask, p_mask, label in testing_generator:
        # Cast and move the batch to the device once, then reuse it for both passes.
        batch = [t.long().to(device) for t in (d, p, d_mask, p_mask)]
        score, i_map = model(*batch)
        score_1, i_map_1 = model(*batch)
        print(score == score_1)
        break  # only the first batch is needed for this check

tensor([[False],
        [False]], device='cuda:0')


# Score a single drug–protein pair.
# NOTE(review): d_v, input_mask_d, p_v and input_mask_p are not defined in the
# cells shown here — presumably the outputs of drug2emb_encoder /
# protein2emb_encoder for one drug and one protein; TODO confirm and define
# them in a preceding cell so the notebook survives Restart & Run All.

# Convert each NumPy array to an int64 tensor on the selected device and add a
# leading batch dimension of size 1.
d_v_tensor = torch.from_numpy(d_v).long().to(device).unsqueeze(0)
input_mask_d_tensor = torch.from_numpy(input_mask_d).long().to(device).unsqueeze(0)
p_v_tensor = torch.from_numpy(p_v).long().to(device).unsqueeze(0)
input_mask_p_tensor = torch.from_numpy(input_mask_p).long().to(device).unsqueeze(0)

# The model returns a (score, i_map) pair (see the determinism-check cell), so
# unpack it; binding the whole tuple to `score` would break `.shape` and the
# sigmoid below. Calling the module (not `.forward`) is the standard entry point.
with torch.no_grad():
    score, i_map = model(d_v_tensor,
                         p_v_tensor,
                         input_mask_d_tensor,
                         input_mask_p_tensor)
print(score.shape)

# Squash the raw score through a sigmoid. The result is kept under the
# original name `logits`, although after the sigmoid it lies in (0, 1).
m = torch.nn.Sigmoid()
logits = torch.squeeze(m(score))
logits = logits.detach().cpu().numpy()
print(logits)