-
Notifications
You must be signed in to change notification settings - Fork 11
/
mutation_analysis.py
77 lines (72 loc) · 2.81 KB
/
mutation_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# -*- coding: utf-8 -*-
"""
@Time:Created on 2020/4/27 13:56
@author: LiFan Chen
@Filename: mutation_analysis.py
@Software: PyCharm
"""
import torch
import random
import os
from model import *
import numpy as np
from featurizer import featurizer
if __name__ == "__main__":
    # Mutation scan for one protein/compound pair.
    #
    # Loads a pickled pretrained encoder ('Bert.pkl') and a trained Predictor
    # checkpoint, scores the wild-type sequence against a fixed SMILES string,
    # then substitutes every position of the sequence with each of the 20
    # amino-acid letters in turn and records the absolute change of the score
    # returned by `tester.test`. The resulting (len(sequence), 20) matrix is
    # written to 'mutation_RNF130.npy'.
    import warnings

    warnings.filterwarnings("ignore")
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

    SEED = 1
    random.seed(SEED)
    torch.manual_seed(SEED)

    # CPU or GPU. Prefer the second visible GPU as before, but fall back to
    # the first one when only a single GPU is visible instead of failing with
    # an invalid device ordinal.
    if torch.cuda.is_available():
        device = torch.device(
            'cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0')
        print('The code uses GPU...')
    else:
        device = torch.device('cpu')
        print('The code uses CPU!!!')

    # Create model and tester. Only the hyperparameters needed to rebuild the
    # network are kept here; the remaining settings (lr, weight decay, batch
    # size) are only encoded in the checkpoint file name below.
    n_layers = 3
    dropout = 0.1

    # map_location lets a pickle that was saved from a GPU run also load on a
    # CPU-only machine (and avoids first materialising it on another GPU).
    # NOTE(review): torch.load unpickles arbitrary objects - only use trusted
    # files. On PyTorch >= 2.6 the default weights_only=True rejects a pickled
    # module object; pass weights_only=False there if the file is trusted.
    pretrain = torch.load('Bert.pkl', map_location=device)
    pretrain.to(device)
    # The pretrained encoder is frozen and put in eval mode.
    for param in pretrain.parameters():
        param.requires_grad = False
    pretrain.eval()

    encoder = Encoder(pretrain, n_layers, device)
    decoder = Decoder(n_layers, dropout, device)
    model = Predictor(encoder, decoder, device)
    # Load the weights onto the CPU first, then move the whole model.
    model.load_state_dict(
        torch.load("lr=1e-5,weight_decay=1e-3,dropout=0.1,batch=64.pt",
                   map_location='cpu'))
    model.to(device)
    tester = Tester(model, device)

    # Prepare input data: one SMILES string and one protein sequence.
    print('Testing...')
    sequence = "MSCAGRAGPARLAALALLTCSLWPARADNASQEYYTALINVTVQEPGRGAPLTFRIDRGRYGLDSPKAEVRGQVLAPLPLHGVADHLGCDPQTRFFVPPNIKQWIALLQRGNCTFKEKISRAAFHNAVAVVIYNNKSKEEPVTMTHPGTGDIIAVMITELRGKDILSYLEKNISVQMTIAVGTRMPPKNFSRGSLVFVSISFIVLMIISSAWLIFYFIQKIRYTNARDRNQRRLGDAAKKAISKLTTRTVKKGDKETDPDFDHCAVCIESYKQNDVVRILPCKHVFHKSCVDPWLSEHCTCPMCKLNILKALGIVPNLPCTDNVAFDMERLTRTQAVNRRSALGDLAGDNSLGLEPLRTSGISPLPQDGELTPRTGEINIAVTKEWFIIASFGLLSALTLCYMIIRATASLNANEVEWF"
    smiles = "CS(=O)(C1=NN=C(S1)CN2C3CCC2C=C(C4=CC=CC=C4)C3)=O"

    # Baseline score of the unmodified sequence.
    compounds, adjacencies, proteins = featurizer(smiles, sequence)
    test_set = list(zip(compounds, adjacencies, proteins))
    original_score = tester.test(test_set)

    # Substitution alphabet; column k of delta_score corresponds to
    # mutation[k].
    mutation = 'ARNDCQEGHILKMFPSTWYV'
    n = len(sequence)
    delta_score = np.zeros((n, len(mutation)))
    for i in range(n):
        for k, m in enumerate(mutation):
            # Replace the residue at position i with m (a substitution equal
            # to the original residue is still evaluated, as before).
            sequence_2 = sequence[:i] + m + sequence[i + 1:]
            compounds, adjacencies, proteins = featurizer(smiles, sequence_2)
            test_set = list(zip(compounds, adjacencies, proteins))
            score = tester.test(test_set)
            delta_score[i, k] = np.abs(original_score - score)
            print(delta_score[i, k])

    # Persist the full matrix once the scan is complete.
    np.save('mutation_RNF130.npy', delta_score)