In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv,GATConv
from torch_geometric.data import Data
import pandas as pd
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
torch.cuda.set_device(3)

In [3]:
dir_name='proteinwater'
train_csv=pd.read_csv(f'{dir_name}/eSol_train.csv')


# train_data=read()
train_graphs=[]


for index, row in train_csv.iterrows():
    name,label,seq=row.values
    
    vec=torch.from_numpy(np.load(f"{dir_name}/feature/{name}.npy")).float()[1:-1,:]
    edge_matrix=torch.from_numpy(np.load(f"{dir_name}/map/{name}.npy")).float()

    row, col = np.where((edge_matrix >= 0.5) & (np.eye(edge_matrix.shape[0]) == 0))


    edge = [row.tolist(), col.tolist()]

    edge_index=torch.from_numpy(np.array(edge)).long()

    label=torch.tensor(label).float()
    data=Data(x=vec,edge_index=edge_index,y=label,edge_matrix=edge_matrix)
    train_graphs.append(data)

In [4]:

test_csv=pd.read_csv(f'{dir_name}/eSol_test.csv')


# train_data=read()
test_graphs=[]


for index, row in test_csv.iterrows():
    name,label,seq=row.values

    vec=torch.from_numpy(np.load(f"{dir_name}/feature/{name}.npy")).float()[1:-1,:]
    edge_matrix=torch.from_numpy(np.load(f"{dir_name}/map/{name}.npy")).float()

    row, col = np.where((edge_matrix >= 0.5) & (np.eye(edge_matrix.shape[0]) == 0))


    edge = [row.tolist(), col.tolist()]

    edge_index=torch.from_numpy(np.array(edge)).long()

    label=torch.tensor(label).float()

    data=Data(x=vec,edge_index=edge_index,y=label,edge_matrix=edge_matrix)
    test_graphs.append(data)

In [5]:

from sklearn.metrics import mean_squared_error, r2_score, precision_score, recall_score, f1_score, roc_auc_score, accuracy_score
import numpy as np
def test(model):

    model.eval()
    true_labels = []
    predicted_probs = []
    correct=0
    sums=0
    with torch.no_grad():
        for data in test_graphs:
            data = data.cuda()
            out = model(data)
            sums+=1
            if out[0]<0.5 and data.y<0.5:
                correct+=1
            if out[0]>=0.5 and data.y>=0.5:
                correct+=1

            true_labels.append(data.y.cpu().numpy())
            predicted_probs.append(out.cpu().numpy())
            
    
    true_labels = np.array(true_labels).flatten()
    predicted_probs = np.array(predicted_probs).flatten()

    # RMSE
    rmse = np.sqrt(mean_squared_error(true_labels, predicted_probs))

    # R2
    r2 = r2_score(true_labels, predicted_probs)

    # # Precision, Recall, F1
    binary_predictions = (predicted_probs > 0.5).astype(int)
    precision = precision_score((true_labels > 0.5).astype(int), binary_predictions)
    recall = recall_score((true_labels > 0.5).astype(int), binary_predictions)
    f1 = f1_score((true_labels > 0.5).astype(int), binary_predictions)

    # # Accuracy
    accuracy = correct/sums#accuracy_score((true_labels > 0.5).astype(int), binary_predictions)

    # AUC
    auc = roc_auc_score((true_labels > 0.5).astype(int), predicted_probs)
    #print(correct/sums)
    print(f"RMSE:{rmse:.4f},",f"R2:{r2:.4f}|",f"Accuracy:{accuracy:.4f},",f"Precision:{precision:.4f},",f"Recall:{recall:.4f},",f"F1:{f1:.4f},",f"AUC:{auc:.4f}")

In [6]:
import random
random.seed(1234)
random.shuffle(train_graphs)
split_point=int(len(train_graphs)/10*9)

In [7]:
val_graphs=train_graphs[split_point:]
train_graphs=train_graphs[:split_point]

In [8]:
len(val_graphs)

237

In [9]:
class SelfAttention(nn.Module):
    def __init__(self, hid_dim, n_heads, dropout, device):
        super().__init__()

        self.hid_dim = hid_dim
        self.n_heads = n_heads

        assert hid_dim % n_heads == 0

        self.w_q = nn.Linear(hid_dim, hid_dim)
        self.w_k = nn.Linear(hid_dim, hid_dim)
        self.w_v = nn.Linear(hid_dim, hid_dim)

        self.fc = nn.Linear(hid_dim, hid_dim)

        self.do = nn.Dropout(dropout)

        self.scale = torch.sqrt(torch.FloatTensor([hid_dim // n_heads])).to(device)

    def forward(self, query, key, value, mask=None):
        bsz = query.shape[0]

        # query = key = value [batch size, sent len, hid dim]

        Q = self.w_q(query)
        K = self.w_k(key)
        V = self.w_v(value)

        # Q, K, V = [batch size, sent len, hid dim]

        Q = Q.view(bsz, -1, self.n_heads, self.hid_dim // self.n_heads).permute(0, 2, 1, 3)
        K = K.view(bsz, -1, self.n_heads, self.hid_dim // self.n_heads).permute(0, 2, 1, 3)
        V = V.view(bsz, -1, self.n_heads, self.hid_dim // self.n_heads).permute(0, 2, 1, 3)

        # K, V = [batch size, n heads, sent len_K, hid dim // n heads]
        # Q = [batch size, n heads, sent len_q, hid dim // n heads]
        energy = torch.matmul(Q, K.permute(0, 1, 3, 2)) / self.scale

        # energy = [batch size, n heads, sent len_Q, sent len_K]
        if mask is not None:
            energy = energy.masked_fill(mask == 0, -1e10)

        attention = self.do(F.softmax(energy, dim=-1))

        # attention = [batch size, n heads, sent len_Q, sent len_K]

        x = torch.matmul(attention, V)

        # x = [batch size, n heads, sent len_Q, hid dim // n heads]

        x = x.permute(0, 2, 1, 3).contiguous()

        # x = [batch size, sent len_Q, n heads, hid dim // n heads]

        x = x.view(bsz, -1, self.n_heads * (self.hid_dim // self.n_heads))

        # x = [batch size, src sent len_Q, hid dim]

        x = self.fc(x)

        # x = [batch size, sent len_Q, hid dim]

        return x


class ScaledDotProductAttention(nn.Module):
    """ Scaled Dot-Product Attention """
    def __init__(self, scale):
        super().__init__()

        self.scale = scale
        self.softmax = nn.Softmax(dim=2)

    def forward(self, q, k, v,  mask=None):
        u = torch.bmm(q, k.transpose(1, 2)) # 1.Matmul
        u = u / self.scale # 2.Scale

        if mask is not None:
            u = u.masked_fill(mask, -np.inf) # 3.Mask
        
        #print(u.shape,edge_matrix.shape)
        #print(u)
        #u[0]=u[1]=edge_matrix
        attn = self.softmax(u) # 4.Softmax

        output = torch.bmm(attn, v) # 5.Output

        return output
    
class MultiHeadAttention(nn.Module):
    """ Multi-Head Attention """

    def __init__(self, n_head, d_k_, d_v_, d_k, d_v, d_o):
        super().__init__()

        self.n_head = n_head
        self.d_k = d_k
        self.d_v = d_v

        self.fc_q = nn.Linear(d_k_, n_head * d_k)
        self.fc_k = nn.Linear(d_k_, n_head * d_k)
        self.fc_v = nn.Linear(d_v_, n_head * d_v)

        self.attention = ScaledDotProductAttention(scale=np.power(d_k, 0.5))

        self.fc_o = nn.Linear(n_head * d_v, d_o)

    def forward(self, q, k, v,  mask=None):

        n_head, d_q, d_k, d_v = self.n_head, self.d_k, self.d_k, self.d_v

        batch, n_q, d_q_ = q.size()
        batch, n_k, d_k_ = k.size()
        batch, n_v, d_v_ = v.size()

        q = self.fc_q(q) # 1.单头变多头
        k = self.fc_k(k)
        v = self.fc_v(v)
        q = q.view(batch, n_q, n_head, d_q).permute(2, 0, 1, 3).contiguous().view(-1, n_q, d_q)
        k = k.view(batch, n_k, n_head, d_k).permute(2, 0, 1, 3).contiguous().view(-1, n_k, d_k)
        v = v.view(batch, n_v, n_head, d_v).permute(2, 0, 1, 3).contiguous().view(-1, n_v, d_v)

        if mask is not None:
            mask = mask.repeat(n_head, 1, 1)
        output = self.attention(q, k, v,  mask=mask) # 2.当成单头注意力求输出

        output = output.view(n_head, batch, n_q, d_v).permute(1, 2, 0, 3).contiguous().view(batch, n_q, -1) # 3.Concat
        output = self.fc_o(output) # 4.仿射变换得到最终输出

        return output

class TransformerLayer(nn.Module):
    def __init__(self, d):
        super(TransformerLayer, self).__init__()
        self.self_attn = MultiHeadAttention(n_head=4, d_k_=d, d_v_=d, d_k=d, d_v=d, d_o=d)
        
        self.norm = nn.LayerNorm(d)

    def forward(self, q,k,v):
        # 注意：实际情况中可能还会有一些其他的子层和残差连接
        attn_output = self.self_attn(q,k,v)
        v = v + attn_output
        v = self.norm(v)
        return v



In [10]:
num_node_features=1280
Hidden_feature,lr,weight_decay,batch_size=128,0.0004,0.03,16

In [14]:
models=[]
# d=Hidden_feature
import copy
import random

for Hidden_feature in [792,264,216,312,600,456]:
    for lr in [0.0004,0.001,0.0008,0.0006]:
        class GCN(nn.Module):
            def __init__(self):
                super(GCN, self).__init__()
                self.conv1 = GATConv(num_node_features, Hidden_feature)
                self.conv2 = GATConv(Hidden_feature, Hidden_feature)
                self.attention = ScaledDotProductAttention(scale=np.power(Hidden_feature, 0.5))
                self.liner1=nn.Linear(num_node_features, Hidden_feature)
                self.cnn = nn.Conv1d(num_node_features, Hidden_feature, 3, stride=1, padding=1)
                self.mha = TransformerLayer(Hidden_feature)
                self.liner2=nn.Linear(Hidden_feature, 1)
                self.cnn1 = nn.Conv1d(num_node_features, Hidden_feature, 3, stride=1, padding=1)
                self.cnn2 = nn.Conv1d(num_node_features, Hidden_feature, 5, stride=1, padding=2)
                self.cnn3 = nn.Conv1d(num_node_features, Hidden_feature, 7, stride=1, padding=3)
            def forward(self, data):
                x, edge_index,edge_matrix = data.x, data.edge_index,data.edge_matrix
                value=F.relu(self.liner1(x))
                value=value.unsqueeze(0)


                key1=F.relu(self.cnn1(x.transpose(0, 1)).transpose(0, 1))
                key2=F.relu(self.cnn2(x.transpose(0, 1)).transpose(0, 1))
                key3=F.relu(self.cnn3(x.transpose(0, 1)).transpose(0, 1))
                key=key1+key2+key3
                key=key.unsqueeze(0)
                
                x = F.relu(self.conv1(x, edge_index))
                query = F.relu(self.conv2(x, edge_index))
                query=query.unsqueeze(0)
                
                
                x = self.mha(query,key,value)
                x= x.squeeze(0)
                x=torch.mean(x, dim=0)
                x=self.liner2(x)
                x=torch.sigmoid(x)
                return x

        random.seed(1234)
        np.random.seed(1234)
        torch.manual_seed(1234)
        import copy
        train_data=copy.deepcopy(train_graphs)

        print(f"{Hidden_feature},{lr},{weight_decay},{batch_size}")
        model = GCN().cuda()
        #optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
        criterion = torch.nn.BCELoss()

        import random
        max_acc=0
        best_model=None
        

        for epochs in range(10):
            optimizer.zero_grad()
            losses=0
            index=0
            random.shuffle(train_data)
            model.train()
            for idx in range(len(train_data)):
                datass=train_data[idx]
                datass = datass.cuda()
                out = model(datass)
                if index==batch_size+1:
                    optimizer.zero_grad()
                    index=0
                
                loss = criterion(out.unsqueeze(0), datass.y.unsqueeze(0).unsqueeze(0))
                loss.backward()
                losses+=loss.item()
                if index==batch_size:
                    optimizer.step()
                
                index+=1
            print(len(models),":")
            test(model)
            models.append(copy.deepcopy(model))

792,0.0004,0.03,16
0 :
RMSE:0.2343, R2:0.4653| Accuracy:0.7831, Precision:0.7429, Recall:0.7645, F1:0.7536, AUC:0.8593
1 :
RMSE:0.2247, R2:0.5085| Accuracy:0.7908, Precision:0.8273, Recall:0.6686, F1:0.7395, AUC:0.8836
2 :
RMSE:0.2272, R2:0.4974| Accuracy:0.8023, Precision:0.7405, Recall:0.8459, F1:0.7897, AUC:0.8773
3 :
RMSE:0.2341, R2:0.4664| Accuracy:0.7651, Precision:0.7752, Recall:0.6715, F1:0.7196, AUC:0.8670
4 :


KeyboardInterrupt: 

In [13]:
len(models)

10

In [26]:
test(model.cuda())

RMSE:0.2272, R2:0.4974| Accuracy:0.8023, Precision:0.7405, Recall:0.8459, F1:0.7897, AUC:0.8773


In [19]:
save_model=models[2]

In [23]:
torch.save(save_model.cpu(), 'model_mwater_acc_8023.pth')

In [24]:
model=torch.load('model_mwater_acc_8023.pth')