In [1]:
import os

os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="5"

In [6]:
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
from tensorflow.keras.utils import to_categorical

import numpy as np
import pickle
from torch.utils.data import DataLoader, Dataset

import sys 
# sys.path.append('..')
# from models.GACNet.modules import GACNet, get_loss

import torch
import random

In [8]:
from tensorflow.keras import backend as K
from collections import Counter
def q3_acc(y_true, y_pred):
    y = tf.argmax(y_true, axis=-1)
    y_ = tf.argmax(y_pred, axis=-1)
    mask = tf.greater(y, 0)
    return K.cast(K.equal(tf.boolean_mask(y, mask), tf.boolean_mask(y_, mask)), K.floatx())

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical

from tensorflow.keras.preprocessing import sequence

atom_dict_ = {'H': 0,
         'C': 1,
         'O': 2,
         'N': 3,
         'S': 4}

In [9]:
with open('../../ms_final/ss/data/atom_ss_X.pkl', 'rb') as handle:
    X = pickle.load(handle)
with open('../../ms_final/ss/data/atom_ss_y.pkl', 'rb') as handle:
    y = pickle.load(handle)

In [10]:
normalize = True
for idx in range(len(X)):
    cur_coord = []
    cur_feat = []
    
    cur_x = X[idx]
    for i in cur_x:
        a = i[0]
        b = i[1]
        c = i[2]
        cur_coord.append([a, b, c])
        
        atom = i[3]
        zeros = [0 for _ in range(5)]
        if atom in atom_dict_:
            zeros[atom_dict_[atom]] = 1
        cur_feat.append(zeros)
        
    xyz = np.asarray(cur_coord, dtype = np.float32)
    norm = np.asarray(cur_feat, dtype = np.int32)
        
    centroid = np.mean(xyz, axis=0)
    xyz -= centroid
    if normalize:
        m = np.max(np.sqrt(np.sum(xyz**2, axis=1)))
        xyz = xyz / m
    
    points = np.concatenate([xyz, norm], 1)
    X[idx] = points

In [11]:
max_len = 4000

In [12]:
input_data = sequence.pad_sequences(X, maxlen=max_len, padding='post', dtype='float32', truncating='post')
tokenizer_decoder = Tokenizer(char_level=True)
tokenizer_decoder.fit_on_texts(y)
target_data = tokenizer_decoder.texts_to_sequences(y)
# target_data = sequence.pad_sequences(target_data, maxlen=max_len, padding='post', truncating='post')

In [13]:
target_data_ = sequence.pad_sequences(target_data, maxlen=max_len, padding='post', truncating='post')

In [14]:
y = []
for i in target_data:
    y.append(list(set(i)))

In [15]:
for i in range(len(y)):
    zeros = 8*[0]
    for j in y[i]:
        zeros[j-1] = 1
    y[i] = zeros

In [16]:
y = np.array(y)

In [17]:
X_train, X_test, y_train, y_test = train_test_split(input_data, y, test_size=.2, random_state=0)
X_train.shape, y_train.shape

((10693, 4000, 8), (10693, 8))

In [18]:
from models.GACNet.modules_new import GACNet, get_loss



In [19]:
class PC_dataset(Dataset):

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        pc = self.x[idx]
        label = self.y[idx]

        return pc, label

train_dataset = PC_dataset(X_train, y_train)
test_dataset = PC_dataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [20]:
NUM_CLASSES = 8

classifier = GACNet(NUM_CLASSES).cuda()
#criterion = get_loss()
criterion = torch.nn.BCELoss().cuda()
# criterion = get_loss().cuda()

optimizer = torch.optim.Adam(
    classifier.parameters(),
    lr=0.001,
    betas=(0.9, 0.999),
    eps=1e-07,
    weight_decay=0
)

def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv2d') != -1:
        torch.nn.init.xavier_normal_(m.weight.data)
        torch.nn.init.constant_(m.bias.data, 0.0)
    elif classname.find('Linear') != -1:
        torch.nn.init.xavier_normal_(m.weight.data)
        torch.nn.init.constant_(m.bias.data, 0.0)

def inplace_relu(m):
    classname = m.__class__.__name__
    if classname.find('ReLU') != -1:
        m.inplace=True
classifier = classifier.apply(inplace_relu)
classifier = classifier.apply(weights_init)

In [7]:
input = torch.randn(3, requires_grad=True)
target = torch.empty(3).random_(2)
print(input.shape, target.shape)

torch.Size([3]) torch.Size([3])


In [48]:
train_hist_loss = []
train_hist_acc = []
test_hist_acc = []
for epoch in range(20):
    num_batches = len(train_loader)
    total_correct = 0
    total_seen = 0
    loss_sum = 0
    classifier = classifier.train()
    mean_correct = []
    total_loss = 0
    for i, (points, target) in tqdm(enumerate(train_loader), total=len(train_loader), smoothing=0.9):
        optimizer.zero_grad()

        #points = points.data.numpy()
        #points = torch.Tensor(points)
        points = points.transpose(2, 1)
        norm = points[:, 3:, :]
        xyz = points[:, :3, :]
        
        xyz, norm, target = xyz.float().cuda(), norm.float().cuda(), target.float().cuda()
        #print(xyz.shape, norm.shape)
        pred = classifier(xyz, norm)
        pred = torch.sigmoid(pred)
        loss = criterion(pred, target)
        
        loss.backward()
        optimizer.step()
        total_loss += loss.cpu().detach().data.item()

        
        pred[pred >= 0.5] = 1
        pred[pred < 0.5] = 0 
        correct = (pred == target).sum().cpu()
        mean_correct.append(correct.item() / float(points.size()[0]) / 8)
        
        
        
    train_instance_acc = np.mean(mean_correct)
    train_hist_acc.append(train_instance_acc)
    train_hist_loss.append(total_loss)
    print("Epoch: {}".format(epoch+1))
    print('Train Instance Accuracy: %f' % train_instance_acc)
    print('Train Loss: %f' % total_loss)
    
    mean_correct_test = []
    with torch.no_grad():
        for i, (points, target) in enumerate(test_loader):

            points = points.transpose(2, 1)
            norm = points[:, 3:, :]
            xyz = points[:, :3, :]

            xyz, norm, target = xyz.float().cuda(), norm.float().cuda(), target.long().cuda()
            pred = classifier(xyz, norm)
            pred = torch.sigmoid(pred)

            pred[pred >= 0.5] = 1
            pred[pred < 0.5] = 0 
            correct = (pred == target).sum().cpu()
            mean_correct_test.append(correct.item() / float(points.size()[0]) / 8)
        test_instance_acc = np.mean(mean_correct_test)
        test_hist_acc.append(test_instance_acc)
        #print("Epoch: {}".format(epoch+1))
        print('Test Instance Accuracy: %f' % test_instance_acc)

  0%|          | 0/668 [00:00<?, ?it/s]

Epoch: 1
Train Instance Accuracy: 0.876696
Train Loss: 201.940895
Test Instance Accuracy: 0.874349


  0%|          | 0/668 [00:00<?, ?it/s]

Epoch: 2
Train Instance Accuracy: 0.876871
Train Loss: 201.902913
Test Instance Accuracy: 0.873930


  0%|          | 0/668 [00:00<?, ?it/s]

Epoch: 3
Train Instance Accuracy: 0.877140
Train Loss: 201.579371
Test Instance Accuracy: 0.873372


  0%|          | 0/668 [00:00<?, ?it/s]

Epoch: 4
Train Instance Accuracy: 0.876544
Train Loss: 201.276718
Test Instance Accuracy: 0.874163


  0%|          | 0/668 [00:00<?, ?it/s]

Epoch: 5
Train Instance Accuracy: 0.876824
Train Loss: 200.545394
Test Instance Accuracy: 0.873512


  0%|          | 0/668 [00:00<?, ?it/s]

Epoch: 6
Train Instance Accuracy: 0.876649
Train Loss: 200.753572
Test Instance Accuracy: 0.873512


  0%|          | 0/668 [00:00<?, ?it/s]

Epoch: 7
Train Instance Accuracy: 0.877129
Train Loss: 200.562506
Test Instance Accuracy: 0.874535


  0%|          | 0/668 [00:00<?, ?it/s]

Epoch: 8
Train Instance Accuracy: 0.876555
Train Loss: 200.404758
Test Instance Accuracy: 0.874628


  0%|          | 0/668 [00:00<?, ?it/s]

Epoch: 9
Train Instance Accuracy: 0.877058
Train Loss: 200.117628
Test Instance Accuracy: 0.874535


  0%|          | 0/668 [00:00<?, ?it/s]

Epoch: 10
Train Instance Accuracy: 0.876719
Train Loss: 199.842300
Test Instance Accuracy: 0.875000


  0%|          | 0/668 [00:00<?, ?it/s]

Epoch: 11
Train Instance Accuracy: 0.876988
Train Loss: 200.175286
Test Instance Accuracy: 0.874209


  0%|          | 0/668 [00:00<?, ?it/s]

Epoch: 12
Train Instance Accuracy: 0.877164
Train Loss: 199.874115
Test Instance Accuracy: 0.874814


  0%|          | 0/668 [00:00<?, ?it/s]

Epoch: 13
Train Instance Accuracy: 0.876708
Train Loss: 200.370815
Test Instance Accuracy: 0.874581


  0%|          | 0/668 [00:00<?, ?it/s]

Epoch: 14
Train Instance Accuracy: 0.877070
Train Loss: 200.186821
Test Instance Accuracy: 0.874209


  0%|          | 0/668 [00:00<?, ?it/s]

Epoch: 15
Train Instance Accuracy: 0.876602
Train Loss: 199.957280
Test Instance Accuracy: 0.874488


  0%|          | 0/668 [00:00<?, ?it/s]

Epoch: 16
Train Instance Accuracy: 0.877222
Train Loss: 199.644055
Test Instance Accuracy: 0.874070


  0%|          | 0/668 [00:00<?, ?it/s]

Epoch: 17
Train Instance Accuracy: 0.876883
Train Loss: 199.655181
Test Instance Accuracy: 0.874070


  0%|          | 0/668 [00:00<?, ?it/s]

Epoch: 18
Train Instance Accuracy: 0.877292
Train Loss: 199.527383
Test Instance Accuracy: 0.874907


  0%|          | 0/668 [00:00<?, ?it/s]

Epoch: 19
Train Instance Accuracy: 0.876661
Train Loss: 199.501690
Test Instance Accuracy: 0.873233


  0%|          | 0/668 [00:00<?, ?it/s]

Epoch: 20
Train Instance Accuracy: 0.876918
Train Loss: 199.215974
Test Instance Accuracy: 0.874209


In [61]:
res = np.array([0]*8)
pred_, true_ = [], []
with torch.no_grad():
    for i, (points, target) in enumerate(test_loader):

        points = points.transpose(2, 1)
        norm = points[:, 3:, :]
        xyz = points[:, :3, :]

        xyz, norm, target = xyz.float().cuda(), norm.float().cuda(), target.long().cuda()
        pred = classifier(xyz, norm)
        pred = torch.sigmoid(pred)

        pred[pred >= 0.5] = 1
        pred[pred < 0.5] = 0 
        
        res += (pred == target).sum(axis=0).cpu().numpy()
        pred_.append(pred.cpu().numpy())
        true_.append(target.cpu().numpy())

In [63]:
pred_, true_ = np.concatenate(pred_), np.concatenate(true_)

In [66]:
from sklearn.metrics import confusion_matrix

In [80]:
for i in range(8):
    print(i, np.sum(true_[:, i]))
    print(confusion_matrix(true_[:,i], pred_[:,i]).ravel())

0 2511
[   0  163    0 2511]
1 2246
[   0  428    0 2246]
2 2674
[2674]
3 2642
[   0   32    0 2642]
4 2618
[   0   56    0 2618]
5 2143
[   0  531    1 2142]
6 1661
[ 178  835  106 1555]
7 518
[2156    0  518    0]
