In [1]:
import os

os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="6"

In [2]:
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
from tensorflow.keras.utils import to_categorical

import numpy as np
import pickle
from torch.utils.data import DataLoader, Dataset

import sys 
# sys.path.append('..')
# from models.GACNet.modules import GACNet, get_loss

import torch
import random

In [3]:
from tensorflow.keras import backend as K
from collections import Counter
def q3_acc(y_true, y_pred):
    y = tf.argmax(y_true, axis=-1)
    y_ = tf.argmax(y_pred, axis=-1)
    mask = tf.greater(y, 0)
    return K.cast(K.equal(tf.boolean_mask(y, mask), tf.boolean_mask(y_, mask)), K.floatx())

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical

from tensorflow.keras.preprocessing import sequence

atom_dict_ = {'H': 0,
         'C': 1,
         'O': 2,
         'N': 3,
         'S': 4}

In [4]:
with open('../../ms_final/ss/data/atom_ss_X.pkl', 'rb') as handle:
    X = pickle.load(handle)
with open('../../ms_final/ss/data/atom_ss_y.pkl', 'rb') as handle:
    y = pickle.load(handle)

In [5]:
len(X[0]), len(X[1])

(2559, 2457)

In [6]:
perc = 0.3

In [7]:
sel1, sel2 = np.array(X[0]), np.array(X[1])
cls1, cls2 = [], []
for _ in range(5000):
    index = np.random.choice(sel1.shape[0], int(sel1.shape[0]*perc), replace=False)  
    cls1.append(sel1[index])
    
for _ in range(5000):
    index = np.random.choice(sel2.shape[0], int(sel2.shape[0]*perc), replace=False)  
    cls2.append(sel2[index])

In [8]:
X_ = cls1 + cls2
y = [0]*len(cls1) + [1]*len(cls2)

In [9]:
normalize = True
for idx in range(len(X_)):
    cur_coord = []
    cur_feat = []
    
    cur_x = X_[idx]
    for i in cur_x:
        a = i[0]
        b = i[1]
        c = i[2]
        cur_coord.append([a, b, c])
        
        atom = i[3]
        zeros = [0 for _ in range(5)]
        if atom in atom_dict_:
            zeros[atom_dict_[atom]] = 1
        cur_feat.append(zeros)
        
    xyz = np.asarray(cur_coord, dtype = np.float32)
    norm = np.asarray(cur_feat, dtype = np.int32)
    centroid = np.mean(xyz, axis=0)
    xyz -= centroid
    if normalize:
        m = np.max(np.sqrt(np.sum(xyz**2, axis=1)))
        xyz = xyz / m
    
    points = np.concatenate([xyz, norm], 1)
    X_[idx] = points

In [10]:
l = [len(x) for x in X_]

In [11]:
np.percentile(l, 1)

737.0

In [12]:
max_len = int(np.percentile(l, 1))

In [13]:
input_data = sequence.pad_sequences(X_, maxlen=max_len, padding='post', dtype='float32', truncating='post')
y = np.array(y)

In [14]:
X_train, X_test, y_train, y_test = train_test_split(input_data, y, test_size=.2, random_state=0)
X_train.shape, y_train.shape

((8000, 737, 8), (8000,))

In [15]:
from models.GACNet.modules_new import GACNet, get_loss



In [16]:
class PC_dataset(Dataset):

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        pc = self.x[idx]
        label = self.y[idx]

        return pc, label

train_dataset = PC_dataset(X_train, y_train)
test_dataset = PC_dataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [19]:
NUM_CLASSES = 2

classifier = GACNet(NUM_CLASSES).cuda()
#criterion = get_loss()
# criterion = torch.nn.BCELoss().cuda()
criterion = get_loss().cuda()

optimizer = torch.optim.Adam(
    classifier.parameters(),
    lr=0.001,
    betas=(0.9, 0.999),
    eps=1e-07,
    weight_decay=0
)

def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv2d') != -1:
        torch.nn.init.xavier_normal_(m.weight.data)
        torch.nn.init.constant_(m.bias.data, 0.0)
    elif classname.find('Linear') != -1:
        torch.nn.init.xavier_normal_(m.weight.data)
        torch.nn.init.constant_(m.bias.data, 0.0)

def inplace_relu(m):
    classname = m.__class__.__name__
    if classname.find('ReLU') != -1:
        m.inplace=True
classifier = classifier.apply(inplace_relu)
classifier = classifier.apply(weights_init)

In [30]:
def rotate_point_cloud_z(batch_data):
    """ Randomly rotate the point clouds to augument the dataset
        rotation is per shape based along up direction
        Input:
          BxNx3 array, original batch of point clouds
        Return:
          BxNx3 array, rotated batch of point clouds
    """
    rotated_data = np.zeros(batch_data.shape, dtype=np.float32)
    for k in range(batch_data.shape[0]):
        rotation_angle = np.random.uniform() * 2 * np.pi
        cosval = np.cos(rotation_angle)
        sinval = np.sin(rotation_angle)
        rotation_matrix = np.array([[cosval, sinval, 0],
                                    [-sinval, cosval, 0],
                                    [0, 0, 1]])
        shape_pc = batch_data[k, ...]
        rotated_data[k, ...] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix)
    return rotated_data

In [None]:
train_hist_loss = []
train_hist_acc = []
test_hist_acc = []
for epoch in range(150):
    num_batches = len(train_loader)
    total_correct = 0
    total_seen = 0
    loss_sum = 0
    classifier = classifier.train()
    mean_correct = []
    total_loss = 0
    for i, (points, target) in tqdm(enumerate(train_loader), total=len(train_loader), smoothing=0.9):
        optimizer.zero_grad()
    
        #points = points.data.numpy()
        #points = torch.Tensor(points)
        points = points.data.numpy()
        points[:, :, :3] = rotate_point_cloud_z(points[:, :, :3])
        points = torch.Tensor(points)
        
        points = points.transpose(2, 1)
        norm = points[:, 3:, :]
        xyz = points[:, :3, :]
        
        xyz, norm, target = xyz.float().cuda(), norm.float().cuda(), target.long().cuda()
        #print(xyz.shape, norm.shape)
        pred = classifier(xyz, norm)
        loss = criterion(pred, target.long())


        pred_choice = pred.data.max(1)[1]
        correct = pred_choice.eq(target.long().data).cpu().sum()
        mean_correct.append(correct.item() / float(points.size()[0]))
        loss.backward()
        optimizer.step()
        total_loss += loss.cpu().detach().data.item()
    train_instance_acc = np.mean(mean_correct)
    train_hist_acc.append(train_instance_acc)
    train_hist_loss.append(total_loss)
    print("Epoch: {}".format(epoch+1))
    print('Train Instance Accuracy: %f' % train_instance_acc)
    print('Train Loss: %f' % total_loss)
    
    mean_correct_test = []
    with torch.no_grad():
        for i, (points, target) in enumerate(test_loader):

            points = points.transpose(2, 1)
            norm = points[:, 3:, :]
            xyz = points[:, :3, :]

            xyz, norm, target = xyz.float().cuda(), norm.float().cuda(), target.long().cuda()
            pred = classifier(xyz, norm)


            pred_choice = pred.data.max(1)[1]
            correct = pred_choice.eq(target.long().data).cpu().sum()
            mean_correct_test.append(correct.item() / float(points.size()[0]))
        test_instance_acc = np.mean(mean_correct_test)
        test_hist_acc.append(test_instance_acc)
        #print("Epoch: {}".format(epoch+1))
        print('Test Instance Accuracy: %f' % test_instance_acc)

  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 1
Train Instance Accuracy: 0.798000
Train Loss: 221.344244
Test Instance Accuracy: 0.834500


  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 2
Train Instance Accuracy: 0.835375
Train Loss: 192.865885
Test Instance Accuracy: 0.828000


  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 3
Train Instance Accuracy: 0.850750
Train Loss: 176.676896
Test Instance Accuracy: 0.879500


  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 4
Train Instance Accuracy: 0.868000
Train Loss: 167.155889
Test Instance Accuracy: 0.861000


  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 5
Train Instance Accuracy: 0.877875
Train Loss: 151.793852
Test Instance Accuracy: 0.885000


  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 6
Train Instance Accuracy: 0.882875
Train Loss: 144.874851
Test Instance Accuracy: 0.889000


  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 7
Train Instance Accuracy: 0.889875
Train Loss: 139.030209
Test Instance Accuracy: 0.888000


  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 8
Train Instance Accuracy: 0.886500
Train Loss: 140.109290
Test Instance Accuracy: 0.899500


  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 9
Train Instance Accuracy: 0.889625
Train Loss: 133.963263
Test Instance Accuracy: 0.897500


  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 10
Train Instance Accuracy: 0.892750
Train Loss: 132.996935
Test Instance Accuracy: 0.894000


  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 11
Train Instance Accuracy: 0.895625
Train Loss: 132.527693
Test Instance Accuracy: 0.894500


  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 12
Train Instance Accuracy: 0.900500
Train Loss: 124.494545
Test Instance Accuracy: 0.907000


  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 13
Train Instance Accuracy: 0.905000
Train Loss: 124.287761
Test Instance Accuracy: 0.905000


  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 14
Train Instance Accuracy: 0.904750
Train Loss: 121.769026
Test Instance Accuracy: 0.906000


  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 15
Train Instance Accuracy: 0.906250
Train Loss: 121.609382
Test Instance Accuracy: 0.895500


  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 16
Train Instance Accuracy: 0.905500
Train Loss: 116.972053
Test Instance Accuracy: 0.913000


  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 17
Train Instance Accuracy: 0.909750
Train Loss: 117.136046
Test Instance Accuracy: 0.914000


  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 18
Train Instance Accuracy: 0.911250
Train Loss: 113.605628
Test Instance Accuracy: 0.919000


  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 19
Train Instance Accuracy: 0.915375
Train Loss: 112.402882
Test Instance Accuracy: 0.922500


  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 20
Train Instance Accuracy: 0.922000
Train Loss: 105.215347
Test Instance Accuracy: 0.920500


  0%|          | 0/500 [00:00<?, ?it/s]

In [107]:
pred.shape

torch.Size([16, 2])

In [25]:
points = points.data.numpy()
points[:, :, :3] = rotate_point_cloud_z(points[:, :, :3])
points = torch.Tensor(points)

points = points.transpose(2, 1)
norm = points[:, 3:, :]
xyz = points[:, :3, :]

In [26]:
xyz.shape

torch.Size([16, 3, 8])

In [28]:
loss

tensor(0.3220, device='cuda:0', grad_fn=<NllLossBackward>)

In [29]:
total_loss

143.74223038554192