# Testing on Unseen Data

In [2]:
import itertools
import math
import os
!pip install path.py
from path import Path
import random
import numpy as np
# import matplotlib.pyplot as plt
# import plotly.graph_objects as go
# import plotly.express as px
# import scipy.spatial.distance
# !pip install scikit-plot
# import scikitplot as skplt

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

# from sklearn import metrics
# from sklearn.metrics import confusion_matrix
# from sklearn.metrics import precision_recall_curve
# from sklearn.metrics import plot_precision_recall_curve
# from sklearn.metrics import average_precision_score
# from sklearn.metrics import classification_report
# from sklearn.metrics import roc_curve
# from sklearn.metrics import roc_auc_score

from google.colab import drive
drive.mount('/content/drive')

# Seed every RNG the pipeline draws from so sampling, rotation and noise are
# reproducible. `random.seed` is a function and has to be called: assigning to
# it would replace the function with an int and seed nothing.
SEED = 1234
random.seed(SEED)        # PointSampler / RandRotation_z use `random`
np.random.seed(SEED)     # RandomNoise uses `np.random`
torch.manual_seed(SEED)  # torch-side randomness (e.g. weight init, dropout)

Collecting path.py
  Downloading https://files.pythonhosted.org/packages/8f/04/130b7a538c25693c85c4dee7e25d126ebf5511b1eb7320e64906687b159e/path.py-12.5.0-py3-none-any.whl
Collecting path
  Downloading https://files.pythonhosted.org/packages/cb/81/b9090d24e60369fd9413b92fcd87e13a37bf43dad3427d35e09915f788ac/path-15.0.0-py3-none-any.whl
Installing collected packages: path, path.py
Successfully installed path-15.0.0 path.py-12.5.0
Mounted at /content/drive


In [3]:
def read_off(file):
    """Parse an ASCII OFF mesh from an open text file object.

    Args:
        file: A text-mode file-like object positioned at the start of the
            OFF data (anything providing ``readline()``).

    Returns:
        A ``(verts, faces)`` tuple. ``verts`` is a list of float coordinate
        lists, one per vertex. ``faces`` is a list of vertex-index lists, one
        per face, with the leading per-face vertex count removed.

    Raises:
        ValueError: If the first line does not start with ``OFF`` or a
            numeric field cannot be parsed.
    """
    header = file.readline().strip()
    if header == 'OFF':
        counts_line = file.readline()
    elif header.startswith('OFF'):
        # Some exporters fuse the counts onto the header line ("OFF490 518 0").
        counts_line = header[3:]
    else:
        # Raise a real exception type; raising a plain string is a TypeError.
        raise ValueError('Not a valid OFF header')

    # split() without an argument tolerates tabs and repeated spaces.
    n_verts, n_faces, _n_edges = (int(token) for token in counts_line.split())
    verts = [[float(token) for token in file.readline().split()]
             for _ in range(n_verts)]
    # The first number on each face line is its vertex count; drop it.
    faces = [[int(token) for token in file.readline().split()][1:]
             for _ in range(n_faces)]

    return verts, faces


class PointSampler(object):
    """Transform that turns a ``(verts, faces)`` mesh into a sampled point cloud.

    Faces are drawn with replacement, weighted by their area, and one random
    point is generated on each drawn face.
    """

    def __init__(self, output_size):
        # Number of points to generate per mesh.
        assert isinstance(output_size, int)
        self.output_size = output_size

    def triangle_area(self, pt1, pt2, pt3):
        """Return the area of the triangle (pt1, pt2, pt3) via Heron's formula."""
        len_a = np.linalg.norm(pt1 - pt2)
        len_b = np.linalg.norm(pt2 - pt3)
        len_c = np.linalg.norm(pt3 - pt1)
        half_perimeter = 0.5 * (len_a + len_b + len_c)

        radicand = half_perimeter
        for edge in (len_a, len_b, len_c):
            radicand = radicand * (half_perimeter - edge)

        # Clamp at zero: rounding can push a degenerate triangle slightly negative.
        return max(radicand, 0)**0.5

    def sample_point(self, pt1, pt2, pt3):
        """Return a random ``(x, y, z)`` tuple on the triangle (pt1, pt2, pt3).

        Uses barycentric coordinates on a triangle:
        https://mathworld.wolfram.com/BarycentricCoordinates.html
        """
        first = random.random()
        second = random.random()
        lo, hi = min(first, second), max(first, second)

        # The three weights lo, hi - lo and 1 - hi are non-negative and sum to 1.
        return tuple(lo * pt1[axis] + (hi - lo) * pt2[axis] + (1 - hi) * pt3[axis]
                     for axis in range(3))

    def __call__(self, mesh):
        """Sample ``self.output_size`` points from ``mesh`` (a ``(verts, faces)`` pair).

        Returns:
            A numpy array of shape ``(output_size, 3)``.
        """
        vertices, triangles = mesh
        vertices = np.array(vertices)

        # Only the first three vertex indices of each face are used.
        face_areas = np.zeros((len(triangles)))
        for face_idx, tri in enumerate(triangles):
            face_areas[face_idx] = self.triangle_area(vertices[tri[0]],
                                                      vertices[tri[1]],
                                                      vertices[tri[2]])

        # Area-weighted draw (with replacement) of the faces to sample from.
        drawn_faces = random.choices(triangles,
                                     weights=face_areas,
                                     cum_weights=None,
                                     k=self.output_size)

        cloud = np.zeros((self.output_size, 3))
        for row, tri in enumerate(drawn_faces):
            cloud[row] = self.sample_point(vertices[tri[0]],
                                           vertices[tri[1]],
                                           vertices[tri[2]])

        return cloud


class Normalize(object):
    """Transform that centres a point cloud on its mean and scales it so the
    farthest point lies at distance 1 from the origin."""

    def __call__(self, pointcloud):
        dims = pointcloud.shape
        assert len(dims)==2

        # Shift the centroid to the origin; this creates a new array.
        centered = pointcloud - np.mean(pointcloud, axis=0)
        # Largest per-point Euclidean norm after centring.
        max_radius = np.max(np.linalg.norm(centered, axis=1))

        return centered / max_radius


class RandRotation_z(object):
    """Transform that rotates a point cloud about the z axis by a random angle
    drawn uniformly from [0, 2*pi)."""

    def __call__(self, pointcloud):
        assert len(pointcloud.shape)==2

        angle = random.random() * 2. * math.pi
        cos_a = math.cos(angle)
        sin_a = math.sin(angle)

        # Standard rotation about z: x and y mix, z is left untouched.
        rotation = np.array([[cos_a, -sin_a, 0],
                             [sin_a,  cos_a, 0],
                             [0,      0,     1]])

        # Rotate the column vectors, then transpose back to one point per row.
        return rotation.dot(pointcloud.T).T
    
class RandomNoise(object):
    """Transform that jitters every coordinate with independent Gaussian noise
    (mean 0, standard deviation 0.02)."""

    def __call__(self, pointcloud):
        cloud_shape = pointcloud.shape
        assert len(cloud_shape)==2

        # One noise value per coordinate, drawn from numpy's global RNG.
        jitter = np.random.normal(0, 0.02, (cloud_shape))
        return pointcloud + jitter


class ToTensor(object):
    """Transform that wraps a 2-D numpy point cloud as a torch tensor."""

    def __call__(self, pointcloud):
        rank = len(pointcloud.shape)
        assert rank==2

        # from_numpy shares memory with the source array (no copy is made).
        tensor = torch.from_numpy(pointcloud)
        return tensor


def default_transforms():
    """Build the default mesh-to-tensor pipeline.

    Steps, in order: sample 6550 surface points, normalise, apply a random
    rotation about z, add Gaussian noise, convert to a torch tensor.
    """
    pipeline = [
        PointSampler(6550),
        Normalize(),
        RandRotation_z(),
        RandomNoise(),
        ToTensor(),
    ]
    return transforms.Compose(pipeline)


class PointCloudData(Dataset):
    """Labelled dataset of ``.off`` meshes stored as ``root_dir/<class>/<folder>/*.off``.

    Each sub-directory of ``root_dir`` is one class; class indices follow the
    sorted directory names.

    Args:
        root_dir: Dataset root. It is combined with names using ``/``, so it
            must be a path object supporting that operator (e.g. ``path.Path``).
        valid: When true, ``transform`` is ignored and ``default_transforms()``
            is used instead. Note that the default pipeline still includes the
            random rotation and noise steps.
        folder: Name of the split directory inside every class directory.
        transform: Callable applied to the ``(verts, faces)`` tuple of a mesh.
            If falsy, the raw ``(verts, faces)`` tuple is returned unchanged.
    """

    def __init__(self, root_dir, valid=False, folder="train", transform=default_transforms()):
        self.root_dir = root_dir
        class_names = [name for name in sorted(os.listdir(root_dir))
                       if os.path.isdir(root_dir/name)]
        self.classes = {name: index for index, name in enumerate(class_names)}
        self.transforms = transform if not valid else default_transforms()
        self.valid = valid
        self.files = []
        for category in self.classes.keys():
            new_dir = root_dir/Path(category)/folder
            # os.listdir order is unspecified; sort so that index -> file is stable.
            for file in sorted(os.listdir(new_dir)):
                if file.endswith('.off'):
                    self.files.append({'pcd_path': new_dir/file,
                                       'category': category})

    def __len__(self):
        return len(self.files)

    def __preproc__(self, file):
        """Read a mesh from an open file and apply the transform, if any."""
        mesh = read_off(file)
        if self.transforms:
            return self.transforms(mesh)
        # No transform configured: return the raw mesh rather than failing
        # on an unassigned local.
        return mesh

    def __getitem__(self, idx):
        """Return ``{'pointcloud': ..., 'category': class_index}`` for item ``idx``."""
        entry = self.files[idx]
        with open(entry['pcd_path'], 'r') as f:
            pointcloud = self.__preproc__(f)
        return {'pointcloud': pointcloud,
                'category': self.classes[entry['category']]}


import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F

class Tnet(nn.Module):
    """Small network that predicts a ``k x k`` transform matrix per sample.

    Args:
        k: Number of input channels; also the side length of the predicted
            square matrix.
    """

    def __init__(self, k=3):
        super().__init__()
        self.k = k
        self.conv1 = nn.Conv1d(k, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)

        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, k*k)

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)

    def forward(self, input):
        """Predict one transform matrix per sample.

        Args:
            input: Tensor of shape ``(bs, k, n)`` (channels first, as required
                by the ``Conv1d`` layers).

        Returns:
            Tensor of shape ``(bs, k, k)``: the regressed matrix plus identity.
        """
        xb = F.relu(self.bn1(self.conv1(input)))
        xb = F.relu(self.bn2(self.conv2(xb)))
        xb = F.relu(self.bn3(self.conv3(xb)))

        # Global average pooling over the point dimension (max pooling was
        # replaced by average pooling in this notebook).
        pool = F.avg_pool1d(xb, xb.size(-1))
        flat = torch.flatten(pool, 1)

        xb = F.relu(self.bn4(self.fc1(flat)))
        xb = F.relu(self.bn5(self.fc2(xb)))

        # Bias the prediction towards identity. The identity is created
        # directly on the activations' device/dtype so this works on any
        # device (CPU, any CUDA index), and it is a constant, so it does not
        # need gradients. It broadcasts over the batch dimension.
        identity = torch.eye(self.k, device=xb.device, dtype=xb.dtype)
        matrix = self.fc3(xb).view(-1, self.k, self.k) + identity
        return matrix


class Transform(nn.Module):
    """Feature extractor: aligns the input and the 64-channel features with
    two T-Nets and pools the result into one 1024-dimensional vector per sample."""

    def __init__(self):
        super().__init__()
        # Layers are created in this order on purpose: it fixes parameter
        # registration and random initialisation order.
        self.input_transform = Tnet(k=3)
        self.feature_transform = Tnet(k=64)

        self.conv1 = nn.Conv1d(3, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)

    def forward(self, input):
        """Return ``(features, matrix3x3, matrix64x64)`` for a channels-first
        input batch (3 input channels, as required by ``conv1``)."""
        # Align the raw coordinates: batched (points x 3) @ (3 x 3), then
        # back to channels-first.
        matrix3x3 = self.input_transform(input)
        aligned_points = torch.bmm(input.transpose(1, 2), matrix3x3)
        feats = aligned_points.transpose(1, 2)
        feats = F.relu(self.bn1(self.conv1(feats)))

        # Align the 64-channel features in the same way.
        matrix64x64 = self.feature_transform(feats)
        aligned_feats = torch.bmm(feats.transpose(1, 2), matrix64x64)
        feats = aligned_feats.transpose(1, 2)

        feats = F.relu(self.bn2(self.conv2(feats)))
        # No ReLU after the last batch norm.
        feats = self.bn3(self.conv3(feats))

        # Global average pooling over the point dimension, then flatten.
        pooled = F.avg_pool1d(feats, feats.size(-1))
        output = torch.flatten(pooled, 1)

        return output, matrix3x3, matrix64x64

class PointNet(nn.Module):
    """Classifier head on top of ``Transform``.

    Args:
        classes: Number of output classes.
    """

    def __init__(self, classes = 2):
        super().__init__()
        # Keep the creation order: it determines parameter registration and
        # random initialisation order.
        self.transform = Transform()
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, classes)

        self.bn1 = nn.BatchNorm1d(512)
        self.bn2 = nn.BatchNorm1d(256)

        self.dropout = nn.Dropout(p=0.3)
        # NOTE(review): pointnetloss in this file applies NLLLoss, which is
        # normally paired with log-softmax -- confirm LogSigmoid is intended.
        self.logsigmoid = nn.LogSigmoid()

    def forward(self, input):
        """Return ``(log_sigmoid_scores, matrix3x3, matrix64x64)``."""
        features, matrix3x3, matrix64x64 = self.transform(input)

        hidden = self.fc1(features)
        hidden = F.relu(self.bn1(hidden))

        # Dropout is applied to the fc2 output, before batch norm.
        hidden = self.dropout(self.fc2(hidden))
        hidden = F.relu(self.bn2(hidden))

        scores = self.fc3(hidden)
        return self.logsigmoid(scores), matrix3x3, matrix64x64


def pointnetloss(outputs, labels, m3x3, m64x64, alpha = 0.001):
    """Classification loss plus an orthogonality penalty on both transforms.

    Args:
        outputs: Per-class scores passed straight to ``NLLLoss``.
        labels: Target class indices.
        m3x3: Batch of 3x3 transform matrices, shape ``(bs, 3, 3)``.
        m64x64: Batch of 64x64 transform matrices, shape ``(bs, 64, 64)``.
        alpha: Weight of the orthogonality penalty.

    Returns:
        Scalar tensor ``NLL + alpha * (||I - A A^T|| + ||I - B B^T||) / bs``,
        where each norm is taken over the whole batch tensor.
    """
    criterion = torch.nn.NLLLoss()
    bs = outputs.size(0)

    # Build the identities on the matrices' own device/dtype (works on CPU
    # and on any CUDA index). They are constants, so no gradient is needed,
    # and they broadcast over the batch dimension.
    id3x3 = torch.eye(3, device=m3x3.device, dtype=m3x3.dtype)
    id64x64 = torch.eye(64, device=m64x64.device, dtype=m64x64.dtype)

    diff3x3 = id3x3 - torch.bmm(m3x3, m3x3.transpose(1, 2))
    diff64x64 = id64x64 - torch.bmm(m64x64, m64x64.transpose(1, 2))

    penalty = (torch.norm(diff3x3) + torch.norm(diff64x64)) / float(bs)
    return criterion(outputs, labels) + alpha * penalty

In [4]:
class UnseenPointCloudData(Dataset):
    """Unlabelled dataset over every ``.off`` mesh found directly in ``root_dir``.

    Args:
        root_dir: Directory holding the meshes. It is combined with file names
            using ``/``, so it must be a path object supporting that operator
            (e.g. ``path.Path``).
        transform: Callable applied to the ``(verts, faces)`` tuple of a mesh.
            If falsy, the raw ``(verts, faces)`` tuple is returned unchanged.
    """

    def __init__(self, root_dir, transform=default_transforms()):
        self.root_dir = root_dir
        self.transforms = transform

        # os.listdir order is unspecified. Sorting keeps index -> file stable,
        # so predictions from different models line up item by item.
        self.files = [{'pcd_path': root_dir/file}
                      for file in sorted(os.listdir(root_dir))
                      if file.endswith('.off')]

    def __len__(self):
        return len(self.files)

    def __preproc__(self, file):
        """Read a mesh from an open file and apply the transform, if any."""
        mesh = read_off(file)
        if self.transforms:
            return self.transforms(mesh)
        # No transform configured: return the raw mesh rather than failing
        # on an unassigned local.
        return mesh

    def __getitem__(self, idx):
        """Return ``{'pointcloud': ...}`` for item ``idx`` (also prints its path)."""
        pcd_path = self.files[idx]['pcd_path']
        print("PCD_PATH: ", pcd_path)

        with open(pcd_path, 'r') as f:
            pointcloud = self.__preproc__(f)

        return {'pointcloud': pointcloud}


def evaluate_unseen_data(unseen_loader):
    """Run the global ``pointnet`` model over a loader and collect predicted labels.

    Relies on the module-level names ``pointnet`` (the model) and ``device``.

    Args:
        unseen_loader: Iterable of batches; each batch is a dict whose
            ``'pointcloud'`` entry is a tensor with points on dim 1 and
            coordinates on dim 2 (it is transposed before the forward pass).

    Returns:
        A flat list of predicted class indices (Python ints), one per sample,
        in loader order. Batches of any size are supported.
    """
    pointnet.to(device)

    # Inference must run in eval mode (dropout off, batch-norm running
    # statistics). Restore the previous mode afterwards so the caller's model
    # state is left as it was.
    was_training = pointnet.training
    pointnet.eval()

    predictions = []
    try:
        with torch.no_grad():
            for data in unseen_loader:
                inputs = data['pointcloud'].to(device).float()
                outputs, _m3x3, _m64x64 = pointnet(inputs.transpose(1, 2))
                _, predicted = torch.max(outputs, 1)
                # tolist() handles any batch size; .item() only works for one sample.
                for label in predicted.tolist():
                    print("Predicted Label:  ", label)
                    print("------")
                    predictions.append(label)
    finally:
        pointnet.train(was_training)

    return predictions



def hard_voting(results):
    """Majority vote across models, sample by sample.

    Args:
        results: Sequence of per-model prediction lists, all in the same
            sample order. Any number of models is accepted.

    Returns:
        A list with the most frequent label for every sample. Ties go to the
        smallest label, because ``np.unique`` returns sorted values and
        ``np.argmax`` picks the first maximum. Returns ``[]`` for no models.
    """
    hard_votes = []
    # zip(*results) groups the i-th prediction of every model, however many
    # models there are.
    for sample_votes in zip(*results):
        labels, counts = np.unique(sample_votes, return_counts=True)
        hard_votes.append(labels[np.argmax(counts)])

    return hard_votes

def soft_voting(results, threshold=.90):
    """Average the per-model scores for each sample and threshold the mean.

    Args:
        results: Sequence of per-model score lists, all in the same sample
            order. Any number of models is accepted.
        threshold: A sample is labelled 1 when its mean score is strictly
            greater than this value, otherwise 0.

    Returns:
        A list of 0/1 labels, one per sample. Returns ``[]`` for no models.

    Note:
        If the inputs are hard 0/1 labels rather than probabilities, the
        default threshold of 0.90 yields 1 only when every model votes 1.
    """
    soft_votes = []
    # zip(*results) groups the i-th score of every model; dividing by the
    # group size gives the mean for any ensemble size.
    for scores in zip(*results):
        mean_score = sum(scores) / len(scores)
        soft_votes.append(1 if mean_score > threshold else 0)

    return soft_votes

In [None]:
# NOTE(review): this cell has no execution count and cannot run as-is in this
# notebook. roc_auc_score, metrics, skplt and plt come from imports that are
# commented out in the first cell, and xzt_all_labels, yzt_all_labels and
# soft_votes are not defined at module level anywhere in the visible notebook.
# NOTE(review): the AUC below uses xzt_all_labels while every other call uses
# yzt_all_labels -- confirm which label set is intended.
roc_auc = roc_auc_score(xzt_all_labels, soft_votes)
fpr, tpr, thresholds = metrics.roc_curve(yzt_all_labels, soft_votes)
print("FPR {} \n TPR: {} \n THRESHOLDS: {} \n".format(fpr, tpr, thresholds))

# Complement of each vote, used as the first column of the two-column array
# passed to the plotting helpers below.
soft_proba = [1 - x for x in soft_votes] 

# ROC curve, saved to ./test.jpg.
skplt.metrics.plot_roc(yzt_all_labels,
                       np.column_stack([soft_proba, soft_votes]))
plt.savefig("./test.jpg", dpi=600, bbox_inches = "tight")

plt.show()

# Precision-recall curve, saved to ./west.jpg.
skplt.metrics.plot_precision_recall(yzt_all_labels,
                                    np.column_stack([soft_proba, soft_votes]))
plt.savefig("./west.jpg", dpi=600, bbox_inches = "tight")

plt.show()

# Cumulative gain chart, saved to ./mest.jpg.
skplt.metrics.plot_cumulative_gain(yzt_all_labels,
                                   np.column_stack([soft_proba, soft_votes]))
plt.savefig("./mest.jpg", dpi=600, bbox_inches = "tight")

# Lift curve (displayed only; no savefig call follows it).
skplt.metrics.plot_lift_curve(yzt_all_labels,
                              np.column_stack([soft_proba, soft_votes]))

In [8]:
# Evaluate the three per-projection PointNet models on the unseen meshes,
# time the inference, and combine the predictions by voting.
import time  # CPU timing fallback when CUDA events are unavailable

results = []

POINTS =  8550
WARMUP_ITERATIONS = 10

XYT = Path("/content/drive/My Drive/KM3Net Data/ensemble/xyt/")
XZT = Path("/content/drive/My Drive/KM3Net Data/ensemble/xzt/")
YZT = Path("/content/drive/My Drive/KM3Net Data/ensemble//yzt/")
EVALUATE = Path("/content/drive/My Drive/KM3Net Data/ensemble/test/")

# Model directory -> checkpoint base name (the file is "<name>.pth").
PATHS_DICT = {XYT: 'xyt',
              XZT: 'xzt',
              YZT: 'yzt'}


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# CUDA events only exist on a CUDA device; use a wall clock otherwise.
use_cuda_timing = device.type == "cuda"

# NOTE(review): this pipeline includes random rotation and noise, so
# evaluation inputs are augmented -- confirm that is intended.
train_transforms = transforms.Compose([
                    PointSampler(POINTS),
                    Normalize(),
                    RandRotation_z(),
                    RandomNoise(),
                    ToTensor()])

dummy_input = torch.randn(64, 3, 1, dtype=torch.float).to(device)

# Build the dataset once so every model is evaluated on the same file order.
unseen_ds = UnseenPointCloudData(EVALUATE, transform=train_transforms)
unseen_loader = DataLoader(dataset=unseen_ds)

# Total inference time over all models, in milliseconds.
curr_time = 0.0

for PATH, model_name in PATHS_DICT.items():
    file = model_name + ".pth"

    pointnet = PointNet()
    # map_location lets GPU-saved checkpoints load on a CPU-only runtime.
    pointnet.load_state_dict(torch.load(PATH/file, map_location=device))
    pointnet.to(device)
    pointnet.eval()

    # Warm-up: the model must exist before it is called, and the warm-up is
    # kept outside the timed region.
    with torch.no_grad():
        for _ in range(WARMUP_ITERATIONS):
            pointnet(dummy_input)

    if use_cuda_timing:
        starter = torch.cuda.Event(enable_timing=True)
        ender = torch.cuda.Event(enable_timing=True)
        starter.record()
    else:
        start_seconds = time.perf_counter()

    results.append(evaluate_unseen_data(unseen_loader))

    if use_cuda_timing:
        ender.record()
        # Wait for the GPU to finish before reading the elapsed time.
        torch.cuda.synchronize()
        curr_time += starter.elapsed_time(ender)
    else:
        curr_time += (time.perf_counter() - start_seconds) * 1000.0

print("Results From 3 Models: ", results)
print('Train dataset size: ', len(unseen_ds))
print('Sample pointcloud shape: ', unseen_ds[0]['pointcloud'].size())
print("Hard Voting Results: ", hard_voting(results))
# NOTE(review): `results` holds hard class labels, not probabilities, so
# soft_voting averages votes here -- confirm that is the intended input.
print("Soft Voting Results: ", soft_voting(results))

PCD_PATH:  /content/drive/My Drive/KM3Net Data/ensemble/test/group_1063_mesh.off
Predicted Label:   0
------
PCD_PATH:  /content/drive/My Drive/KM3Net Data/ensemble/test/group_1063_mesh.off
Predicted Label:   0
------
PCD_PATH:  /content/drive/My Drive/KM3Net Data/ensemble/test/group_1063_mesh.off
Predicted Label:   0
------
Results From 3 Models:  [[0], [0], [0]]
Train dataset size:  1
PCD_PATH:  /content/drive/My Drive/KM3Net Data/ensemble/test/group_1063_mesh.off
Sample pointcloud shape:  torch.Size([8550, 3])
Hard Voting Results:  [0]
Soft Voting Results:  [0]


In [9]:
# Show the elapsed time (in milliseconds) measured by the evaluation cell above.
curr_time

370.8993225097656