<a href="https://colab.research.google.com/github/tabba98/neural-network/blob/main/nndl_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import matplotlib.pyplot as plt
import os
import torch
import torch.nn as nn
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torchvision.utils import make_grid
from torch.utils.data import DataLoader
import time as time
import numpy as np
from IPython import display

In [None]:
!pip install path.py;
from path import Path

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting path.py
  Downloading path.py-12.5.0-py3-none-any.whl (2.3 kB)
Collecting path
  Downloading path-16.6.0-py3-none-any.whl (26 kB)
Installing collected packages: path, path.py
Successfully installed path-16.6.0 path.py-12.5.0


In [None]:
!wget http://3dvision.princeton.edu/projects/2014/3DShapeNets/ModelNet10.zip

--2023-01-11 07:38:08--  http://3dvision.princeton.edu/projects/2014/3DShapeNets/ModelNet10.zip
Resolving 3dvision.princeton.edu (3dvision.princeton.edu)... 128.112.136.74
Connecting to 3dvision.princeton.edu (3dvision.princeton.edu)|128.112.136.74|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://3dvision.princeton.edu/projects/2014/3DShapeNets/ModelNet10.zip [following]
--2023-01-11 07:38:09--  https://3dvision.princeton.edu/projects/2014/3DShapeNets/ModelNet10.zip
Connecting to 3dvision.princeton.edu (3dvision.princeton.edu)|128.112.136.74|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 473402300 (451M) [application/zip]
Saving to: ‘ModelNet10.zip’


2023-01-11 07:39:01 (8.72 MB/s) - ‘ModelNet10.zip’ saved [473402300/473402300]



In [None]:
!unzip -q ModelNet10.zip;

In [None]:
path = Path("ModelNet10")

In [None]:
folders = [dir for dir in sorted(os.listdir(path)) if os.path.isdir(path/dir)]
classes = {folder: i for i, folder in enumerate(folders)};
classes

{'bathtub': 0,
 'bed': 1,
 'chair': 2,
 'desk': 3,
 'dresser': 4,
 'monitor': 5,
 'night_stand': 6,
 'sofa': 7,
 'table': 8,
 'toilet': 9}

In [None]:
def read_off(file):
    if 'OFF' != file.readline().strip():
        raise('Not a valid OFF header')
    n_verts, n_faces, __ = tuple([int(s) for s in file.readline().strip().split(' ')])
    verts = [[float(s) for s in file.readline().strip().split(' ')] for i_vert in range(n_verts)]
    faces = [[int(s) for s in file.readline().strip().split(' ')][1:] for i_face in range(n_faces)]
    return verts, faces

In [None]:
with open(path/"bed/train/bed_0001.off", 'r') as f:
  verts, faces = read_off(f)

In [None]:
i,j,k = np.array(faces).T
x,y,z = np.array(verts).T

In [None]:
def visualize_rotate(data):
    x_eye, y_eye, z_eye = 1.25, 1.25, 0.8
    frames=[]

    def rotate_z(x, y, z, theta):
        w = x+1j*y
        return np.real(np.exp(1j*theta)*w), np.imag(np.exp(1j*theta)*w), z

    for t in np.arange(0, 10.26, 0.1):
        xe, ye, ze = rotate_z(x_eye, y_eye, z_eye, -t)
        frames.append(dict(layout=dict(scene=dict(camera=dict(eye=dict(x=xe, y=ye, z=ze))))))
    fig = go.Figure(data=data,
                    layout=go.Layout(
                        updatemenus=[dict(type='buttons',
                                    showactive=False,
                                    y=1,
                                    x=0.8,
                                    xanchor='left',
                                    yanchor='bottom',
                                    pad=dict(t=45, r=10),
                                    buttons=[dict(label='Play',
                                                    method='animate',
                                                    args=[None, dict(frame=dict(duration=50, redraw=True),
                                                                    transition=dict(duration=0),
                                                                    fromcurrent=True,
                                                                    mode='immediate'
                                                                    )]
                                                    )
                                            ]
                                    )
                                ]
                    ),
                    frames=frames
            )

    return fig
     

In [None]:
import plotly.graph_objects as go
import plotly.express as px
visualize_rotate([go.Mesh3d(x=x, y=y, z=z, color='lightpink', opacity=0.50, i=i,j=j,k=k)]).show()

In [None]:
visualize_rotate([go.Scatter3d(x=x, y=y, z=z,
                                   mode='markers')]).show()

In [None]:
def pcshow(xs,ys,zs):
    data=[go.Scatter3d(x=xs, y=ys, z=zs,
                                   mode='markers')]
    fig = visualize_rotate(data)
    fig.update_traces(marker=dict(size=2,
                      line=dict(width=2,
                      color='DarkSlateGrey')),
                      selector=dict(mode='markers'))
    fig.show()

In [None]:
pcshow(x,y,z)

In [None]:
class PointSampler(object):
    def __init__(self, output_size):
        assert isinstance(output_size, int)
        self.output_size = output_size
    
    def triangle_area(self, pt1, pt2, pt3):
        side_a = np.linalg.norm(pt1 - pt2)
        side_b = np.linalg.norm(pt2 - pt3)
        side_c = np.linalg.norm(pt3 - pt1)
        s = 0.5 * ( side_a + side_b + side_c)
        return max(s * (s - side_a) * (s - side_b) * (s - side_c), 0)**0.5

    def sample_point(self, pt1, pt2, pt3):
        # barycentric coordinates on a triangle
        # https://mathworld.wolfram.com/BarycentricCoordinates.html
        s, t = sorted([random.random(), random.random()])
        f = lambda i: s * pt1[i] + (t-s)*pt2[i] + (1-t)*pt3[i]
        return (f(0), f(1), f(2))
        
    
    def __call__(self, mesh):
        verts, faces = mesh
        verts = np.array(verts)
        areas = np.zeros((len(faces)))

        for i in range(len(areas)):
            areas[i] = (self.triangle_area(verts[faces[i][0]],
                                           verts[faces[i][1]],
                                           verts[faces[i][2]]))
            
        sampled_faces = (random.choices(faces, 
                                      weights=areas,
                                      cum_weights=None,
                                      k=self.output_size))
        
        sampled_points = np.zeros((self.output_size, 3))

        for i in range(len(sampled_faces)):
            sampled_points[i] = (self.sample_point(verts[sampled_faces[i][0]],
                                                   verts[sampled_faces[i][1]],
                                                   verts[sampled_faces[i][2]]))
        
        return sampled_points

In [None]:
import random
pointcloud = PointSampler(3000)((verts, faces))

In [None]:
pcshow(*pointcloud.T)

In [None]:
class Normalize(object):
    def __call__(self, pointcloud):
        assert len(pointcloud.shape)==2
        
        norm_pointcloud = pointcloud - np.mean(pointcloud, axis=0) 
        norm_pointcloud /= np.max(np.linalg.norm(norm_pointcloud, axis=1))

        return  norm_pointcloud

In [None]:
norm_pointcloud = Normalize()(pointcloud)

In [None]:
pcshow(*norm_pointcloud.T)

In [None]:
class RandRotation_z(object):
    def __call__(self, pointcloud):
        assert len(pointcloud.shape)==2

        theta = random.random() * 2. * math.pi
        rot_matrix = np.array([[ math.cos(theta), -math.sin(theta),    0],
                               [ math.sin(theta),  math.cos(theta),    0],
                               [0,                             0,      1]])
        
        rot_pointcloud = rot_matrix.dot(pointcloud.T).T
        return  rot_pointcloud
    
class RandomNoise(object):
    def __call__(self, pointcloud):
        assert len(pointcloud.shape)==2

        noise = np.random.normal(0, 0.02, (pointcloud.shape))
    
        noisy_pointcloud = pointcloud + noise
        return  noisy_pointcloud
     

In [None]:
import math
rot_pointcloud = RandRotation_z()(norm_pointcloud)
noisy_rot_pointcloud = RandomNoise()(rot_pointcloud)
     

In [None]:
pcshow(*noisy_rot_pointcloud.T)

In [None]:
class ToTensor(object):
    def __call__(self, pointcloud):
        assert len(pointcloud.shape)==2

        return torch.from_numpy(pointcloud)

In [None]:
ToTensor()(noisy_rot_pointcloud)

tensor([[ 0.1324,  0.8097, -0.1277],
        [-0.3608, -0.3714, -0.1667],
        [-0.0138, -0.1218,  0.1437],
        ...,
        [ 0.1356, -0.4361, -0.0431],
        [-0.0766, -0.5308,  0.3230],
        [ 0.0781, -0.1721, -0.1266]], dtype=torch.float64)

In [None]:
def default_transforms():
    return transforms.Compose([
                                PointSampler(1024),
                                Normalize(),
                                ToTensor()
                              ])

In [None]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
class PointCloudData(Dataset):
    def __init__(self, root_dir, valid=False, folder="train", transform=default_transforms()):
        self.root_dir = root_dir
        folders = [dir for dir in sorted(os.listdir(root_dir)) if os.path.isdir(root_dir/dir)]
        self.classes = {folder: i for i, folder in enumerate(folders)}
        self.transforms = transform if not valid else default_transforms()
        self.valid = valid
        self.files = []
        for category in self.classes.keys():
            new_dir = root_dir/Path(category)/folder
            for file in os.listdir(new_dir):
                if file.endswith('.off'):
                    sample = {}
                    sample['pcd_path'] = new_dir/file
                    sample['category'] = category
                    self.files.append(sample)

    def __len__(self):
        return len(self.files)

    def __preproc__(self, file):
        verts, faces = read_off(file)
        if self.transforms:
            pointcloud = self.transforms((verts, faces))
        return pointcloud

    def __getitem__(self, idx):
        pcd_path = self.files[idx]['pcd_path']
        category = self.files[idx]['category']
        with open(pcd_path, 'r') as f:
            pointcloud = self.__preproc__(f)
        return {'pointcloud': pointcloud, 
                'category': self.classes[category]}

In [None]:
train_transforms = transforms.Compose([
                    PointSampler(1024),
                    Normalize(),
                    RandRotation_z(),
                    RandomNoise(),
                    ToTensor()
                    ])
     

In [None]:
train_ds = PointCloudData(path, transform=train_transforms)
valid_ds = PointCloudData(path, valid=True, folder='test', transform=train_transforms)

In [None]:
inv_classes = {i: cat for cat, i in train_ds.classes.items()};
inv_classes

{0: 'bathtub',
 1: 'bed',
 2: 'chair',
 3: 'desk',
 4: 'dresser',
 5: 'monitor',
 6: 'night_stand',
 7: 'sofa',
 8: 'table',
 9: 'toilet'}

In [None]:
train_loader = DataLoader(dataset=train_ds, batch_size=32, shuffle=True)
valid_loader = DataLoader(dataset=valid_ds, batch_size=64)

In [None]:
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F

class Tnet(nn.Module):
   def __init__(self, k=3):
      super().__init__()
      self.k=k
      self.conv1 = nn.Conv1d(k,64,1)
      self.conv2 = nn.Conv1d(64,128,1)
      self.conv3 = nn.Conv1d(128,1024,1)
      self.fc1 = nn.Linear(1024,512)
      self.fc2 = nn.Linear(512,256)
      self.fc3 = nn.Linear(256,k*k)

      self.bn1 = nn.BatchNorm1d(64)
      self.bn2 = nn.BatchNorm1d(128)
      self.bn3 = nn.BatchNorm1d(1024)
      self.bn4 = nn.BatchNorm1d(512)
      self.bn5 = nn.BatchNorm1d(256)
       

   def forward(self, input):
      # input.shape == (bs,n,3)
      bs = input.size(0)
      xb = F.relu(self.bn1(self.conv1(input)))
      xb = F.relu(self.bn2(self.conv2(xb)))
      xb = F.relu(self.bn3(self.conv3(xb)))
      pool = nn.MaxPool1d(xb.size(-1))(xb)
      flat = nn.Flatten(1)(pool)
      xb = F.relu(self.bn4(self.fc1(flat)))
      xb = F.relu(self.bn5(self.fc2(xb)))
      
      #initialize as identity
      init = torch.eye(self.k, requires_grad=True).repeat(bs,1,1)
      if xb.is_cuda:
        init=init.cuda()
      matrix = self.fc3(xb).view(-1,self.k,self.k) + init
      return matrix


class Transform(nn.Module):
   def __init__(self):
        super().__init__()
        self.input_transform = Tnet(k=3)
        self.feature_transform = Tnet(k=64)
        self.conv1 = nn.Conv1d(3,64,1)

        self.conv2 = nn.Conv1d(64,128,1)
        self.conv3 = nn.Conv1d(128,1024,1)
       

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
       
   def forward(self, input):
        matrix3x3 = self.input_transform(input)
        # batch matrix multiplication
        xb = torch.bmm(torch.transpose(input,1,2), matrix3x3).transpose(1,2)

        xb = F.relu(self.bn1(self.conv1(xb)))

        matrix64x64 = self.feature_transform(xb)
        xb = torch.bmm(torch.transpose(xb,1,2), matrix64x64).transpose(1,2)

        xb = F.relu(self.bn2(self.conv2(xb)))
        xb = self.bn3(self.conv3(xb))
        xb = nn.MaxPool1d(xb.size(-1))(xb)
        output = nn.Flatten(1)(xb)
        return output, matrix3x3, matrix64x64

class PointNet(nn.Module):
    def __init__(self, classes = 10):
        super().__init__()
        self.transform = Transform()
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, classes)
        

        self.bn1 = nn.BatchNorm1d(512)
        self.bn2 = nn.BatchNorm1d(256)
        self.dropout = nn.Dropout(p=0.3)
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, input):
        xb, matrix3x3, matrix64x64 = self.transform(input)
        xb = F.relu(self.bn1(self.fc1(xb)))
        xb = F.relu(self.bn2(self.dropout(self.fc2(xb))))
        output = self.fc3(xb)
        return self.logsoftmax(output), matrix3x3, matrix64x64
     

In [None]:
def pointnetloss(outputs, labels, m3x3, m64x64, alpha = 0.0001):
    criterion = torch.nn.NLLLoss()
    bs=outputs.size(0)
    id3x3 = torch.eye(3, requires_grad=True).repeat(bs,1,1)
    id64x64 = torch.eye(64, requires_grad=True).repeat(bs,1,1)
    if outputs.is_cuda:
        id3x3=id3x3.cuda()
        id64x64=id64x64.cuda()
    diff3x3 = id3x3-torch.bmm(m3x3,m3x3.transpose(1,2))
    diff64x64 = id64x64-torch.bmm(m64x64,m64x64.transpose(1,2))
    return criterion(outputs, labels) + alpha * (torch.norm(diff3x3)+torch.norm(diff64x64)) / float(bs)

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)


cuda:0


In [None]:
pointnet = PointNet()
pointnet.to(device);

In [None]:
optimizer = torch.optim.Adam(pointnet.parameters(), lr=0.001)

In [None]:
def train(model, train_loader, val_loader=None,  epochs=5, save=True):
    for epoch in range(epochs): 
        pointnet.train()
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data['pointcloud'].to(device).float(), data['category'].to(device)
            optimizer.zero_grad()
            outputs, m3x3, m64x64 = pointnet(inputs.transpose(1,2))

            loss = pointnetloss(outputs, labels, m3x3, m64x64)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 10 == 9:    # print every 10 mini-batches
                    print('[Epoch: %d, Batch: %4d / %4d], loss: %.3f' %
                        (epoch + 1, i + 1, len(train_loader), running_loss / 10))
                    running_loss = 0.0

        pointnet.eval()
        correct = total = 0

        # validation
        if val_loader:
            with torch.no_grad():
                for data in val_loader:
                    inputs, labels = data['pointcloud'].to(device).float(), data['category'].to(device)
                    outputs, __, __ = pointnet(inputs.transpose(1,2))
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
            val_acc = 100. * correct / total
            print('Valid accuracy: %d %%' % val_acc)

        # save the model
        if save:
            torch.save(pointnet.state_dict(), "save_" + str(epoch) + ".pth")

In [None]:
train(pointnet, train_loader, valid_loader, save=True)

[Epoch: 1, Batch:   10 /  125], loss: 2.190
[Epoch: 1, Batch:   20 /  125], loss: 1.750
[Epoch: 1, Batch:   30 /  125], loss: 1.557
[Epoch: 1, Batch:   40 /  125], loss: 1.494
[Epoch: 1, Batch:   50 /  125], loss: 1.222
[Epoch: 1, Batch:   60 /  125], loss: 1.260
[Epoch: 1, Batch:   70 /  125], loss: 1.109
[Epoch: 1, Batch:   80 /  125], loss: 1.030
[Epoch: 1, Batch:   90 /  125], loss: 1.138
[Epoch: 1, Batch:  100 /  125], loss: 1.010
[Epoch: 1, Batch:  110 /  125], loss: 1.165
[Epoch: 1, Batch:  120 /  125], loss: 0.958
Valid accuracy: 60 %
[Epoch: 2, Batch:   10 /  125], loss: 0.884
[Epoch: 2, Batch:   20 /  125], loss: 0.979
[Epoch: 2, Batch:   30 /  125], loss: 0.947
[Epoch: 2, Batch:   40 /  125], loss: 0.917
[Epoch: 2, Batch:   50 /  125], loss: 0.787
[Epoch: 2, Batch:   60 /  125], loss: 0.789
[Epoch: 2, Batch:   70 /  125], loss: 0.786
[Epoch: 2, Batch:   80 /  125], loss: 0.687
[Epoch: 2, Batch:   90 /  125], loss: 0.767
[Epoch: 2, Batch:  100 /  125], loss: 0.813
[Epoch: 2, 

In [None]:
from sklearn.metrics import confusion_matrix
     

In [None]:
pointnet = PointNet()
pointnet.load_state_dict(torch.load('save.pth'))
pointnet.eval();

FileNotFoundError: ignored

In [None]:
all_preds = []
all_labels = []
with torch.no_grad():
    for i, data in enumerate(valid_loader):
        print('Batch [%4d / %4d]' % (i+1, len(valid_loader)))
                   
        inputs, labels = data['pointcloud'].float(), data['category']
        outputs, __, __ = pointnet(inputs.transpose(1,2))
        _, preds = torch.max(outputs.data, 1)
        all_preds += list(preds.numpy())
        all_labels += list(labels.numpy())

Batch [   1 /   15]
Batch [   2 /   15]
Batch [   3 /   15]
Batch [   4 /   15]
Batch [   5 /   15]
Batch [   6 /   15]
Batch [   7 /   15]
Batch [   8 /   15]
Batch [   9 /   15]
Batch [  10 /   15]
Batch [  11 /   15]
Batch [  12 /   15]
Batch [  13 /   15]
Batch [  14 /   15]
Batch [  15 /   15]


In [None]:
cm = confusion_matrix(all_labels, all_preds);
cm

array([[ 1,  4,  3,  3,  1, 18,  0,  3,  0, 17],
       [ 1, 11,  4,  5, 15, 35,  2,  5,  1, 21],
       [ 4,  3,  9,  7,  8, 32,  1,  6,  1, 29],
       [ 2, 12,  7,  4,  7, 29,  0,  4,  2, 19],
       [ 2, 12,  8,  5,  7, 22,  2,  4,  4, 20],
       [ 2,  5, 11,  6,  6, 40,  0,  8,  0, 22],
       [ 1,  3,  6,  3,  6, 33,  0,  6,  3, 25],
       [ 5,  3,  6,  4, 10, 39,  2,  3,  0, 28],
       [ 4, 10, 11,  0, 10, 25,  1,  5,  1, 33],
       [ 3,  5,  6, 11,  9, 30,  1,  8,  1, 26]])

In [2]:
pip install torchmetrics

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchmetrics
  Downloading torchmetrics-0.11.0-py3-none-any.whl (512 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m512.4/512.4 KB[0m [31m20.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torchmetrics
Successfully installed torchmetrics-0.11.0


In [4]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchmetrics.classification import Accuracy
from torchmetrics import ConfusionMatrix



#for plotting
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.metrics import confusion_matrix

import scipy
from scipy.ndimage import rotate

In [5]:
%%capture
!wget https://www.dropbox.com/s/ja56cvf3x4mkf1t/modelnet10_voxelized_32.npz                   

%%capture
!wget https://www.dropbox.com/s/sfs9kd33qnvo7n0/modelnet10_voxelized_64.npz

In [6]:
class VoxelDataset(Dataset):
    def __init__(self, train = True, size=32):
        if size == 32:
            if train:
                tmp = np.load("modelnet10_voxelized_32.npz")
                self.data = tmp["X_train"]
                self.label = tmp["Y_train"]
                del tmp
            else:
                tmp = np.load("modelnet10_voxelized_32.npz")
                self.data = tmp["X_test"]
                self.label = tmp["Y_test"]
                del tmp
        else:
            if train:
                tmp = np.load("modelnet10_voxelized_64.npz")
                self.data = tmp["X_train"]
                self.label = tmp["Y_train"]
                del tmp
            else:
                tmp = np.load("modelnet10_voxelized_64.npz")
                self.data = tmp["X_test"]
                self.label = tmp["Y_test"]
                del tmp
                
    def __len__(self):
        return len(self.label)

    def __preproc__(self, voxels):
        
        #flip x
        if np.random.randint(2):
            voxels = np.flip(voxels, axis=0)
        
        #flip y
        if np.random.randint(2):
            voxels = np.flip(voxels, axis=1)
        
        angle = 360 * np.random.random_sample(1)[0]
        
        voxels = rotate(voxels, axes=(0, 1), angle=angle, cval=0.0, reshape=False)
        
        
        return voxels.copy()

    def __getitem__(self, idx):
        label = self.label[idx]
        voxels = self.data[idx]
        voxels = self.__preproc__(voxels)
        voxels = np.expand_dims(voxels, axis=0)
        voxels = torch.tensor(voxels).float()
        return voxels, label

In [7]:
class VoxelNet(nn.Module):
    def __init__(self, model_parameters, n_classes=10, data_size=32):
        super().__init__()
        self.n_classes = n_classes
        self.data_size = data_size
        self.model_parameters = model_parameters
        
        #features
        self.conv3d_1 = nn.Conv3d(in_channels=1, out_channels=model_parameters["features1"], kernel_size=5, stride=2)
        self.dropout1 = nn.Dropout(p=model_parameters["dropout"])
        self.conv3d_2 = nn.Conv3d(in_channels=model_parameters["features1"], out_channels=model_parameters["features2"], kernel_size=3)
        self.dropout2 = nn.Dropout(p=model_parameters["dropout"])
        self.maxpool = nn.MaxPool3d(2)
        
        
        
        if data_size==64:
            dim=351232
            351232
        else:
            dim=27648
            
        #calculate dim after pooling for fc layer
        x = torch.rand((1,1,data_size,data_size,data_size))
        x = F.relu(self.conv3d_1(x))
        x = self.dropout1(x)
        x = F.relu(self.conv3d_2(x))
        x = self.dropout2(x)
        x = self.maxpool(x)
        dim = x.shape[0] * x.shape[1] * x.shape[2] * x.shape[3] * x.shape[4]
        
        #mlp
        self.fc1 = nn.Linear(dim, model_parameters["fc1"])
        self.dropout3 = nn.Dropout(p=model_parameters["dropout"])
        self.fc2 = nn.Linear(model_parameters["fc1"], self.n_classes)
        self.logsoftmax = nn.LogSoftmax(dim=1)
        
        
    def forward(self, x):
        #features
        x = F.relu(self.conv3d_1(x))
        x = self.dropout1(x)
        x = F.relu(self.conv3d_2(x))
        x = self.dropout2(x)
        
        #maxpool
        x = self.maxpool(x)
        
        #flatten
        x = x.view(x.size(0), -1)
        
        #mlp
        x = F.relu(self.fc1(x))
        x = self.dropout3(x)
        x = self.fc2(x)
        
        return self.logsoftmax(x)

In [8]:
def smooth(x, w=0.95):
    last = x[0]
    smoothed = []
    for point in x:
        smoothed_val = w * last + (1 - w) * point
        smoothed.append(smoothed_val)
        last = smoothed_val
        
    return smoothed

In [19]:
class VoxNet_Trainer():
    def __init__(self, hyperparameters, model_hyperparameters):
        
        #Hypreparameters
        self.learning_rate = hyperparameters["learning_rate"]
        self.batch_size_train = hyperparameters["train_batch_size"]
        self.batch_size_test = hyperparameters["valid_batch_size"]
        self.data_size = hyperparameters["data_size"]
        self.sgd_momentum = hyperparameters["sgd_momentum"]
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.epochs = hyperparameters["epochs"]
        self.model_hyperparameters = model_hyperparameters
        
        #Logging
        self.print_epoch_rate = hyperparameters["print_epoch_rate"]
        self.verbose = hyperparameters["verbose"]
        
        if self.verbose:
            print("loading fataset...\n")
            
        #Dataset
        self.initDataset()
        self.class_names = ["bathtub", "bed", "chair", "desk", "dresser", "monitor", "night_stand", "sofa", "table", "toilet"]
        self.num_classes = len(self.class_names)
        
        if self.verbose:
            print("loading model...\n")
            
        #Model
        self.lr_scheduler_step = hyperparameters["lr_scheduler_step"]
        self.lr_scheduler_gamma = hyperparameters["lr_scheduler_gamma"]
        self.initModel()
        
    def initDataset(self):
        self.train_ds = VoxelDataset(train=True, size=self.data_size)
        self.test_ds = VoxelDataset(train=False, size=self.data_size)
        self.train_dataloader = DataLoader(dataset=self.train_ds, batch_size=self.batch_size_train, shuffle=True, drop_last=True)
        self.test_dataloader = DataLoader(dataset=self.test_ds, batch_size=self.batch_size_test)
    
    def initModel(self):
        self.model = VoxelNet(model_parameters=self.model_hyperparameters, data_size=self.data_size)
        self.model.to(self.device)
        #self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)
        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.learning_rate, momentum=self.sgd_momentum)
        self.lr_scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=self.lr_scheduler_step, gamma=self.lr_scheduler_gamma)
        self.loss_func = nn.NLLLoss()
    
    def numberOfParameters(self):
        return  sum(p.numel() for p in self.model.parameters())
    
    def train(self):
        train_loss_history, valid_loss_history = [], []
        train_acc_history, valid_acc_history = [], []

        train_accuracy = Accuracy(task='multiclass', num_classes=10)
        valid_accuracy = Accuracy(task='multiclass', num_classes=10)

        
        if self.verbose:
            print("started training...\n")
            
        for epoch in range(0, self.epochs):            
            
            train_loss = []                                                         #track training loss
            valid_loss = []                                                         #track valid loss
            
            #track loss for 10 batch
            batch_loss=0
            
            #training on batches
            self.model.train()
            for i, data in enumerate(self.train_dataloader, 0):
                x, y = data[0].to(self.device), data[1].to(self.device)              #send to device
                self.optimizer.zero_grad()
                pred = self.model(x)                                     #predict class
                loss = self.loss_func(pred, y)          #compute and track loss
                train_loss.append(loss.cpu().data)                                   #track loss
                acc = train_accuracy(torch.argmax(pred, 1).cpu(), y.cpu())           #track accuracy
                
                #back propagate and optimize
                loss.backward()
                self.optimizer.step()
                self.lr_scheduler.step()
                
            pred_conf = []                                                           #track for confusion matrix
            y_conf = []                                                              #track for confusion matrix
            
            
            #validation on batches
            self.model.eval()
            for x, y in self.test_dataloader:
                x, y = x.to(self.device), y.to(self.device)                           #send to device
                pred = self.model(x)                                      #predict class
                loss = self.loss_func(pred, y)           #compute and track loss
                valid_loss.append(loss.cpu().data)                                    #track loss
                valid_accuracy.update(torch.argmax(pred, 1).cpu(), y.cpu())           #track accuracy
                pred_conf.append(torch.argmax(pred, 1))
                y_conf.append(y)
                
            #compute confusion matrix
            a = torch.cat(pred_conf).cpu()
            b = torch.cat(y_conf).cpu()
            confmat = ConfusionMatrix(task='multiclass', num_classes=self.num_classes)
            self.conf_matrix = confmat(a, b)
            
            # total accuracy over all batches
            total_train_accuracy = train_accuracy.compute()
            total_valid_accuracy = valid_accuracy.compute()
            train_accuracy.reset()
            valid_accuracy.reset()
            
            #track loss and acc for plotting
            train_loss_history.append(torch.mean(torch.tensor(train_loss)))
            valid_loss_history.append(torch.mean(torch.tensor(valid_loss)))
            train_acc_history.append(total_train_accuracy)
            valid_acc_history.append(total_valid_accuracy)
            
            
            if epoch % self.print_epoch_rate == 0 and self.verbose:
                tmp1 = "epoch:{:3d}/{:3d}".format(epoch+1, self.epochs)
                tmp2 = "train-loss: {:4.2f}, train-acc: {:.2%}".format(train_loss_history[epoch], train_acc_history[epoch].item())
                tmp3 = "valid-loss: {:4.2f}, valid-acc: {:.2%}".format(valid_loss_history[epoch], valid_acc_history[epoch].item())
                print(tmp1, tmp2, tmp3)
                
            
            print({"train loss": train_loss_history[epoch], "epoch": epoch})
            print({"valid loss": valid_loss_history[epoch], "epoch": epoch})
            print({"train accuracy": train_acc_history[epoch].item(), "epoch": epoch})
            print({"valid accuracy": valid_acc_history[epoch].item(), "epoch": epoch})
            
            
            
        #save history
        self.history = {"train_loss": torch.tensor(train_loss_history), "train_acc": torch.tensor(train_acc_history), "valid_loss": torch.tensor(valid_loss_history), "valid_acc": torch.tensor(valid_acc_history)}
            
        #print end results
        if self.verbose:
            print("finished training\n")
            print("accuracy: {:.2%}".format(valid_acc_history[-1].item()))
            print("loss: {:4.2f}".format(valid_loss_history[-1]))
      
    
    def generateConfusionMatrix(self):
        fig=plt.figure(figsize = (12,7))
        sns.heatmap(self.conf_matrix, annot=True, fmt='g', linewidths=.4, cbar=False)
        tick_marks = np.arange(10)
        plt.xticks(tick_marks, 10, rotation=45)
        plt.yticks(tick_marks, 10, rotation=0)
        plt.title("Confusion Matrix")
        plt.show()
    
           
    # figure to subfigure
    def showResults(self):
        eps = range(0, len(self.history["train_loss"].cpu()))
        
        sns.set_theme()
        fig, ax = plt.subplots(1, 2, figsize=(14, 4))
        fig.suptitle('Results')
        
        ax[0].plot(eps, smooth(self.history["train_loss"].cpu()), 'g', label='Training Loss')
        ax[0].plot(eps, smooth(self.history["valid_loss"].cpu()), 'b', label='Valid Loss')
        ax[0].set_title('Loss History')
        ax[0].set(xlabel='Epochs', ylabel='Loss')
        ax[0].legend()
        
        ax[1].plot(eps, smooth(self.history["train_acc"].cpu()), 'g', label='Training Accuracy')
        ax[1].plot(eps, smooth(self.history["valid_acc"].cpu()), 'b', label='Valid Accuracy')
        ax[1].set_title('Loss History')
        ax[1].set(xlabel='Epochs', ylabel='Accuracy')
        ax[1].legend()
        
        plt.show()

In [21]:
training_hyperparameters = {
            "learning_rate": 0.1,
            "sgd_momentum": 0.9,
            "data_size": 32,
            "epochs": 20,
            "train_batch_size": 64,
            "valid_batch_size": 64,
            "lr_scheduler_step": 20,
            "lr_scheduler_gamma": 0.5,
            "print_epoch_rate": 1,
            "verbose":0
        }        
        
model_hyperparameters = {
            "dropout": 0.2,
            "features1": 64,
            "features2": 64,
            "fc1": 64
        }
        
trainer = VoxNet_Trainer(training_hyperparameters, model_hyperparameters)

In [17]:
trainer.train()

{'train loss': tensor(1.3582), 'epoch': 0}
{'valid loss': tensor(0.7942), 'epoch': 0}
{'train accuracy': 0.5372983813285828, 'epoch': 0}
{'valid accuracy': 0.7235682606697083, 'epoch': 0}
{'train loss': tensor(0.6749), 'epoch': 1}
{'valid loss': tensor(0.7074), 'epoch': 1}
{'train accuracy': 0.7759576439857483, 'epoch': 1}
{'valid accuracy': 0.75, 'epoch': 1}
{'train loss': tensor(0.5794), 'epoch': 2}
{'valid loss': tensor(0.6706), 'epoch': 2}
{'train accuracy': 0.8127520084381104, 'epoch': 2}
{'valid accuracy': 0.7544052600860596, 'epoch': 2}
{'train loss': tensor(0.5808), 'epoch': 3}
{'valid loss': tensor(0.6710), 'epoch': 3}
{'train accuracy': 0.8051915168762207, 'epoch': 3}
{'valid accuracy': 0.7533039450645447, 'epoch': 3}
{'train loss': tensor(0.5682), 'epoch': 4}
{'valid loss': tensor(0.6679), 'epoch': 4}
{'train accuracy': 0.8125, 'epoch': 4}
{'valid accuracy': 0.7676211595535278, 'epoch': 4}
{'train loss': tensor(0.5703), 'epoch': 5}
{'valid loss': tensor(0.6739), 'epoch': 5}


In [22]:
trainer.generateConfusionMatrix()

AttributeError: ignored

<Figure size 864x504 with 0 Axes>

In [None]:
trainer.showResults()