In [1]:
import torch
import torch.nn as nn


# Sizes shared by the data representation and the network defined below.
NUM_POINTS = 2048  # matches the 2048-long axis of x_train, whose shape is (N, 2048, 3)
NUM_CUTS = 32      # not referenced in this notebook; NOTE(review): presumably used by data preparation - confirm
SIZE_SUB = 16      # kernel size of the first max-pool in MultiSacleUNet
SIZE_TOP = 16      # kernel size of the second max-pool in MultiSacleUNet
# The network pools by SIZE_SUB and then by SIZE_TOP before its fully connected
# bottleneck, so the image side has to be the product of BOTH factors
# (SIZE_SUB * SIZE_SUB only gave the right value while the two happen to be equal).
SIZE_IMG = SIZE_SUB * SIZE_TOP


class Inception(nn.Module):
    """Two parallel convolutions (1x1 and 3x3) whose activations are stacked channel-wise.

    Both branches use ``padding='same'``, so the spatial size of the input is kept.
    Each branch produces ``size_out`` channels, hence the block returns
    ``2 * size_out`` channels.

    Args:
        size_in: number of input channels.
        size_out: number of output channels of EACH branch.
    """

    def __init__(self, size_in, size_out):
        super().__init__()
        self.size_in, self.size_out = size_in, size_out

        self.cv1 = nn.Conv2d(size_in, size_out, 1, padding='same')
        self.cv2 = nn.Conv2d(size_in, size_out, 3, padding='same')
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply both branches to ``x`` and concatenate them along the channel axis.

        Args:
            x: tensor of shape ``(B, size_in, H, W)`` (or unbatched ``(size_in, H, W)``).

        Returns:
            Tensor of shape ``(B, 2 * size_out, H, W)`` (or ``(2 * size_out, H, W)``):
            the ReLU-activated 1x1 branch followed by the ReLU-activated 3x3 branch.
        """
        branch_1x1 = self.relu(self.cv1(x))
        branch_3x3 = self.relu(self.cv2(x))

        # torch.cat defaults to dim=0, which would stack the branches along the
        # batch axis. -3 is the channel axis for batched and unbatched inputs alike.
        return torch.cat((branch_1x1, branch_3x3), dim=-3)
    
    
    

class MultiSacleUNet(nn.Module):
    """U-Net-style encoder/decoder built from ``Inception`` blocks with two pooling levels.

    The channel counts wired below assume that every ``Inception(size_in, size_out)``
    block returns ``2 * size_out`` channels (its two branches stacked on the channel
    axis).

    Input:  ``(B, 3, H, W)`` with ``H == W == SIZE_SUB * SIZE_TOP``, so that the two
            max-pools reduce the image to a single 1x1 cell before ``fc1``.
    Output: ``(B, 50, H, W)`` raw per-pixel scores with the class axis at dim 1, which
            is the layout ``nn.CrossEntropyLoss`` expects. No softmax is applied.

    NOTE(review): the dataset debug prints in this notebook show arrays shaped
    (256, 256, 3), i.e. channels-last. If those are the network inputs they must be
    permuted to channels-first before being passed to this model - confirm.
    """

    def __init__(self):
        super().__init__()
        self.inception_1 = Inception(3, 64)     # -> 128 channels
        self.inception_2 = Inception(128, 128)  # -> 256 channels
        self.inception_3 = Inception(512, 128)  # in: 256 (skip) + 256 (upsampled)
        self.inception_4 = Inception(384, 64)   # in: 128 (skip) + 256 (upsampled)

        # nn.MaxPool2d only accepts numeric padding; the default 0 is what 'valid' means.
        self.maxPool_1 = nn.MaxPool2d(SIZE_SUB)
        self.maxPool_2 = nn.MaxPool2d(SIZE_TOP)

        # Each upsampling undoes one pooling step, so it is expressed as a scale
        # factor: 1x1 -> SIZE_TOP, then SIZE_TOP -> SIZE_TOP * SIZE_SUB.
        # (nn.Upsample(size=...) is an absolute output size, not a factor.)
        self.upSample_1 = nn.Upsample(scale_factor=SIZE_TOP)
        self.upSample_2 = nn.Upsample(scale_factor=SIZE_SUB)

        self.fc1 = nn.Linear(256, 256)
        self.fc2 = nn.Linear(128, 50)

        # NOTE(review): relu and softmax are kept as attributes but are not used in
        # forward(); the loss used for training consumes raw scores.
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)  # dim=1 is the class axis of the output

    def forward(self, x):
        """Run the encoder, the global bottleneck and the decoder.

        Args:
            x: tensor of shape ``(B, 3, SIZE_SUB * SIZE_TOP, SIZE_SUB * SIZE_TOP)``.

        Returns:
            Tensor of shape ``(B, 50, H, W)`` with un-normalised per-pixel scores.
        """
        # Encoder
        x0 = self.inception_1(x)
        x1 = self.maxPool_1(x0)

        x1 = self.inception_2(x1)
        x2 = self.maxPool_2(x1)

        # Bottleneck: nn.Linear acts on the LAST axis, so the 1x1 spatial axes are
        # flattened away first and restored afterwards for the decoder.
        xg = torch.flatten(x2, start_dim=1)
        xg = self.fc1(xg)
        y2 = xg[:, :, None, None]

        # Decoder, with skip connections concatenated on the channel axis
        y1 = self.upSample_1(y2)
        y1 = torch.cat((x1, y1), dim=1)
        y1 = self.inception_3(y1)

        y0 = self.upSample_2(y1)
        y0 = torch.cat((x0, y0), dim=1)
        y0 = self.inception_4(y0)

        # Per-pixel classifier: move channels last so fc2 sees the 128 features of
        # each pixel, then move the resulting scores back to dim 1.
        outputs = self.fc2(y0.permute(0, 2, 3, 1))
        outputs = outputs.permute(0, 3, 1, 2).contiguous()

        return outputs
    


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# NOTE(review): reloading the extension right after loading it looks redundant.
%reload_ext autoreload


In [3]:
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn

from model import MultiSacleUNet
from dataset import ShapeNetPartDataset


def _split_batch(batch, device, input_key, target_key):
    """Return ``(inputs, targets)`` taken from the ``batch`` dict, both moved to ``device``."""
    missing = [key for key in (input_key, target_key) if key not in batch]
    if missing:
        raise KeyError(
            f"batch is missing key(s) {missing}; available keys: {list(batch.keys())}. "
            "Set config['input_key'] / config['target_key'] to match the dataset."
        )
    return batch[input_key].to(device), batch[target_key].to(device)


def _mean_loss(model, criterion, dataloader, device, input_key, target_key):
    """Average ``criterion`` over every batch of ``dataloader`` without tracking gradients."""
    total, num_batches = 0., 0
    with torch.no_grad():
        for batch in dataloader:
            inputs, targets = _split_batch(batch, device, input_key, target_key)
            total += criterion(model(inputs), targets).item()
            num_batches += 1
    # An empty loader yields NaN, which never compares as "better" than the best loss.
    return total / num_batches if num_batches else float('nan')


def train(model, train_dataloader, val_dataloader, device, config):
    """Optimise ``model`` with Adam and cross-entropy, validating periodically.

    Whenever the mean validation loss improves, the model weights are written to
    ``./runs/<experiment_name>/model_best.ckpt`` (the folder is created if needed).

    :param model: network to train; called as ``model(inputs)``
    :param train_dataloader: iterable (with ``len``) of dict batches used for training
    :param val_dataloader: iterable of dict batches used for validation
    :param device: device that every batch is moved to
    :param config: dict with the keys 'learning_rate', 'max_epochs', 'print_every_n',
                   'validate_every_n' and 'experiment_name'. Optional keys:
                   'input_key' (default 'input_sdf') and 'target_key' (default 'target')
                   name the batch entries used as network input and as loss target.

    NOTE(review): the key names produced by the dataset are not visible here; the
    'target' default in particular is a placeholder - confirm both and set them in
    the config. The targets are passed to nn.CrossEntropyLoss unchanged, so they are
    assumed to already have a dtype/shape it accepts - confirm.
    """
    # Declare loss and move to device. It gets its own name so that the per-batch
    # loss value below does not overwrite the criterion.
    criterion = nn.CrossEntropyLoss()
    criterion.to(device)

    # Declare optimizer with learning rate given in config
    optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate'])

    input_key = config.get('input_key', 'input_sdf')
    target_key = config.get('target_key', 'target')

    # Relative path, same location that main() prepares for checkpoints
    checkpoint_dir = Path(f'./runs/{config["experiment_name"]}')
    checkpoint_dir.mkdir(exist_ok=True, parents=True)
    checkpoint_path = f'./runs/{config["experiment_name"]}/model_best.ckpt'

    model.train()
    best_loss_val = np.inf

    # Keep track of running average of train loss for printing
    train_loss_running = 0.

    for epoch in range(config['max_epochs']):
        for batch_idx, batch in enumerate(train_dataloader):
            inputs, targets = _split_batch(batch, device, input_key, target_key)

            # Set optimizer gradients to zero, perform forward pass
            optimizer.zero_grad()
            predicted = model(inputs)

            # Compute loss, compute gradients, update network parameters
            loss = criterion(predicted, targets)
            loss.backward()
            optimizer.step()

            # Logging
            train_loss_running += loss.item()
            iteration = epoch * len(train_dataloader) + batch_idx

            if iteration % config['print_every_n'] == (config['print_every_n'] - 1):
                print(f'[{epoch:03d}/{batch_idx:05d}] train_loss: {train_loss_running / config["print_every_n"]:.6f}')
                train_loss_running = 0.

            # Validation evaluation and logging
            if iteration % config['validate_every_n'] == (config['validate_every_n'] - 1):
                # Evaluate on the entire validation set in eval mode
                model.eval()
                loss_val = _mean_loss(model, criterion, val_dataloader, device, input_key, target_key)

                if loss_val < best_loss_val:
                    torch.save(model.state_dict(), checkpoint_path)
                    best_loss_val = loss_val

                print(f'[{epoch:03d}/{batch_idx:05d}] val_loss: {loss_val:.6f} | best_loss_val: {best_loss_val:.6f}')

                # Set model back to train
                model.train()

In [4]:
def main(config):
    """
    Function for training multi-scale U-Net on ShapeNetPart
    :param config: configuration for training - has the following keys
                   'experiment_name': name of the experiment, checkpoint folder "./runs/<experiment_name>" is created
                   'device': device on which model is trained, e.g. 'cpu' or 'cuda:0'
                   'train_batch_size': batch size of the training dataloader (falls back to 'batch_size')
                   'val_batch_size': batch size of the validation dataloader (falls back to 'batch_size')
                   'resume_ckpt': None (or any falsy value) if training from scratch, otherwise path to checkpoint (saved weights)
                   'learning_rate': learning rate for optimizer
                   'max_epochs': total number of epochs after which training should stop
                   'print_every_n': print train loss every n iterations
                   'validate_every_n': print validation loss every n iterations
                   'is_overfit': if True, both dataloaders use the 'overfit' split, so train and validation
                                 are done on the same set and an error close to 0 means a good overfit.
                                 Useful for debugging.
                   'dataset_path': optional, path of the prepared .h5 file (default 'shapenet_prepared.h5')
                   'num_workers': optional, dataloader worker processes (default 4; 0 loads in the main
                                  process, which gives plain tracebacks when the dataset raises)
    """

    # Declare device
    device = torch.device('cpu')
    if torch.cuda.is_available() and config['device'].startswith('cuda'):
        device = torch.device(config['device'])
        print('Using device:', config['device'])
    else:
        print('Using CPU')

    dataset_path = config.get('dataset_path', 'shapenet_prepared.h5')
    num_workers = config.get('num_workers', 4)
    # The split-specific key wins; the generic 'batch_size' is the fallback.
    # A KeyError is raised if neither is present.
    train_batch_size = config['train_batch_size'] if 'train_batch_size' in config else config['batch_size']
    val_batch_size = config['val_batch_size'] if 'val_batch_size' in config else config['batch_size']
    # Pinned host memory only speeds up copies to a CUDA device
    pin_memory = device.type == 'cuda'

    # Create Dataloaders
    train_dataset = ShapeNetPartDataset(path=dataset_path, split='train' if not config['is_overfit'] else 'overfit')
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,   # Datasets return data one sample at a time; Dataloaders use them and aggregate samples into batches
        batch_size=train_batch_size,   # The size of batches is defined here
        shuffle=True,    # Shuffling the order of samples is useful during training to prevent that the network learns to depend on the order of the input data
        num_workers=num_workers,   # Data is usually loaded in parallel by num_workers
        pin_memory=pin_memory,
    )

    val_dataset = ShapeNetPartDataset(path=dataset_path, split='val' if not config['is_overfit'] else 'overfit')
    val_dataloader = torch.utils.data.DataLoader(
        val_dataset,     # Datasets return data one sample at a time; Dataloaders use them and aggregate samples into batches
        batch_size=val_batch_size,   # The size of batches is defined here
        shuffle=False,   # During validation, shuffling is not necessary anymore
        num_workers=num_workers,   # Data is usually loaded in parallel by num_workers
        pin_memory=pin_memory,
    )

    # Instantiate model
    model = MultiSacleUNet()

    # Load model if resuming from checkpoint. Truthiness is used so that both None
    # and False mean "train from scratch".
    # NOTE: torch.load unpickles the file - only resume from checkpoints you trust.
    resume_ckpt = config.get('resume_ckpt')
    if resume_ckpt:
        model.load_state_dict(torch.load(resume_ckpt, map_location='cpu'))

    # Move model to specified device
    model.to(device)

    # Create folder for saving checkpoints
    Path(f'./runs/{config["experiment_name"]}').mkdir(exist_ok=True, parents=True)

    # Start training
    train(model, train_dataloader, val_dataloader, device, config)

In [5]:
# Training configuration passed to main(); see its docstring for the meaning of the keys.
config = {
    "experiment_name": 'test_1',
    "is_overfit": False,
    "device": 'cuda:0',
    "max_epochs": 1,
    "train_batch_size": 4,
    "val_batch_size": 1,
    "learning_rate": 0.0001,
    # None means "train from scratch", as documented in main(); False is not a
    # valid "no checkpoint" marker for an `is not None` check.
    "resume_ckpt": None,
    "print_every_n": 1,
    "validate_every_n": 1,
}

In [6]:
# Build the dataloaders and the model, then start training with the configuration above.
main(config)

Using device: cuda:0


TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/ahmed/anaconda3/envs/3d-seg-in-2d/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/ahmed/anaconda3/envs/3d-seg-in-2d/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/ahmed/anaconda3/envs/3d-seg-in-2d/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/ahmed/Desktop/ss21/ML3D/project/3d-segmentation-in-2d/dataset.py", line 61, in __getitem__
    points_3d_image[points_2d_item[:, 0],
TypeError: can't assign a numpy.ndarray to a torch.FloatTensor


In [30]:
import h5py

# Open the prepared dataset read-only: this notebook only inspects it, and an
# explicit mode avoids the read/write default of older h5py releases.
# The handle is left open on purpose because the following cells read from `data`;
# call data.close() when done.
data = h5py.File('shapenet_prepared.h5', 'r')

In [31]:
# List the names of the top-level entries stored in the HDF5 file.
data.keys()

<KeysViewHDF5 ['mean node loss_test', 'mean node loss_train', 'mean node loss_val', 'num_repeats_test', 'num_repeats_train', 'num_repeats_val', 'p_test', 'p_train', 'p_val', 's_test', 's_train', 's_val', 'x_test', 'x_train', 'x_val', 'y_test', 'y_train', 'y_val']>

In [37]:
train = data['x_train']

In [38]:
train, type(train)

(<HDF5 dataset "x_train": shape (60685, 2048, 3), type "<f4">,
 h5py._hl.dataset.Dataset)

In [39]:
# Build the training dataset on its own (outside main) so single samples can be
# inspected; the 'overfit' split replaces 'train' when config['is_overfit'] is set.
train_dataset = ShapeNetPartDataset(
    path='shapenet_prepared.h5',
    split='overfit' if config['is_overfit'] else 'train',
)


In [40]:
# Wrap the dataset in a loader that yields shuffled batches of four samples,
# loaded by four worker processes into pinned host memory.
train_dataloader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=4,
    shuffle=True,      # reshuffle so the network cannot rely on sample order
    num_workers=4,     # load samples in parallel worker processes
    pin_memory=True,   # speeds up the later copy of batches to the GPU
)


In [43]:
# Peek at a single batch instead of iterating over the whole loader: one batch is
# enough to see what the dataloader yields, and it keeps the cell output short.
first_batch = next(iter(train_dataloader))
type(first_batch)

(2048, 2) (256, 256, 3) (2048, 3)
(2048, 2) (256, 256, 3) (2048, 3)
(2048, 2) (256, 256, 3) (2048, 3)
(2048, 2) (256, 256, 3) (2048, 3)


ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/ahmed/anaconda3/envs/3d-seg-in-2d/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/ahmed/anaconda3/envs/3d-seg-in-2d/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/ahmed/anaconda3/envs/3d-seg-in-2d/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/ahmed/Desktop/ss21/ML3D/project/3d-segmentation-in-2d/dataset.py", line 65, in __getitem__
    parts_image[points_2d_item[:, 0],
ValueError: shape mismatch: value array of shape (2048,) could not be broadcast to indexing result of shape (2048,3)


(2048, 2) (256, 256, 3) (2048, 3)
(2048, 2) (256, 256, 3) (2048, 2)(2048, 3) 
(256, 256, 3)(2048, 2)  (2048, 3)
(256, 256, 3) (2048, 3)
(2048, 2) (256, 256, 3) (2048, 3)
