## ModelNet

In [37]:
from __future__ import (
    division,
    absolute_import,
    with_statement,
    print_function,
    unicode_literals,
)
import torch
import torch.utils.data as data
import numpy as np
import os
import h5py
import subprocess
import shlex

# Root directory for the dataset folder and the downloaded archive: the
# current working directory at the moment this cell is executed.
BASE_DIR = os.getcwd()


def _get_data_files(list_filename):
    """Read a file list and return one path per line.

    Trailing whitespace is removed from every line and the first five
    characters are dropped (presumably a leading ``data/`` prefix -- confirm
    against the list files shipped with the dataset).
    """
    prefix_len = 5
    paths = []
    with open(list_filename) as listing:
        for entry in listing:
            paths.append(entry.rstrip()[prefix_len:])
    return paths


def _load_data_file(name):
    """Load the ``data`` and ``label`` datasets of one HDF5 file into memory.

    Args:
        name: Path of the HDF5 file to read.

    Returns:
        A ``(data, label)`` tuple holding the full contents of the two
        datasets.
    """
    # Open read-only (older h5py versions defaulted to append mode) and close
    # the handle as soon as both datasets have been copied out with ``[:]``.
    with h5py.File(name, "r") as f:
        data = f["data"][:]
        label = f["label"][:]
    return data, label


class ModelNet40Cls(data.Dataset):
    """ModelNet40 classification dataset read from the HDF5 point-cloud dump.

    Every file named in ``train_files.txt`` / ``test_files.txt`` is loaded
    into memory when the dataset is constructed.

    Args:
        num_points: Number of points returned per sample; capped at the
            number of points stored per shape.
        transforms: Optional callable applied to each sampled point array.
        train: Load the training split when true, otherwise the test split.
        download: When true and the data folder is missing, fetch and unpack
            the archive into ``BASE_DIR`` (requires ``curl`` and ``unzip``).
    """

    def __init__(self, num_points, transforms=None, train=True, download=True):
        super().__init__()

        self.transforms = transforms

        self.folder = "modelnet40_ply_hdf5_2048"
        self.data_dir = os.path.join(BASE_DIR, self.folder)
        self.url = "https://shapenet.cs.stanford.edu/media/modelnet40_ply_hdf5_2048.zip"

        if download and not os.path.exists(self.data_dir):
            self._download_and_extract()

        self.train = train
        split_list = "train_files.txt" if self.train else "test_files.txt"
        self.files = _get_data_files(os.path.join(self.data_dir, split_list))

        point_list, label_list = [], []
        for f in self.files:
            # Listed paths are relative to BASE_DIR, not to the data folder.
            points, labels = _load_data_file(os.path.join(BASE_DIR, f))
            point_list.append(points)
            label_list.append(labels)

        self.points = np.concatenate(point_list, 0)
        self.labels = np.concatenate(label_list, 0)
        self.set_num_points(num_points)

    def _download_and_extract(self):
        """Download the archive into BASE_DIR, unpack it there, then delete it."""
        archive = os.path.join(BASE_DIR, os.path.basename(self.url))
        # Pass argument lists instead of splitting a formatted string, so a
        # BASE_DIR containing whitespace is not broken into several arguments.
        subprocess.check_call(["curl", self.url, "-o", archive])
        subprocess.check_call(["unzip", archive, "-d", BASE_DIR])
        os.remove(archive)

    def __getitem__(self, idx):
        """Return ``(points, label)`` for sample ``idx``.

        Only the first ``num_points`` stored points are used; they are
        returned in a freshly shuffled order on every call.
        """
        pt_idxs = np.random.permutation(self.num_points)

        current_points = self.points[idx, pt_idxs].copy()
        label = torch.from_numpy(self.labels[idx]).type(torch.LongTensor)

        if self.transforms is not None:
            current_points = self.transforms(current_points)

        return current_points, label

    def __len__(self):
        """Number of samples in the loaded split."""
        return self.points.shape[0]

    def set_num_points(self, pts):
        """Set the number of points per sample, capped at what is stored."""
        self.num_points = min(self.points.shape[1], pts)

    def randomize(self):
        """No-op."""
        pass

In [38]:
from torchvision import transforms
import data_utils as d_utils
import numpy as np

# Augmentation pipeline built from the d_utils steps. Note that this rebinds
# the name `transforms` from the torchvision module to the composed pipeline.
transforms = transforms.Compose([
        d_utils.PointcloudToTensor(),
        d_utils.PointcloudRotate(axis=np.array([1, 0, 0])),
        d_utils.PointcloudScale(),
        d_utils.PointcloudTranslate(),
        d_utils.PointcloudJitter(),
    ])
# Training split with 16 points per sample. The pipeline above is NOT passed
# in (transforms=None), so samples are returned without augmentation.
dset = ModelNet40Cls(16, train=True, transforms=None)



In [40]:
# Shapes of the in-memory point and label arrays of the loaded split.
print(dset.points.shape,dset.labels.shape)

(9840, 2048, 3) (9840, 1)


<font color='red'>9840 samples in total; each sample has 2048 points.</font>

In [42]:
# Largest coordinate value of the first sample, taken over all points and axes.
np.max(dset.points[0])

0.9004929

In [43]:
# Smallest coordinate value of the first sample, taken over all points and axes.
np.min(dset.points[0])

-0.88815236

## ScanNet

In [2]:
import sys
sys.path.append('../../')
from pointnet2.models import Pointnet2ClsMSG as Pointnet
from pointnet2.models.pointnet2_msg_cls import model_fn_decorator
import pointnet2.data.data_utils as d_utils

In [3]:
from torchvision import transforms
# Augmentation pipeline for the ScanNet samples; the steps run in the order
# listed. Rebinds `transforms` from the torchvision module to the pipeline.
transforms = transforms.Compose(
    [
        d_utils.PointcloudToTensor(),
        d_utils.PointcloudScale(),
        d_utils.PointcloudRotate(),
        d_utils.PointcloudRotatePerturbation(),
        d_utils.PointcloudTranslate(),
        d_utils.PointcloudJitter(),
        d_utils.PointcloudRandomInputDropout(),
    ]
)

In [1]:
import torch, numpy as np, glob, math, torch.utils.data, scipy.ndimage, multiprocessing as mp
import os
import plyfile

def get_scene_type_id(type_name, type_mapping):
    """Look up the numeric id of a scene type name.

    The name is stripped of surrounding whitespace, lower-cased and has its
    spaces removed before it is looked up.

    Args:
        type_name: Scene type name, or None when no name is available.
        type_mapping: Dict mapping normalised names to integer ids.

    Returns:
        The mapped id, or -1 when ``type_name`` is None or not in the mapping.
    """
    # A missing name is treated like an unknown one instead of raising
    # AttributeError on ``None.strip()``.
    if type_name is None:
        return -1
    name = type_name.strip().lower().replace(' ', '')
    return type_mapping.get(name, -1)


def get_field_from_info_file(filename, field_name):
    """Return the value stored for ``field_name`` in a ``key = value`` file.

    Args:
        filename: Path of a text file holding one ``key = value`` pair per
            line. Lines without the ``' = '`` separator are ignored.
        field_name: Key to look up.

    Returns:
        The value as a string (everything after the first ``' = '`` on the
        line), or None when the key is absent; absence is logged at INFO
        level. If a key occurs more than once, the last occurrence wins.
    """
    # Imported here because the surrounding file does not import logging.
    import logging

    with open(filename) as info_file:
        lines = info_file.read().splitlines()

    mapping = {}
    for line in lines:
        # Split on the first separator only, so values may contain ' = '.
        key, sep, value = line.partition(' = ')
        if sep:
            mapping[key] = value

    if field_name in mapping:
        return mapping[field_name]
    logging.getLogger(__name__).info(
        'Failed to find %s in info file %s', field_name, filename
    )
    return None

# input: scene_types.txt or scene_types_all.txt
def read_scene_types_mapping(filename, remove_spaces=True):
    """Read a tab-separated ``id<TAB>name`` table into a ``{name: id}`` dict.

    Args:
        filename: Path of the table file; must be an existing regular file.
        remove_spaces: When true, names are stripped and all spaces inside
            them are removed before being used as keys. Names are not
            lower-cased here.

    Returns:
        Dict mapping each scene type name to its integer id. Blank lines in
        the file are skipped.

    Raises:
        AssertionError: If ``filename`` is not an existing file.
    """
    assert os.path.isfile(filename)
    with open(filename) as table:
        rows = [
            line.split('\t')
            for line in table.read().splitlines()
            if line.strip()  # tolerate empty lines, e.g. at the end of the file
        ]
    if remove_spaces:
        return {row[1].strip().replace(' ', ''): int(row[0]) for row in rows}
    return {row[1]: int(row[0]) for row in rows}



# get the list of files
def get_files(base_dir):
    """Collect the per-scene point-cloud and text files below ``base_dir``.

    Returns a ``(path_data, path_label)`` pair of sorted lists: every
    ``sparse.ply`` and every ``*.txt`` found exactly one directory level
    below ``base_dir``.
    """
    def _sorted_matches(pattern_suffix):
        return sorted(glob.glob(base_dir + pattern_suffix))

    # sparse.ply: only xyz and raw RGB are used; the .txt files give access
    # to the scene type.
    return _sorted_matches('/*/sparse.ply'), _sorted_matches('/*/*.txt')

class ScanNet(torch.utils.data.Dataset):
    """In-memory ScanNet scene-classification dataset using xyz coordinates.

    Every ``sparse.ply`` below ``base_dir`` is loaded at construction time,
    centred on its mean and divided by 4. The label of a scene is its
    ``sceneType`` id from the info file minus one (so a scene type missing
    from the mapping ends up with label -2).

    Args:
        base_dir: Directory whose sub-directories each hold one scene.
        transforms: Callable applied to a scene's points in ``__getitem__``,
            or None to return the stored points unchanged.
        scene_types_path: Path of the tab-separated scene-type table.
            Defaults to ``DEFAULT_SCENE_TYPES_PATH``.
    """

    # Location used when no scene-type table is passed explicitly.
    DEFAULT_SCENE_TYPES_PATH = '/net/pf-pc27/scratch3/scannet/tasks/scene_types_all.txt'

    def __init__(self, base_dir, transforms, scene_types_path=None):
        super().__init__()

        # get scene class
        if scene_types_path is None:
            scene_types_path = self.DEFAULT_SCENE_TYPES_PATH
        self.scene_type_mapping = read_scene_types_mapping(scene_types_path, remove_spaces=True)

        # load data, here we only take xyz
        path_data, path_label = get_files(base_dir)
        # Data and info files are paired purely by their sorted order.
        assert len(path_data) == len(path_label)

        point_list, label_list = [], []
        for path_points, path_info_file in zip(path_data, path_label):
            # get label
            type_name = get_field_from_info_file(path_info_file, 'sceneType')
            type_id = get_scene_type_id(type_name, self.scene_type_mapping)
            label_list.append(type_id - 1)

            # get data: the first three columns of the first PLY element
            ply = plyfile.PlyData.read(path_points)
            vertices = np.array([list(x) for x in ply.elements[0]])
            pts = vertices[:, :3]
            pts -= pts.mean(0)  # center the coordinates
            pts /= 4  # scale the coordinates
            point_list.append(np.expand_dims(pts, 0))

        # Stacking requires every scene to contain the same number of points.
        self.points = np.concatenate(point_list, 0)
        print(self.points.shape)
        self.labels = np.expand_dims(np.array(label_list), 1)
        print(self.labels.shape)
        self.transforms = transforms

    def __getitem__(self, idx):
        """Return ``(points, label)`` for scene ``idx``.

        The points are a copy of the stored array, passed through
        ``transforms`` when one was given.
        """
        current_points = self.points[idx].copy()
        label = torch.from_numpy(self.labels[idx]).type(torch.LongTensor)
        if self.transforms is not None:
            current_points = self.transforms(current_points)

        return current_points, label

    def __len__(self):
        """Number of loaded scenes."""
        return self.points.shape[0]

In [4]:
# Machine-specific locations of the ScanNet train and validation splits.
base_train='/net/pf-pc27/scratch3/scannet/train'
base_val='/net/pf-pc27/scratch3/scannet/val'

# Load the validation split with the augmentation pipeline attached; the
# constructor prints the shapes of the loaded point and label arrays.
test_set = ScanNet(base_val, transforms=transforms)

(500, 4096, 3)
(500, 1)


In [7]:
# Points of scene 0 as returned by __getitem__, i.e. after the transforms.
pts=test_set[0][0]

In [12]:
# Per-axis maximum of the stored (untransformed) points of scene 0.
np.max(test_set.points[0],0)

array([0.77187437, 0.89486996, 0.44139409])

In [13]:
# Per-axis minimum of the stored (untransformed) points of scene 0.
np.min(test_set.points[0],0)

array([-0.86286551, -0.92208023, -0.11601438])

In [15]:
from torch.utils.data import DataLoader
# Batches of 16 samples drawn in shuffled order, loaded by two worker
# processes into pinned memory.
# NOTE(review): shuffle=True on a loader named test_loader -- confirm intended.
test_loader = DataLoader(
        test_set,
        batch_size=16,
        shuffle=True,
        num_workers=2,
        pin_memory=True,
    )

In [16]:
# Explicit iterator so that single batches can be pulled with next().
iter_loader=iter(test_loader)

In [18]:
# Fetch one batch from the loader.
batch=next(iter_loader)

In [19]:
# Inspect which container type the loader yields for a batch.
type(batch)

list

In [20]:
# Each sample is a (points, label) pair, so the collated batch is a
# two-element list [points_batch, labels_batch]: pick the field first, then
# the sample. batch[0][1] would be the second point cloud, not a label.
pts=batch[0][0]
label=batch[1][0]

In [21]:
# Per-axis maximum; on a tensor this returns both the values and their indices.
pts.max(0)

torch.return_types.max(
values=tensor([0.7703, 0.5847, 0.7663]),
indices=tensor([  41, 1219, 3284]))

In [22]:
# Per-axis minimum; on a tensor this returns both the values and their indices.
pts.min(0)

torch.return_types.min(
values=tensor([-0.4514, -0.6611, -0.8184]),
indices=tensor([3300,    2, 3997]))