In [1]:
%matplotlib inline

# Exploratory Data Analysis for RoofN3D dataset

## CAUTION: Add a data folder in the root directory and copy the RoofN3D data into it

In [2]:
import pandas as pd
import numpy as np
import os
import pptk

In [3]:
data_folder = '../../data_final/roofn3d_raw/'

In [4]:
def convert_to_nparray(point):
    """Parse a WKT-style coordinate string into a 2-D float array.

    Only the text between the first '(' and the next ')' is read, e.g.
    ``'MULTIPOINT Z (1 2 3,4 5 6)'``. Vertices are separated by commas and the
    coordinates of one vertex by whitespace.

    Args:
        point: geometry string such as the ``points`` column of the RoofN3D CSVs.

    Returns:
        np.ndarray with one row per vertex and one column per coordinate.

    Raises:
        IndexError: if the string contains no '('.
        ValueError: if a coordinate cannot be converted to float.
    """
    coords_text = point.split('(')[1].split(')')[0]
    # split() without an argument tolerates a space after the comma and
    # repeated spaces; split(' ') produced empty tokens and float('') errors.
    vertices = [[float(value) for value in vertex.split()]
                for vertex in coords_text.split(',')]
    return np.array(vertices)

# Buildings

In [5]:
buildings = pd.read_csv(os.path.join(data_folder, 'roofn3d_buildings.csv'))
buildings.head()

Unnamed: 0,id,class,points,outline,unassignedsurfacegrowingpoints,unassignedsubsurfacegrowingpoints,brep
0,3,Saddleback roof,"MULTIPOINT Z (601671.52 4494846.06 3.98,601671...",MULTIPOLYGON(((601678.25853501 4494845.2224567...,"MULTIPOINT Z (601671.52 4494846.06 3.98,601674...","MULTIPOINT Z (601671.52 4494846.06 3.98,601674...",MULTIPOLYGON Z (((601677.943095 4494848.082989...
1,21,Two-sided hip roof,"MULTIPOINT Z (605909.29 4495092.37 12.12,60590...",MULTIPOLYGON(((605920.421651644 4495095.002278...,"MULTIPOINT Z (605911.4 4495090.41 6.52,605911....","MULTIPOINT Z (605911.4 4495090.41 6.52,605911....",MULTIPOLYGON Z (((605915.046751 4495094.635086...
2,22,Saddleback roof,"MULTIPOINT Z (597163.6 4492549.68 5.94,597163....",MULTIPOLYGON(((597178.791577574 4492556.250948...,"MULTIPOINT Z (597163.5 4492549.71 2.74,597162....","MULTIPOINT Z (597163.5 4492549.71 2.74,597162....",MULTIPOLYGON Z (((597176.527273 4492560.564063...
3,23,Saddleback roof,"MULTIPOINT Z (596686.37 4491940.82 6.16,596686...",MULTIPOLYGON(((596701.419817058 4491947.174949...,"MULTIPOINT Z (596686.37 4491940.82 6.16,596686...","MULTIPOINT Z (596686.37 4491940.82 6.16,596686...",MULTIPOLYGON Z (((596699.893287 4491950.075607...
4,25,Saddleback roof,"MULTIPOINT Z (604024.32 4494686.47 8.67,604025...",MULTIPOLYGON(((604038.991861539 4494688.568083...,"MULTIPOINT Z (604023.84 4494692.3 1.09,604023....","MULTIPOINT Z (604023.84 4494692.3 1.09,604023....",MULTIPOLYGON Z (((604038.264868 4494691.678397...


In [6]:
buildings.shape

(118073, 7)

In [7]:
# Parse every building's point string and record how many points it holds.
num_pts = [convert_to_nparray(wkt).shape[0] for wkt in buildings['points']]

In [8]:
buildings['num_pts'] = num_pts

In [9]:
# average number of points per point cloud
buildings['num_pts'].mean()

368.77328432410457

In [9]:
# For each threshold 100, 200, ..., 1900: number of buildings with at least
# that many points.
num_recs = [
    buildings.loc[buildings.num_pts >= threshold].shape[0]
    for threshold in range(100, 2000, 100)
]

In [7]:
# One parsed coordinate array per building, in dataframe row order.
points_array = [convert_to_nparray(wkt) for wkt in buildings['points']]

### standardization and normalization stats

In [8]:
arr = np.vstack(points_array)

In [9]:
arr.shape

(43542168, 3)

In [10]:
arr.std(axis=0)

array([13035.25088591, 10032.12078908,    14.47440804])

In [13]:
arr.mean(axis=0)

array([5.91746738e+05, 4.50369896e+06, 2.32425161e+01])

In [7]:
buildings['class'].value_counts()

Saddleback roof       89057
Two-sided hip roof    26830
Pyramid roof           2186
Name: class, dtype: int64

In [8]:
# The number of unique IDs is less than the number of rows, so some IDs are duplicated.
len(buildings.id.unique())

118064

In [9]:
# NOTE: this cell needs `num_recs` from the threshold-count cell; the NameError
# recorded below this cell means it was run in a kernel where `num_recs` did
# not exist yet.
import matplotlib.pyplot as plt
# x: point-count thresholds (100..1900, step 100); y: buildings with at least that many points
plt.plot(range(100, 2000, 100), num_recs)
plt.xlabel('n_points')
plt.ylabel('data points')
plt.title('number of instances having points greater than n_points')

NameError: name 'num_recs' is not defined

In [13]:
buildings.loc[buildings.num_pts >= 700]['class'].value_counts()

Two-sided hip roof    5923
Saddleback roof       5225
Pyramid roof            41
Name: class, dtype: int64

# Building Parts

In [9]:
buildingparts = pd.read_csv(os.path.join(data_folder, 'roofn3d_buildingparts.csv'))
buildingparts.head()

Unnamed: 0,id,class,orientation,brep,fk_buildings
0,499,Saddleback roof,275.627383,MULTIPOLYGON Z (((601677.943095 4494848.082989...,3
1,500,Saddleback roof,294.591882,MULTIPOLYGON Z (((597176.527273 4492560.564063...,22
2,501,Saddleback roof,294.685781,MULTIPOLYGON Z (((596699.893287 4491950.075607...,23
3,502,Saddleback roof,278.95887,MULTIPOLYGON Z (((604038.264868 4494691.678397...,25
4,503,Saddleback roof,8.385665,MULTIPOLYGON Z (((601126.359775 4493781.626578...,36


In [10]:
buildingparts['class'].value_counts()

Saddleback roof       89057
Two-sided hip roof    26830
Pyramid roof           2186
Name: class, dtype: int64

In [11]:
buildingparts['class'].value_counts()

Saddleback roof       89057
Two-sided hip roof    26830
Pyramid roof           2186
Name: class, dtype: int64

# Surface Growing

In [88]:
surfacegrowing = pd.read_csv(os.path.join(data_folder, 'roofn3d_surfacegrowing.csv'), nrows=100)
surfacegrowing.head()

Unnamed: 0,id,points,outline,plane_a,plane_b,plane_c,plane_d,fk_buildings
0,40786,"MULTIPOINT Z (601676.72 4494850.24 4.65,601676...","POLYGON Z ((601671.27 4494849.83 4.667314,6016...",-0.021077,0.393345,0.919149,-1755349.0,3
1,40787,"MULTIPOINT Z (601673.32 4494845.95 4.95,601673...","POLYGON Z ((601671.62 4494846.24 5.17033,60167...",0.056648,-0.422755,0.904472,1866129.0,3
2,40850,"MULTIPOINT Z (605914.22 4495096.36 12.65,60591...","POLYGON Z ((605909.84 4495093.22 12.401669,605...",-0.212646,0.221878,0.951605,-868530.3,21
3,40851,"MULTIPOINT Z (605916.79 4495094.07 12.68,60591...","POLYGON Z ((605912.91 4495091.67 12.98974,6059...",0.197077,-0.209702,0.957698,823206.7,21
4,40852,"MULTIPOINT Z (605915.6 4495098.54 12.21,605915...","POLYGON Z ((605914.62 4495098.43 12.449665,605...",0.225219,0.234676,0.945623,-1191366.0,21


# Sub-Surface Growing

In [12]:
subsurfacegrowing = pd.read_csv(os.path.join(data_folder, 'roofn3d_subsurfacegrowing.csv'))
subsurfacegrowing.head()

Unnamed: 0,id,points,surfacepoints,subsurfacepoints,outline,plane_a,plane_b,plane_c,plane_d,fk_buildings
0,12215,"MULTIPOINT Z (601676.72 4494850.24 4.65,601676...","MULTIPOINT Z (601676.72 4494850.24 4.65,601676...","MULTIPOINT Z (601672.59 4494847.68 5.617663,60...","POLYGON Z ((601671.27 4494849.83 4.667404,6016...",-0.021118,0.392669,0.919437,-1752287.0,3
1,12216,"MULTIPOINT Z (601673.32 4494845.95 4.95,601673...","MULTIPOINT Z (601673.32 4494845.95 4.95,601673...","MULTIPOINT Z (601672.09 4494847.39 5.678409,60...","POLYGON Z ((601671.62 4494846.24 5.16899,60167...",0.056397,-0.421766,0.904949,1861834.0,3
2,12275,"MULTIPOINT Z (605914.22 4495096.36 12.65,60591...","MULTIPOINT Z (605914.22 4495096.36 12.65,60591...","MULTIPOINT Z (605909.61 4495093.71 12.236024,6...","POLYGON Z ((605909.33 4495093.41 12.247475,605...",-0.21173,0.221395,0.951922,-866912.5,21
3,12276,"MULTIPOINT Z (605916.79 4495094.07 12.68,60591...","MULTIPOINT Z (605916.79 4495094.07 12.68,60591...","MULTIPOINT Z (605913.5 4495088.76 12.231141,60...","POLYGON Z ((605912.91 4495091.67 12.997197,605...",0.197953,-0.205221,0.958488,802533.8,21
4,12277,"MULTIPOINT Z (605915.6 4495098.54 12.21,605915...","MULTIPOINT Z (605915.6 4495098.54 12.21,605915...","MULTIPOINT Z (605919.84 4495094.47 12.189174,6...","POLYGON Z ((605914.62 4495098.43 12.441393,605...",0.223619,0.235946,0.945687,-1196105.0,21


In [13]:
# get maximum number of parts for a building
subsurfacegrowing.fk_buildings.value_counts().max()

4

# map parts to buildings

In [19]:
# get all parts for building_id == 3
building_id = 3
parts = subsurfacegrowing.loc[subsurfacegrowing.fk_buildings == building_id]
parts.shape

(2, 10)

In [20]:
parts

Unnamed: 0,id,points,surfacepoints,subsurfacepoints,outline,plane_a,plane_b,plane_c,plane_d,fk_buildings
0,12215,"MULTIPOINT Z (601676.72 4494850.24 4.65,601676...","MULTIPOINT Z (601676.72 4494850.24 4.65,601676...","MULTIPOINT Z (601672.59 4494847.68 5.617663,60...","POLYGON Z ((601671.27 4494849.83 4.667404,6016...",-0.021118,0.392669,0.919437,-1752287.0,3
1,12216,"MULTIPOINT Z (601673.32 4494845.95 4.95,601673...","MULTIPOINT Z (601673.32 4494845.95 4.95,601673...","MULTIPOINT Z (601672.09 4494847.39 5.678409,60...","POLYGON Z ((601671.62 4494846.24 5.16899,60167...",0.056397,-0.421766,0.904949,1861834.0,3


In [23]:
# Surface points of every part of the selected building, as coordinate arrays.
parts_np = [convert_to_nparray(wkt) for wkt in parts['surfacepoints']]

In [24]:
# Compare the building's point count with the points claimed by its parts and
# the points left unassigned by surface growing.
building_row = buildings.loc[buildings.id == building_id]

building_points = convert_to_nparray(building_row.points.item())
print('total points in building: {}'.format(building_points.shape[0]))

parts_points = 0
for part_points in parts_np:
    parts_points += part_points.shape[0]
print('total points in parts: {}'.format(parts_points))

unassigned_points = convert_to_nparray(building_row.unassignedsurfacegrowingpoints.item())
print('total unassigned points in buildings: {}'.format(unassigned_points.shape[0]))

total points in building: 266
total points in parts: 247
total unassigned points in buildings: 19


In [36]:
# check if parts belong to building: report the first point of each part that
# does not occur among the building's own points
building_list = building_points.tolist()
for part in parts_np:
    stray = next((pt for pt in part.tolist() if pt not in building_list), None)
    if stray is not None:
        print('point {} not in building'.format(stray))

# Visualization

In [13]:
building_id = 3

## visualize building

In [16]:
# Pick the first 'Saddleback roof' record and keep the whole row, so the log
# line below reports the ID of the building that is actually displayed rather
# than whatever `building_id` currently holds.
sample_row = buildings.loc[buildings['class'] == 'Saddleback roof'].iloc[0]
sample_point = sample_row['points']
# sample_point = buildings.loc[buildings['id'] == building_id].points.item()
print(sample_point)
sample_points_split = sample_point.split('(')[1].split(')')[0].split(',')

# points in the dataframe are in String format. Change them to List of List of floats. Each list is of format x, y, z
points = [[float(coord) for coord in vertex.split(' ')] for vertex in sample_points_split]
print('sample point conversion: ', points[:2])
print('{} points in building ID: {}'.format(len(points), sample_row['id']))
# convert the L_O_L to np.array for PPTK viewing
points_np = np.array(points)
print('shape after conversion: {}'.format(points_np.shape))
print('viewing building..')

# Constants are the dataset-wide per-axis mean / std printed by the
# arr.mean(axis=0) and arr.std(axis=0) cells above.
points_sub = np.subtract(points_np, np.array([5.91746738e+05, 4.50369896e+06, 2.32425161e+01]))
# points_std is computed for reference only; the viewer shows the mean-shifted points_sub.
points_std = np.divide(points_sub, np.array([13035.25088591, 10032.12078908,    14.47440804]))
v = pptk.viewer(points_sub)
# control the thickness of the points
v.set(point_size=0.1)

MULTIPOINT Z (601671.52 4494846.06 3.98,601671.65 4494844.94 4.26,601671.84 4494845.64 4.95,601671.87 4494845.2 4.55,601671.88 4494845.67 4.88,601671.9 4494845.74 4.98,601671.91 4494845.72 5.11,601672.12 4494845.2 4.75,601672.19 4494844.96 4.61,601672.3 4494845.95 5.03,601672.54 4494844.95 4.52,601672.6 4494845.06 4.68,601672.68 4494844.93 4.23,601672.93 4494845.31 4.71,601672.99 4494845.48 4.72,601673.03 4494845.19 4.57,601673.32 4494845.95 4.95,601673.36 4494846.08 5.07,601673.42 4494845.78 4.83,601673.5 4494845.73 4.78,601673.63 4494846.16 5.1,601673.82 4494845.74 4.82,601673.96 4494846.04 4.92,601674.16 4494845.05 2.12,601674.16 4494845.31 4.69,601674.2 4494845.18 4.51,601674.24 4494844.99 4.49,601674.34 4494845.49 4.75,601674.35 4494845.95 4.86,601674.44 4494845.07 4.49,601674.53 4494845.04 4.44,601674.69 4494845.79 4.84,601674.8 4494845.74 4.69,601675.04 4494846.2 4.95,601675.15 4494845.03 2.34,601675.36 4494845.94 4.82,601675.37 4494845.17 4.48,601675.61 4494845.6 4.58,601675.64

In [34]:
# Centre the cloud on its own per-axis mean and cast to float32 before viewing.
centroid = points_np.mean(axis=0)
points_np_cent = (points_np - centroid).astype(np.float32)
print('shape after conversion: {}'.format(points_np.shape))
print('viewing building..')
v = pptk.viewer(points_np_cent)
# control the thickness of the points
v.set(point_size=0.1)

shape after conversion: (266, 3)
viewing building..


## visualize building brep

In [35]:
sample_brep = buildings.loc[buildings.id == building_id].brep.item()
sample_brep

'MULTIPOLYGON Z (((601677.943095 4494848.082989 5.658002,601671.468862 4494847.44506 5.658002,601671.735305 4494844.740967 4.441582,601678.209538 4494845.378895 4.441582,601677.943095 4494848.082989 5.658002)),((601671.735305 4494844.740967 4.441582,601671.468862 4494847.44506 5.658002,601671.197017 4494850.203969 4.416923,601671.197017 4494850.203969 0,601671.735305 4494844.740967 0,601671.735305 4494844.740967 4.441582)),((601671.468862 4494847.44506 5.658002,601677.943095 4494848.082989 5.658002,601677.67125 4494850.841897 4.416923,601671.197017 4494850.203969 4.416923,601671.468862 4494847.44506 5.658002)),((601677.943095 4494848.082989 5.658002,601678.209538 4494845.378895 4.441582,601678.209538 4494845.378895 0,601677.67125 4494850.841897 0,601677.67125 4494850.841897 4.416923,601677.943095 4494848.082989 5.658002)),((601678.209538 4494845.378895 4.441582,601671.735305 4494844.740967 4.441582,601671.735305 4494844.740967 0,601678.209538 4494845.378895 0,601678.209538 4494845.3788

In [36]:
# Cut the MULTIPOLYGON text into rings and parse each ring into a list of
# [x, y, z] float lists.
polygons = []
brep_body = sample_brep.partition('(')[2].rpartition(')')[0]
for chunk in brep_body.split('))'):
    pieces = chunk.split('((')
    if len(pieces) != 2:
        # chunk carries no '((' opener, so there is no ring text to parse
        continue
    ring = [[float(value) for value in vertex.split(' ')] for vertex in pieces[1].split(',')]
    polygons.append(ring)

In [37]:
polygons[0]

[[601677.943095, 4494848.082989, 5.658002],
 [601671.468862, 4494847.44506, 5.658002],
 [601671.735305, 4494844.740967, 4.441582],
 [601678.209538, 4494845.378895, 4.441582],
 [601677.943095, 4494848.082989, 5.658002]]

In [53]:
v = pptk.viewer(np.array([polygons[5], polygons[6], polygons[4]]))
# control the thickness of the points
v.set(point_size=0.1)

## visualizing building parts brep

In [29]:
# Fetch the brep of the building part linked to `building_id` and parse its
# rings into lists of [x, y, z] float lists.
sample_brep = buildingparts.loc[buildingparts.fk_buildings == building_id].brep.item()
print(' sample brep: ', sample_brep)
polygons = []
brep_body = sample_brep.partition('(')[2].rpartition(')')[0]
for chunk in brep_body.split('))'):
    pieces = chunk.split('((')
    if len(pieces) != 2:
        # chunk carries no '((' opener, so there is no ring text to parse
        continue
    ring = [[float(value) for value in vertex.split(' ')] for vertex in pieces[1].split(',')]
    polygons.append(ring)

 sample brep:  MULTIPOLYGON Z (((601677.943095 4494848.082989 5.658002,601671.468862 4494847.44506 5.658002,601671.735305 4494844.740967 4.441582,601678.209538 4494845.378895 4.441582,601677.943095 4494848.082989 5.658002)),((601671.735305 4494844.740967 4.441582,601671.468862 4494847.44506 5.658002,601671.197017 4494850.203969 4.416923,601671.197017 4494850.203969 0,601671.735305 4494844.740967 0,601671.735305 4494844.740967 4.441582)),((601671.468862 4494847.44506 5.658002,601677.943095 4494848.082989 5.658002,601677.67125 4494850.841897 4.416923,601671.197017 4494850.203969 4.416923,601671.468862 4494847.44506 5.658002)),((601677.943095 4494848.082989 5.658002,601678.209538 4494845.378895 4.441582,601678.209538 4494845.378895 0,601677.67125 4494850.841897 0,601677.67125 4494850.841897 4.416923,601677.943095 4494848.082989 5.658002)),((601678.209538 4494845.378895 4.441582,601671.735305 4494844.740967 4.441582,601671.735305 4494844.740967 0,601678.209538 4494845.378895 0,601678.20953

In [30]:
polygons

[[[601677.943095, 4494848.082989, 5.658002],
  [601671.468862, 4494847.44506, 5.658002],
  [601671.735305, 4494844.740967, 4.441582],
  [601678.209538, 4494845.378895, 4.441582],
  [601677.943095, 4494848.082989, 5.658002]],
 [[601671.735305, 4494844.740967, 4.441582],
  [601671.468862, 4494847.44506, 5.658002],
  [601671.197017, 4494850.203969, 4.416923],
  [601671.197017, 4494850.203969, 0.0],
  [601671.735305, 4494844.740967, 0.0],
  [601671.735305, 4494844.740967, 4.441582]],
 [[601671.468862, 4494847.44506, 5.658002],
  [601677.943095, 4494848.082989, 5.658002],
  [601677.67125, 4494850.841897, 4.416923],
  [601671.197017, 4494850.203969, 4.416923],
  [601671.468862, 4494847.44506, 5.658002]],
 [[601677.943095, 4494848.082989, 5.658002],
  [601678.209538, 4494845.378895, 4.441582],
  [601678.209538, 4494845.378895, 0.0],
  [601677.67125, 4494850.841897, 0.0],
  [601677.67125, 4494850.841897, 4.416923],
  [601677.943095, 4494848.082989, 5.658002]],
 [[601678.209538, 4494845.378895,

In [31]:
v = pptk.viewer(np.array(polygons[6]))
# control the thickness of the points
v.set(point_size=0.1)

## visualize subsurfacegrowing 

In [25]:
# Take the second part of the building by position. `.iloc[1]` replaces `[1]`:
# on the filtered Series `[1]` looks up the index *label* 1, which only exists
# when the selected rows happen to carry that label in the CSV.
sample_ssg = subsurfacegrowing.loc[subsurfacegrowing.fk_buildings == building_id].points.iloc[1]
print(sample_ssg)
sample_ssg_split = sample_ssg.split('(')[1].split(')')[0].split(',')
# points in the dataframe are in String format. Change them to List of List of floats. Each list is of format x, y, z
points = [[float(coord) for coord in vertex.split(' ')] for vertex in sample_ssg_split]
print('sample point conversion: ', points[:2])
print('{} points in building sample_idx: {}'.format(len(points), building_id))
# convert the L_O_L to np.array for PPTK viewing
points_np = np.array(points)
print('shape after conversion: {}'.format(points_np.shape))
print('viewing building..')
v = pptk.viewer(points_np)
# control the thickness of the points
v.set(point_size=0.1)

MULTIPOINT Z (601673.32 4494845.95 4.95,601673.36 4494846.08 5.07,601672.99 4494845.48 4.72,601673.03 4494845.19 4.57,601672.3 4494845.95 5.03,601671.91 4494845.72 5.11,601673.5 4494845.73 4.78,601673.42 4494845.78 4.83,601673.11 4494846.44 5.21,601672.43 4494846.3 5.19,601673.63 4494846.16 5.1,601673.96 4494846.04 4.92,601672.72 4494846.54 5.19,601673.82 4494845.74 4.82,601674.34 4494845.49 4.75,601674.35 4494845.95 4.86,601674.44 4494845.07 4.49,601674.2 4494845.18 4.51,601674.69 4494845.79 4.84,601674.8 4494845.74 4.69,601675.36 4494845.94 4.82,601675.69 4494845.98 4.74,601675.61 4494845.6 4.58,601675.83 4494845.85 4.8,601676 4494845.68 4.73,601672.12 4494845.2 4.75,601676.64 4494845.55 4.47,601677.02 4494845.81 4.6,601676.68 4494845.83 4.69,601677.41 4494845.93 4.69,601676.39 4494845.94 4.75,601677.35 4494846.19 4.83,601675.37 4494845.17 4.48,601672.94 4494846.83 5.5,601673.48 4494847 5.38,601672.4 4494847.13 5.59,601672.74 4494846.21 5.06,601673.79 4494846.54 5.02,601671.94 449484

# Multi-Task Dataset Creation

In [35]:
import os
from collections import defaultdict
from sklearn.model_selection import train_test_split
import random

In [36]:
op_data_dir = '../../data_final/roofn3d_data_damage_all'
if not os.path.exists(op_data_dir):
    os.makedirs(op_data_dir)

In [37]:
pLimit = 0
cls_limit = 10000000000
nlog = 1000
random_state = 42
damage = True

In [38]:
possible_buildings = buildings.loc[buildings.num_pts >= pLimit]
print('shape: ', possible_buildings.shape)
print('class distribution: ', possible_buildings['class'].value_counts())

shape:  (118073, 8)
class distribution:  Saddleback roof       89057
Two-sided hip roof    26830
Pyramid roof           2186
Name: class, dtype: int64


In [39]:
# Two stratified 90/10 splits: first hold out the test set, then carve the
# validation set out of the remaining training data.
split_kwargs = dict(test_size=0.1, random_state=random_state)
train_buildings, test_buildings = train_test_split(
    possible_buildings, stratify=possible_buildings['class'], **split_kwargs)
train_buildings, val_buildings = train_test_split(
    train_buildings, stratify=train_buildings['class'], **split_kwargs)
for split_label, split_frame in (('train: ', train_buildings),
                                 ('val: ', val_buildings),
                                 ('test: ', test_buildings)):
    print(split_label, split_frame.shape)

train:  (95638, 8)
val:  (10627, 8)
test:  (11808, 8)


In [40]:
all_splits = {'train':train_buildings, 'val':val_buildings, 'test':test_buildings}

In [41]:
def seg_points(building_id, building_points=np.array([])):
    """Label each building point with the 1-based index of the part that contains it.

    Parts are the rows of ``subsurfacegrowing`` whose ``fk_buildings`` equals
    ``building_id``, numbered in row order starting at 1. A point belongs to a
    part when its coordinates appear exactly in that part's ``surfacepoints``;
    if several parts list the same point, the first part wins. Points found in
    no part get the label ``number_of_parts + 1``.

    Args:
        building_id: id used to select the parts (and, if needed, the building).
        building_points: array of points to label. When it is empty
            (``size == 0``), the points are read from ``buildings`` instead.

    Returns:
        list of int labels, one per point, in the order of ``building_points``.
    """
    # get the building points from original data if not given
    if building_points.size == 0:
        print('fetching data from original because no points were passed')
        building_points = convert_to_nparray(buildings.loc[buildings.id == building_id].points.item())

    # get the parts for that building
    building_parts = subsurfacegrowing.loc[subsurfacegrowing.fk_buildings == building_id]

    def _as_key(coords):
        # coordinate rows arrive as lists; tuples make them usable as dict keys
        return tuple(coords) if isinstance(coords, list) else coords

    # Build one lookup table instead of scanning every part list for every
    # point. setdefault keeps the first part that lists a point, matching the
    # "first match wins" order of a part-by-part scan.
    label_of_point = {}
    for part_id, part in enumerate(building_parts.itertuples()):
        for coords in convert_to_nparray(part.surfacepoints).tolist():
            label_of_point.setdefault(_as_key(coords), part_id + 1)

    unassigned_label = len(building_parts) + 1
    return [label_of_point.get(_as_key(coords), unassigned_label)
            for coords in building_points.tolist()]

In [42]:
def get_damaged_points(pts, num_scans):
    """Create damaged copies of a point cloud by cutting a ball out of each copy.

    For every scan a centre is drawn at random from ``pts`` and an integer
    radius is drawn from 1 to 3 (inclusive); only the points whose Euclidean
    distance to the centre is strictly greater than the radius are kept.

    Args:
        pts: array-like of points, one point per row.
        num_scans: number of damaged copies to produce.

    Returns:
        list with ``num_scans`` arrays. Each array keeps the trailing
        dimensions of ``pts`` even when every point was removed (e.g. shape
        ``(0, 3)``), so ``mean(axis=0)`` and friends still see the columns.

    Raises:
        ValueError: from ``np.random.choice`` when ``pts`` is empty and
            ``num_scans`` is positive.
    """
    pts = np.asarray(pts)
    damaged = []
    for _ in range(num_scans):
        circle_radius = random.randint(1, 3)
        centre = pts[np.random.choice(len(pts))]
        # one vectorised distance per point; reshape keeps this valid for
        # 1-D inputs as well
        distances = np.linalg.norm((pts - centre).reshape(len(pts), -1), axis=1)
        damaged.append(pts[distances > circle_radius])
    return damaged

In [1]:
import torch
a = torch.randn(10)
a.shape

torch.Size([10])

In [2]:
a

tensor([ 0.3879,  1.3448,  0.1129, -0.1109, -0.8349, -0.1394, -0.5484, -1.0674,
         1.6182,  1.1007])

In [3]:
a[[1, 2, 3]]

tensor([ 1.3448,  0.1129, -0.1109])

In [43]:
# Class names are taken from the full dataset (not from the split), so every
# split gets the same class folders.
classes = buildings['class'].value_counts().keys().tolist()

# The loop variable is deliberately NOT called `possible_buildings`: reusing
# that name would leave the global pointing at the last split after this cell.
for split_name, split_buildings in all_splits.items():
    print('processing split: ', split_name)

    # per-split bookkeeping, keyed by building id
    building_points_seg = defaultdict(list)
    building_cls = dict()

    # class distribution
    print('class distribution: ', split_buildings['class'].value_counts())

    for cls in classes:
        print('processing class: {}'.format(cls))

        buildings_cls = split_buildings.loc[split_buildings['class'] == cls]
        # folder-friendly class name: spaces replaced by underscores
        cls = str(cls.replace(' ', '_'))

        # create 3 folders: points, points_label, seg_img
        cls_folders = [os.path.join(op_data_dir, split_name, cls, 'points'),
                       os.path.join(op_data_dir, split_name, cls, 'points_label'),
                       os.path.join(op_data_dir, split_name, cls, 'seg_img')]
        for f in cls_folders:
            os.makedirs(f, exist_ok=True)

        # -1 keeps the 'exited at' message well-defined when the class has no
        # record in this split (the loop below then never assigns it).
        building_idx = -1

        # for each record in each category, convert into np array and convert into text file
        for building_idx, building in enumerate(buildings_cls.itertuples()):

            if building_idx % nlog == 0:
                print('processing: {}'.format(building_idx))

            try:
                points_np = convert_to_nparray(building.points)

                ###### CLASSIFICATION data
                building_cls[building.id] = cls
                if damage:
                    # damaged copy keeps original coordinates; only the saved
                    # classification points are centred on their own mean
                    damaged_pts = get_damaged_points(points_np, 1)[0]
                    points_sub = np.subtract(damaged_pts, damaged_pts.mean(axis=0))
                    seg_source = damaged_pts
                else:
                    points_sub = np.subtract(points_np, points_np.mean(axis=0))
                    seg_source = points_np
                np.savetxt(os.path.join(op_data_dir, split_name, cls, 'points', str(building.id)+'.pts'),
                           points_sub,
                           delimiter=' ',
                           fmt='%1.5f')

                ###### SEGMENTATION data
                # get the segmented points (labels are matched on the un-centred coordinates)
                building_points_seg[building.id] = seg_points(building.id, building_points=seg_source)
                # save the point dictionary for that building
                np.savetxt(os.path.join(op_data_dir, split_name, cls, 'points_label', str(building.id)+'.seg'),
                           np.array(building_points_seg[building.id]),
                           fmt='%i')
            except Exception as exc:
                # Best effort: skip the bad record but say which one failed and
                # why. `Exception` (not a bare except) lets a kernel interrupt
                # stop this long-running loop.
                print('exception occurred at: ', building_idx, 'building id:', building.id, repr(exc))

            # break if cls_limit limit is reached.
            # NOTE: checked after processing, so cls_limit + 1 records are handled.
            if building_idx == cls_limit:
                break

        print('exited at: {}'.format(building_idx))
        # add category in synsetoffset2category.txt as categoryname -> folder_name
        # NOTE: append mode, so re-running this cell adds the same lines again.
        with open(os.path.join(op_data_dir, split_name, 'synsetoffset2category.txt'), 'a') as category_file:
            category_line = str(cls + '\t' + cls) + '\n'
            category_file.write(category_line)

processing split:  train
class distribution:  Saddleback roof       72136
Two-sided hip roof    21732
Pyramid roof           1770
Name: class, dtype: int64
processing class: Saddleback roof
processing: 0
processing: 1000
processing: 2000
processing: 3000
processing: 4000
processing: 5000
processing: 6000
processing: 7000
processing: 8000
processing: 9000
processing: 10000
processing: 11000
processing: 12000
processing: 13000
processing: 14000
processing: 15000
processing: 16000
processing: 17000
processing: 18000
processing: 19000
processing: 20000
processing: 21000
processing: 22000
processing: 23000
processing: 24000
processing: 25000
processing: 26000
processing: 27000
processing: 28000
processing: 29000
processing: 30000
processing: 31000
processing: 32000
processing: 33000
processing: 34000
processing: 35000
processing: 36000
processing: 37000
processing: 38000
processing: 39000
processing: 40000
processing: 41000
processing: 42000
processing: 43000
processing: 44000
processing: 4

In [12]:
import numpy as np
a = np.array([1])
a

array([1])

In [13]:
print(a)

[1]


In [14]:
a.shape

(1,)

In [23]:
a = np.array(1)
len(a.shape)

0

In [24]:
a.shape

()

In [25]:
a = np.atleast_1d(a)
a.shape

(1,)

In [26]:
a = np.array([1,2, 3])

In [27]:
len(a)

3

In [28]:
a.shape

(3,)

In [29]:
a = np.atleast_2d(a)
a.shape

(1, 3)

In [10]:
y = np.expand_dims(a, axis=0)
y.shape

(1, 3)

In [15]:
y.size

3

In [30]:
d = {'a':1, 'B':2}
d

{'a': 1, 'B': 2}

In [31]:
d_i = {v:k for k,v in d.items()}
d_i

{1: 'a', 2: 'B'}

# Damaged Dataset

In [53]:
num_partial_scans = 8

In [54]:
from sklearn.model_selection import train_test_split

In [55]:
random_state = 42

In [56]:
possible_buildings = buildings.loc[buildings.num_pts >= 1000]

In [57]:
train_buildings, test_buildings = train_test_split(possible_buildings, 
                                                   test_size=0.1, 
                                                   stratify=possible_buildings['class'],
                                                   random_state=random_state)

In [58]:
train_buildings, valid_buildings = train_test_split(train_buildings,
                                                    test_size=0.1,
                                                    stratify=train_buildings['class'],
                                                    random_state=random_state)

In [59]:
test_buildings.shape

(339, 8)

In [60]:
classes = possible_buildings['class'].unique()

In [61]:
import random, string
from open3d import *

In [62]:
def get_damaged_points(pts, num_scans):
    """Create damaged copies of a point cloud by cutting a ball out of each copy.

    For every scan a centre is drawn at random from ``pts`` and an integer
    radius is drawn from 1 to 3 (inclusive); only the points whose Euclidean
    distance to the centre is strictly greater than the radius are kept.

    Args:
        pts: array-like of points, one point per row.
        num_scans: number of damaged copies to produce.

    Returns:
        list with ``num_scans`` arrays. Each array keeps the trailing
        dimensions of ``pts`` even when every point was removed (e.g. shape
        ``(0, 3)``), so ``mean(axis=0)`` and friends still see the columns.

    Raises:
        ValueError: from ``np.random.choice`` when ``pts`` is empty and
            ``num_scans`` is positive.
    """
    pts = np.asarray(pts)
    damaged = []
    for _ in range(num_scans):
        circle_radius = random.randint(1, 3)
        centre = pts[np.random.choice(len(pts))]
        # one vectorised distance per point; reshape keeps this valid for
        # 1-D inputs as well
        distances = np.linalg.norm((pts - centre).reshape(len(pts), -1), axis=1)
        damaged.append(pts[distances > circle_radius])
    return damaged

In [63]:
def remove_unassigned(all_points, to_remove):
    """Return the entries of ``all_points`` that do not occur in ``to_remove``.

    Membership is tested with ``in``, so entries are compared by equality.
    The surviving entries are returned, in their original order, as a
    numpy array.
    """
    kept = []
    for candidate in all_points:
        if candidate in to_remove:
            continue
        kept.append(candidate)
    return np.array(kept)

In [64]:
def get_hash():
    """Return a random 32-character identifier of ASCII letters and digits.

    Despite the name nothing is hashed: each character is an independent
    ``random.choice`` draw from uppercase letters, lowercase letters and digits.
    """
    import random, string
    alphabet = string.ascii_uppercase + string.ascii_lowercase + string.digits
    chars = []
    for _ in range(32):
        chars.append(random.choice(alphabet))
    return ''.join(chars)

In [65]:
def write_pcd(pts, path):
    """Write ``pts`` to ``path`` as a point cloud file.

    Relies on ``PointCloud``, ``Vector3dVector`` and ``write_point_cloud``
    being in scope; this notebook pulls them in with ``from open3d import *``.

    Args:
        pts: point coordinates handed to ``Vector3dVector``.
        path: destination file path.
    """
    cloud = PointCloud()
    cloud.points = Vector3dVector(pts)
    write_point_cloud(path, cloud)

In [66]:
nlog=500
cls_limit=10000

In [76]:
part='val'
include_unassigned=False
own_sub = True

In [77]:
# Select the dataframe for the requested split. An unknown name is rejected
# explicitly; otherwise `part_buildings` would be undefined or left over from
# an earlier run of this cell.
if part == 'train':
    part_buildings = train_buildings
elif part == 'val':
    part_buildings = valid_buildings
elif part == 'test':
    part_buildings = test_buildings
else:
    raise ValueError("part must be 'train', 'val' or 'test', got {!r}".format(part))

list_file = []
damaged_op_data_dir = os.path.join('../data/damaged_v10_1000pts_meansub', part)
complete_op_data_dir = os.path.join(damaged_op_data_dir, 'complete')
partial_op_data_dir = os.path.join(damaged_op_data_dir, 'partial')
for out_dir in (damaged_op_data_dir, complete_op_data_dir, partial_op_data_dir):
    os.makedirs(out_dir, exist_ok=True)

for cls in list(classes):
    print('processing class {}'.format(cls))

    buildings_in_class = part_buildings.loc[part_buildings['class'] == cls]
    # folder-friendly class name: spaces replaced by underscores
    cls = str(cls.replace(' ', '_'))
    complete_cls_dir = os.path.join(complete_op_data_dir, str(cls))
    partial_cls_dir = os.path.join(partial_op_data_dir, str(cls))
    os.makedirs(complete_cls_dir, exist_ok=True)
    os.makedirs(partial_cls_dir, exist_ok=True)

    for b_idx, building in enumerate(buildings_in_class.itertuples()):
        if b_idx == cls_limit:
            print('reached class limit, breaking')
            break
        if b_idx % nlog == 0:
            print('processing b_idx {}'.format(b_idx))

        building_complete_points = convert_to_nparray(building.points)

        # Only pay for the (quadratic) filtering when its result is used.
        if not include_unassigned:
            unassigned_points = convert_to_nparray(building.unassignedsurfacegrowingpoints).tolist()
            building_complete_points = remove_unassigned(building_complete_points.tolist(), unassigned_points)

        # partial scans are cut from the un-centred complete cloud
        building_partial_points = get_damaged_points(building_complete_points, num_partial_scans)

        # NOTE(review): file names are random, so they cannot be traced back to
        # building ids and differ between runs unless `random` is seeded.
        random_hash = get_hash()
        # save the complete
        if own_sub:
            building_complete_points = np.subtract(building_complete_points, building_complete_points.mean(axis=0))
        write_pcd(building_complete_points, os.path.join(complete_cls_dir, str(random_hash)+'.pcd'))

        # save the partials, one sub-folder per building
        partial_dir = os.path.join(partial_cls_dir, str(random_hash))
        os.makedirs(partial_dir, exist_ok=True)
        for partial_idx, partial in enumerate(building_partial_points):
            try:
                if own_sub:
                    # each partial is centred on its own mean
                    partial = np.subtract(partial, partial.mean(axis=0))
                write_pcd(partial, os.path.join(partial_dir, str(partial_idx)+'.pcd'))
            except Exception as e:
                # best effort: skip this partial, but say which one failed
                print('failed to write partial {} of {}/{}: {}'.format(partial_idx, cls, random_hash, e))

        # add record to list file
        list_file.append(str(cls)+'/'+str(random_hash))

with open(os.path.join(damaged_op_data_dir, part+'.list'), 'w') as f:
    for item in list_file:
        f.write("%s\n" % item)

processing class Saddleback roof
processing b_idx 0
processing class Two-sided hip roof
processing b_idx 0
processing class Pyramid roof
processing b_idx 0
