# Prepare Data for the Train/Test Split

In [3]:
def housekeeping(files):
    """
    Cleans up unwanted entries of a data directory and of its listing.

    The following names are looked up in ``files``:
    1. .ipynb_checkpoints/  -> deleted from disk (recursively)
    2. .DS_Store            -> deleted from disk
    3. train/
    4. test/                -> left on disk, only dropped from the listing

    Every handled name is also removed from ``files`` in place (when
    ``files`` is a list), so a caller that keeps working with the same list
    does not try to split or move entries that no longer exist or that are
    the train/test output folders themselves.

    The directory is taken from the module-level variable ``path``; names are
    appended to it by plain string concatenation, so ``path`` has to end with
    a path separator.

    Parameters
    ----------
    files : list of str
        Directory listing of ``path`` (e.g. from ``os.listdir``).

    Returns
    -------
    None
    """

    def _forget(name):
        # Prune the caller's listing in place; containers other than lists
        # are left untouched.
        if isinstance(files, list):
            while name in files:
                files.remove(name)

    if ".ipynb_checkpoints" in files:
        print("Removing .ipynb_checkpoints...")
        shutil.rmtree(path + ".ipynb_checkpoints")
        _forget(".ipynb_checkpoints")
    else:
        print("No action taken")

    if ".DS_Store" in files:
        os.remove(path + ".DS_Store")
        _forget(".DS_Store")
    else:
        print("No action taken")

    # Each name needs its own membership test: `'train' or 'test' in files`
    # evaluates the non-empty string 'train' first and is therefore always
    # truthy, whatever `files` contains.
    for split_dir in ("train", "test"):
        if split_dir in files:
            # Folders of an earlier split stay on disk but must not be
            # treated as data files.
            _forget(split_dir)

    return


def randomize_files(files):
    """
    Shuffles ``files`` into a random order, in place, and returns it.

    ``random.shuffle`` reorders its argument in place and returns ``None``,
    so its result must not be returned directly. The same list object is
    handed back, which keeps callers that rely on the in-place effect working
    and also allows ``files = randomize_files(files)``.

    Parameters
    ----------
    files : list
        Mutable sequence to shuffle.

    Returns
    -------
    list
        The very same object as ``files``, now shuffled.
    """
    shuffle(files)
    return files


def prepare_split(files, train_ratio=0.8):
    """
    Computes how many files go to the training and to the test set.

    **Note**
    The default setting is an 80-20 split.

    Parameters
    ----------
    files : sequence
        The files to split.
    train_ratio : float, optional
        Fraction of the files assigned to training, between 0 and 1
        (default 0.8). The training count is rounded down.

    Returns
    -------
    eighty : int
        Number of training files, ``int(train_ratio * len(files))``.
    twenty : int
        Number of test files (all remaining files).
    files : numpy.ndarray
        ``files`` converted to an array so it can be indexed with a mask.

    Raises
    ------
    ValueError
        If ``train_ratio`` is outside the range [0, 1].
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(
            "train_ratio must be between 0 and 1, got {}".format(train_ratio)
        )

    total = len(files)
    eighty = int(train_ratio * total)
    # The remainder goes to the test set, so both counts always add up to total.
    twenty = total - eighty
    files = np.array(files)

    return eighty, twenty, files


def generate_ids(eighty, twenty):
    """
    Builds the tag vector used to separate training from test files.

    The result is a 1-D float array holding ``eighty`` ones followed by
    ``twenty`` zeros (1 = training, 0 = test).
    """
    train_tags = np.ones(eighty)
    test_tags = np.zeros(twenty)
    return np.concatenate([train_tags, test_tags])


def train_test_split(files, idx):
    """
    Separates ``files`` into a training and a test subset using ``idx``.

    Entries whose tag equals 1 form the training set, entries whose tag
    equals 0 form the test set. Both subsets are printed and then returned
    as ``(train, test)``.

    Note: the name matches scikit-learn's ``train_test_split``; importing
    that helper into the same namespace would shadow one with the other.
    """
    is_train = idx == 1
    is_test = idx == 0

    train, test = files[is_train], files[is_test]

    print("TRAIN SET: {0}".format(train))
    print("TEST SET: {0}".format(test))
    return train, test


def _move_split_files(base_path, split_groups):
    """
    Moves each group of files from ``base_path`` into its split folder.

    ``split_groups`` is expected to hold the training file names first and
    the test file names second; they are moved to ``base_path + "train/"``
    and ``base_path + "test/"`` respectively. Paths are built by plain string
    concatenation, so ``base_path`` has to end with a path separator.
    """
    target_dirs = [base_path + "train/", base_path + "test/"]

    for target_dir, file_names in zip(target_dirs, split_groups):
        # exist_ok makes the call a no-op for an existing folder, so no
        # separate existence checks are needed around it.
        os.makedirs(target_dir, exist_ok=True)

        for file_name in file_names:
            file_path = base_path + file_name
            print(f'file_path: {file_path}')
            shutil.move(file_path, target_dir)


def save_mixed_files():
    """
    Saves all mixed groups in the respective train and test folders.

    Reads the module-level ``PATH_MIXED`` and ``categories`` (``[train,
    test]``) at call time.
    """
    _move_split_files(PATH_MIXED, categories)


def save_noise_files():
    """
    Saves all noise files in their respective train and test folders.

    Reads the module-level ``PATH_NOISE`` and ``categories`` (``[train,
    test]``) at call time.
    """
    _move_split_files(PATH_NOISE, categories)

In [4]:
import pandas as pd
import numpy as np
import os
import random
from random import shuffle
import shutil

PATH_MIXED = "../../../data/unsampled/xzt/processed_points/mixed/"
PATH_NOISE = "../../../data/unsampled/xzt/processed_points/noise/"

# Seed for the shuffle; together with the sorted listing below it makes the
# split reproducible for an unchanged directory.
SEED = 42

dir_paths = [PATH_MIXED, PATH_NOISE]
save_types = [save_mixed_files, save_noise_files]

random.seed(SEED)

# NOTE: `path` and `categories` must keep these names - housekeeping() and the
# save_*_files() functions read them as module-level variables.
for path, types in zip(dir_paths, save_types):
    print("Working on {}".format(path))

    # Cleanup unwanted files/folders
    housekeeping(os.listdir(path))

    # List the directory again after the cleanup and keep regular files only.
    # Folders (such as train/ and test/ from an earlier run) must not be
    # split and moved; sorting removes the arbitrary order of os.listdir.
    files = sorted(
        name for name in os.listdir(path)
        if os.path.isfile(path + name)
    )

    # Randomize data (the list is shuffled in place)
    randomize_files(files)

    # Calculate split
    eighty, twenty, files = prepare_split(files)

    # Generate 1s and 0s as IDs
    idx = generate_ids(eighty, twenty)

    # Split data
    train, test = train_test_split(files, idx)

    # Move the files into <path>/train/ and <path>/test/
    categories = [train, test]
    types()

Working on ../../../data/unsampled/xzt/processed_points/mixed/
No action taken
No action taken
TRAIN SET: ['group_4532.xyz' 'group_857.xyz' 'group_2338.xyz' 'group_4243.xyz'
 'group_5795.xyz' 'group_2759.xyz' 'group_4679.xyz' 'group_4741.xyz'
 'group_4562.xyz' 'group_691.xyz' 'group_809.xyz' 'group_6378.xyz'
 'group_355.xyz' 'group_281.xyz' 'group_3094.xyz' 'group_1658.xyz'
 'group_1038.xyz' 'group_394.xyz' 'group_4460.xyz' 'group_5116.xyz'
 'group_2211.xyz' 'group_1141.xyz' 'group_6038.xyz' 'group_2367.xyz'
 'group_3025.xyz' 'group_2731.xyz' 'group_1825.xyz' 'group_607.xyz'
 'group_6553.xyz' 'group_5815.xyz' 'group_6564.xyz' 'group_6534.xyz'
 'group_5857.xyz' 'group_5866.xyz' 'group_1488.xyz' 'group_440.xyz'
 'group_5000.xyz' 'group_615.xyz' 'group_3923.xyz' 'group_2185.xyz'
 'group_554.xyz' 'group_1454.xyz' 'group_4417.xyz' 'group_3727.xyz'
 'group_3514.xyz' 'group_5010.xyz' 'group_4735.xyz' 'group_3063.xyz'
 'group_1618.xyz' 'group_2517.xyz' 'group_6084.xyz' 'group_1172.xyz'
 'group

In [2]:
import os

In [3]:
import open3d as o3d
# Directory the mesh cells below read from; it ends with "/" because file
# names are appended to it by plain string concatenation.
# NOTE(review): this path is under "xyt" while the split cell above uses
# "xzt" - confirm this is the intended dataset.
folder = "../../../data/unsampled/xyt/meshes/noise/train/"

In [None]:
# Vertex and triangle counts, one entry per file found in `folder`
# (the following cells average these lists).
avg_points, avg_triangles = [], []

for file in os.listdir(folder):
    mesh = o3d.io.read_triangle_mesh(f"{folder}{file}")
    avg_points.append(len(mesh.vertices))
    avg_triangles.append(len(mesh.triangles))

In [None]:
# Mean number of vertices per mesh; shown as the cell result.
# Raises ZeroDivisionError when avg_points is empty.
sum(avg_points)/len(avg_points)
# (avg_triangles is averaged in the next cell)

In [None]:
# Mean number of triangles per mesh; shown as the cell result.
# Raises ZeroDivisionError when avg_triangles is empty.
sum(avg_triangles)/len(avg_triangles)

In [12]:
# Load one specific mesh file from `folder` for closer inspection.
mesh = o3d.io.read_triangle_mesh(folder+"group_1101_mesh.off")
# Bare expression so the notebook displays the mesh summary as the cell result.
mesh

geometry::TriangleMesh with 5137 points and 8247 triangles.

In [16]:
def check_properties(mesh):
    """
    Prints a short report of topological properties of ``mesh``.

    The report covers vertex-manifoldness, self-intersection, watertightness
    and orientability. Only these four properties are evaluated; edge-manifold
    checks are not part of the report and are therefore not computed.

    Parameters
    ----------
    mesh : object
        Mesh exposing ``compute_vertex_normals``, ``is_vertex_manifold``,
        ``is_self_intersecting``, ``is_watertight`` and ``is_orientable``
        (e.g. the object returned by ``o3d.io.read_triangle_mesh``).

    Returns
    -------
    None
        The result is printed, not returned.
    """
    # Kept from the original: this updates the mesh itself, which callers may
    # rely on afterwards (presumably for rendering - TODO confirm).
    mesh.compute_vertex_normals()

    vertex_manifold = mesh.is_vertex_manifold()
    self_intersecting = mesh.is_self_intersecting()
    watertight = mesh.is_watertight()
    orientable = mesh.is_orientable()

    print("Mesh Properties for Event Groups (Training Data): \n")
    print(f"  vertex_manifold:        {vertex_manifold}")
    print(f"  self_intersecting:      {self_intersecting}")
    print(f"  watertight:             {watertight}")
    print(f"  orientable:             {orientable}")

In [17]:
# Print the property report for the mesh currently bound to `mesh`.
check_properties(mesh)

Mesh Properties for Event Groups (Training Data): 

  vertex_manifold:        False
  self_intersecting:      True
  watertight:             False
  orientable:             False
