# Prepare Data for Train-Test

In [1]:
import pandas as pd
import numpy as np
import os
import random
from random import shuffle
import shutil

In [5]:
def randomize_files(files):
    return shuffle(files)


def prepare_split(files):
    eighty = int(0.8 * len(files))
    twenty = int(len(files) - eighty)
    files = np.array(files)
    
    return eighty, twenty, files


def generate_ids(eighty, twenty):
    idx = np.hstack((np.ones(eighty),
                     np.zeros(twenty)))
    return idx


def train_test_split(files, idx):
    train = files[idx == 1]
    test = files[idx == 0]
    print("TRAIN SET: {0}".format(train))
    print("TEST SET: {0}".format(test))
    return train, test

In [6]:
PATH_MIXED = "../data/meshes/mixed/"
PATH_NOISE = "../data/meshes/noise/"

# -------> Change this!!!!
files = os.listdir(PATH_NOISE)
    
if ".ipynb_checkpoints" in files:
    files.remove(".ipynb_checkpoints")
    
# First: Randomize data 
randomize_files(files)

# Calculate Split
eighty, twenty, files = prepare_split(files)

# Generate 1s and 0s as IDs
idx = generate_ids(eighty, twenty)

# Split data 
train, test = train_test_split(files, idx)

TRAIN SET: ['group_0_mesh.off' 'group_601_mesh.off' 'group_1846_mesh.off'
 'group_3704_mesh.off' 'group_2533_mesh.off' 'group_3609_mesh.off'
 'group_2128_mesh.off' 'group_5550_mesh.off' 'group_5991_mesh.off'
 'group_5701_mesh.off' 'group_5006_mesh.off' 'group_4267_mesh.off'
 'group_3042_mesh.off' 'group_1108_mesh.off' 'group_1667_mesh.off'
 'group_3355_mesh.off' 'group_4274_mesh.off' 'group_1189_mesh.off'
 'group_5097_mesh.off' 'group_4647_mesh.off' 'group_903_mesh.off'
 'group_4444_mesh.off' 'group_4179_mesh.off' 'group_4897_mesh.off'
 'group_5038_mesh.off' 'group_4870_mesh.off' 'group_2093_mesh.off'
 'group_1304_mesh.off' 'group_909_mesh.off' 'train' 'group_449_mesh.off'
 'group_4848_mesh.off' 'group_1481_mesh.off' 'group_2719_mesh.off'
 'group_5337_mesh.off' 'group_4874_mesh.off' 'group_6427_mesh.off'
 'group_1016_mesh.off' 'group_3594_mesh.off' 'test' 'group_3799_mesh.off']
TEST SET: ['group_1955_mesh.off' 'group_5941_mesh.off' 'group_3763_mesh.off'
 'group_4571_mesh.off' 'group_35

In [7]:
for file in train:
    print(file)

group_0_mesh.off
group_601_mesh.off
group_1846_mesh.off
group_3704_mesh.off
group_2533_mesh.off
group_3609_mesh.off
group_2128_mesh.off
group_5550_mesh.off
group_5991_mesh.off
group_5701_mesh.off
group_5006_mesh.off
group_4267_mesh.off
group_3042_mesh.off
group_1108_mesh.off
group_1667_mesh.off
group_3355_mesh.off
group_4274_mesh.off
group_1189_mesh.off
group_5097_mesh.off
group_4647_mesh.off
group_903_mesh.off
group_4444_mesh.off
group_4179_mesh.off
group_4897_mesh.off
group_5038_mesh.off
group_4870_mesh.off
group_2093_mesh.off
group_1304_mesh.off
group_909_mesh.off
train
group_449_mesh.off
group_4848_mesh.off
group_1481_mesh.off
group_2719_mesh.off
group_5337_mesh.off
group_4874_mesh.off
group_6427_mesh.off
group_1016_mesh.off
group_3594_mesh.off
test
group_3799_mesh.off


In [8]:
dir_paths = [PATH_MIXED + "train/", PATH_MIXED + "test/"]
dir_noise_paths = [PATH_NOISE + "train/", PATH_NOISE + "test/"]
categories = [train, test]

In [None]:
# Save MIXED data
for paths, category in zip(dir_paths, categories):
    if not os.path.exists(paths):
        os.makedirs(paths)
        
    if os.path.exists(paths):
        for file in category:
            file_path = PATH_MIXED + file
            print(f'file_path: {file_path}')
            shutil.move(file_path, paths)

In [11]:
# Save NOISE data
for paths, category in zip(dir_noise_paths, categories):
#     print(paths, category)
    if not os.path.exists(paths):
        os.makedirs(paths)
        
    if os.path.exists(paths):
        for noise_file in category:
            file_path = PATH_NOISE + noise_file
            print(f'file_path: {file_path}')
            shutil.move(file_path, paths)

file_path: ../data/meshes/noise/group_0_mesh.off
file_path: ../data/meshes/noise/group_601_mesh.off
file_path: ../data/meshes/noise/group_1846_mesh.off
file_path: ../data/meshes/noise/group_3704_mesh.off
file_path: ../data/meshes/noise/group_2533_mesh.off
file_path: ../data/meshes/noise/group_3609_mesh.off
file_path: ../data/meshes/noise/group_2128_mesh.off
file_path: ../data/meshes/noise/group_5550_mesh.off
file_path: ../data/meshes/noise/group_5991_mesh.off
file_path: ../data/meshes/noise/group_5701_mesh.off
file_path: ../data/meshes/noise/group_5006_mesh.off
file_path: ../data/meshes/noise/group_4267_mesh.off
file_path: ../data/meshes/noise/group_3042_mesh.off
file_path: ../data/meshes/noise/group_1108_mesh.off
file_path: ../data/meshes/noise/group_1667_mesh.off
file_path: ../data/meshes/noise/group_3355_mesh.off
file_path: ../data/meshes/noise/group_4274_mesh.off
file_path: ../data/meshes/noise/group_1189_mesh.off
file_path: ../data/meshes/noise/group_5097_mesh.off
file_path: ../da