# Prepare Data for the Train/Test Split

In [20]:
import pandas as pd
import numpy as np
import os
import random
from random import shuffle
import shutil

In [21]:
def randomize_files(files):
    """Shuffle ``files`` in place and return the same list.

    ``random.shuffle`` reorders its argument in place and returns ``None``,
    so returning its result directly always handed ``None`` to the caller.
    The list is still mutated in place, so callers that ignore the return
    value keep working unchanged.

    Args:
        files: Mutable sequence (e.g. a list of file names) to shuffle.

    Returns:
        The same ``files`` object, now in random order.
    """
    shuffle(files)
    return files


def prepare_split(files, train_fraction=0.8):
    """Compute the train/test sizes for ``files`` and convert it to an array.

    Args:
        files: Sequence of items (e.g. file names) to be split.
        train_fraction: Share of the items assigned to the training set,
            between 0 and 1 inclusive. Defaults to 0.8 (an 80/20 split).

    Returns:
        A tuple ``(eighty, twenty, files)`` where ``eighty`` is the number of
        training items (``train_fraction * len(files)`` truncated to an int),
        ``twenty`` is the number of remaining items, and ``files`` is the
        input converted to a NumPy array.

    Raises:
        ValueError: If ``train_fraction`` is outside ``[0, 1]``.
    """
    if not 0.0 <= train_fraction <= 1.0:
        raise ValueError(
            "train_fraction must be between 0 and 1, got {0}".format(train_fraction)
        )

    total = len(files)
    eighty = int(train_fraction * total)
    # Both operands are ints, so the remainder needs no extra int() cast.
    twenty = total - eighty

    return eighty, twenty, np.array(files)


def generate_ids(eighty, twenty):
    """Build a flat label vector: ``eighty`` ones followed by ``twenty`` zeros.

    Args:
        eighty: Number of leading 1.0 entries.
        twenty: Number of trailing 0.0 entries.

    Returns:
        A 1-D NumPy float array of length ``eighty + twenty``.
    """
    train_flags = np.ones(eighty)
    test_flags = np.zeros(twenty)
    return np.concatenate([train_flags, test_flags])


def train_test_split(files, idx):
    """Split ``files`` into train and test subsets using the flags in ``idx``.

    Entries of ``files`` where ``idx == 1`` go to the train set and entries
    where ``idx == 0`` go to the test set. Both subsets are printed.

    Args:
        files: Array of items that supports boolean-mask indexing
            (presumably a NumPy array; confirm against the caller).
        idx: Array of flags, compared element-wise against 1 and 0.

    Returns:
        A tuple ``(train, test)`` of the selected subsets.
    """
    train_mask = idx == 1
    test_mask = idx == 0
    train, test = files[train_mask], files[test_mask]
    print(f"TRAIN SET: {train}")
    print(f"TEST SET: {test}")
    return train, test

In [29]:
PATH_MIXED = "../data/meshes/mixed/"
PATH_NOISE = "../data/meshes/noise/"

# The listing of this directory defines the split; point it at the folder
# whose files should be divided into train and test.
# Keep only regular files: this skips ".ipynb_checkpoints" as well as the
# "train/" and "test/" sub-directories that exist once the save cells have
# run, so re-running this cell never puts directories into the split.
# os.listdir returns entries in arbitrary order, so sort them to get a
# deterministic starting order before shuffling.
files = sorted(
    name
    for name in os.listdir(PATH_NOISE)
    if os.path.isfile(os.path.join(PATH_NOISE, name))
)

# First: randomize data (the list is shuffled in place)
randomize_files(files)

# Calculate split sizes and convert the file list to an array
eighty, twenty, files = prepare_split(files)

# Generate 1s and 0s as IDs (1 = train, 0 = test)
idx = generate_ids(eighty, twenty)

# Split data
train, test = train_test_split(files, idx)

TRAIN SET: ['group_4391_mesh.off' 'group_5009_mesh.off' 'group_5905_mesh.off'
 'group_5922_mesh.off' 'group_3507_mesh.off' 'group_5149_mesh.off'
 'group_3180_mesh.off' 'group_1583_mesh.off' 'group_4796_mesh.off'
 'group_2956_mesh.off' 'group_3704_mesh.off' 'group_762_mesh.off'
 'group_5977_mesh.off' 'group_3355_mesh.off' 'group_5308_mesh.off'
 'group_6432_mesh.off' 'group_1391_mesh.off' 'group_5701_mesh.off'
 'group_5067_mesh.off' 'group_6181_mesh.off' 'group_6396_mesh.off'
 'group_3815_mesh.off' 'group_3222_mesh.off' 'group_3569_mesh.off'
 'group_6333_mesh.off' 'group_437_mesh.off' 'group_5840_mesh.off'
 'group_6048_mesh.off' 'group_5097_mesh.off' 'group_3609_mesh.off'
 'group_3925_mesh.off' 'group_2875_mesh.off' 'group_4989_mesh.off'
 'group_4799_mesh.off' 'group_6589_mesh.off' 'group_1108_mesh.off'
 'group_4463_mesh.off' 'group_3594_mesh.off' 'group_5412_mesh.off'
 'group_5038_mesh.off' 'group_1681_mesh.off' 'group_3123_mesh.off'
 'group_4590_mesh.off' 'group_1151_mesh.off' 'group_4

In [30]:
# Destination folders for each split, listed in the same (train, test)
# order as `categories` so the lists can be zipped together.
dir_paths = [PATH_MIXED + subdir for subdir in ("train/", "test/")]
dir_noise_paths = [PATH_NOISE + subdir for subdir in ("train/", "test/")]
categories = [train, test]

In [28]:
# Save MIXED data: move each split's files from PATH_MIXED into the matching
# train/ or test/ folder.
for paths, category in zip(dir_paths, categories):
    # exist_ok=True creates the folder when it is missing and does nothing
    # otherwise, replacing the separate exists()/makedirs() checks.
    os.makedirs(paths, exist_ok=True)

    for file in category:
        file_path = PATH_MIXED + file
        print(f'file_path: {file_path}')
        shutil.move(file_path, paths)

file_path: ../data/meshes/mixed/group_5010_mesh.off
file_path: ../data/meshes/mixed/group_2211_mesh.off
file_path: ../data/meshes/mixed/group_231_mesh.off
file_path: ../data/meshes/mixed/group_50_mesh.off
file_path: ../data/meshes/mixed/group_5215_mesh.off
file_path: ../data/meshes/mixed/group_1918_mesh.off
file_path: ../data/meshes/mixed/group_2374_mesh.off
file_path: ../data/meshes/mixed/group_6314_mesh.off
file_path: ../data/meshes/mixed/group_5422_mesh.off
file_path: ../data/meshes/mixed/group_3398_mesh.off
file_path: ../data/meshes/mixed/group_3967_mesh.off
file_path: ../data/meshes/mixed/group_4735_mesh.off
file_path: ../data/meshes/mixed/group_857_mesh.off
file_path: ../data/meshes/mixed/group_4282_mesh.off
file_path: ../data/meshes/mixed/group_1653_mesh.off
file_path: ../data/meshes/mixed/group_5211_mesh.off
file_path: ../data/meshes/mixed/group_5312_mesh.off
file_path: ../data/meshes/mixed/group_3923_mesh.off
file_path: ../data/meshes/mixed/group_5847_mesh.off
file_path: ../da

In [31]:
# Save NOISE data: move each split's files from PATH_NOISE into the matching
# train/ or test/ folder.
for paths, category in zip(dir_noise_paths, categories):
    # exist_ok=True creates the folder when it is missing and does nothing
    # otherwise, replacing the separate exists()/makedirs() checks.
    os.makedirs(paths, exist_ok=True)

    for noise_file in category:
        file_path = PATH_NOISE + noise_file
        print(f'file_path: {file_path}')
        shutil.move(file_path, paths)

file_path: ../data/meshes/noise/group_4391_mesh.off
file_path: ../data/meshes/noise/group_5009_mesh.off
file_path: ../data/meshes/noise/group_5905_mesh.off
file_path: ../data/meshes/noise/group_5922_mesh.off
file_path: ../data/meshes/noise/group_3507_mesh.off
file_path: ../data/meshes/noise/group_5149_mesh.off
file_path: ../data/meshes/noise/group_3180_mesh.off
file_path: ../data/meshes/noise/group_1583_mesh.off
file_path: ../data/meshes/noise/group_4796_mesh.off
file_path: ../data/meshes/noise/group_2956_mesh.off
file_path: ../data/meshes/noise/group_3704_mesh.off
file_path: ../data/meshes/noise/group_762_mesh.off
file_path: ../data/meshes/noise/group_5977_mesh.off
file_path: ../data/meshes/noise/group_3355_mesh.off
file_path: ../data/meshes/noise/group_5308_mesh.off
file_path: ../data/meshes/noise/group_6432_mesh.off
file_path: ../data/meshes/noise/group_1391_mesh.off
file_path: ../data/meshes/noise/group_5701_mesh.off
file_path: ../data/meshes/noise/group_5067_mesh.off
file_path: ..