# Prepare Data for the Train/Test Split

In [25]:
import pandas as pd
import numpy as np
import os
import random
from random import shuffle
import shutil

In [26]:
def randomize_files(files):
    """Shuffle ``files`` in place and return the same list.

    ``random.shuffle`` reorders its argument in place and returns ``None``,
    so returning its result directly would always hand ``None`` back to the
    caller. The list is shuffled first and then returned explicitly, which
    keeps in-place callers working and makes the return value usable.

    Args:
        files: Mutable sequence (e.g. a list of file names) to reorder.

    Returns:
        The very same ``files`` object, now in random order.
    """
    shuffle(files)
    return files


def prepare_split(files):
    """Compute the sizes of an 80/20 split and convert ``files`` to an array.

    Args:
        files: Sequence of items (file names) to be split.

    Returns:
        A tuple ``(eighty, twenty, files)`` where ``eighty`` is 80% of the
        item count truncated to an int, ``twenty`` is the remaining count and
        ``files`` is the input converted to a NumPy array.
    """
    total = len(files)
    train_count = int(0.8 * total)
    # Whatever is not assigned to the training share goes to the test share.
    test_count = int(total - train_count)
    return train_count, test_count, np.array(files)


def generate_ids(eighty, twenty):
    """Build a 1-D marker array: ``eighty`` ones followed by ``twenty`` zeros.

    Args:
        eighty: Number of leading entries set to 1.
        twenty: Number of trailing entries set to 0.

    Returns:
        A NumPy array of length ``eighty + twenty``.
    """
    ones_part = np.ones(eighty)
    zeros_part = np.zeros(twenty)
    return np.concatenate((ones_part, zeros_part))


def train_test_split(files, idx):
    """Partition ``files`` by the marker array ``idx`` and print both parts.

    Args:
        files: NumPy array of items to split.
        idx: Array aligned with ``files``; entries equal to 1 select the
            train part, entries equal to 0 select the test part.

    Returns:
        A tuple ``(train, test)`` of the two selected sub-arrays.
    """
    # Select each part with a boolean mask built from its marker value.
    by_flag = {flag: files[idx == flag] for flag in (1, 0)}
    train_part, test_part = by_flag[1], by_flag[0]
    print("TRAIN SET: {0}".format(train_part))
    print("TEST SET: {0}".format(test_part))
    return train_part, test_part

In [27]:
PATH_MIXED = "../data/points/mixed/"
PATH_NOISE = "../data/points/noise/"

# -------> Change this!!!!
# Keep regular files only. os.listdir() also returns sub-directories, so
# ".ipynb_checkpoints" and any "train/" / "test/" folders left behind by a
# previous run would otherwise be treated as samples and moved as well.
files = [
    name
    for name in os.listdir(PATH_NOISE)
    if name != ".ipynb_checkpoints"
    and os.path.isfile(os.path.join(PATH_NOISE, name))
]

# First: Randomize data
# The list is shuffled in place; the return value is intentionally unused.
randomize_files(files)

# Calculate Split
eighty, twenty, files = prepare_split(files)

# Generate 1s and 0s as IDs
idx = generate_ids(eighty, twenty)

# Split data
train, test = train_test_split(files, idx)

TRAIN SET: ['group_463.xyz' 'group_137.xyz' 'group_364.xyz' 'group_291.xyz'
 'group_307.xyz' 'group_106.xyz' 'group_62.xyz' 'group_199.xyz'
 'group_48.xyz' 'group_54.xyz' 'group_91.xyz' 'group_196.xyz'
 'group_363.xyz' 'group_155.xyz' 'group_8.xyz' 'group_122.xyz'
 'group_304.xyz' 'group_173.xyz' 'group_335.xyz' 'group_126.xyz'
 'group_482.xyz' 'group_232.xyz' 'group_286.xyz' 'group_208.xyz'
 'group_127.xyz' 'group_119.xyz' 'group_330.xyz' 'group_63.xyz'
 'group_170.xyz' 'group_473.xyz' 'group_19.xyz' 'group_479.xyz'
 'group_213.xyz' 'group_168.xyz' 'group_0.xyz' 'group_424.xyz'
 'group_282.xyz' 'group_446.xyz' 'group_315.xyz' 'group_367.xyz'
 'group_314.xyz' 'group_387.xyz' 'group_172.xyz' 'group_302.xyz'
 'group_80.xyz' 'group_417.xyz' 'group_353.xyz' 'group_96.xyz'
 'group_325.xyz' 'group_293.xyz' 'group_239.xyz' 'group_405.xyz'
 'group_194.xyz' 'group_346.xyz' 'group_262.xyz' 'group_273.xyz'
 'group_109.xyz' 'group_342.xyz' 'group_265.xyz' 'group_114.xyz'
 'group_336.xyz' 'group_11

In [28]:
# Destination folders for each split; the order (train, test) matches
# the order of `categories` so the lists can be zipped together.
dir_paths = [PATH_MIXED + subdir for subdir in ("train/", "test/")]
dir_noise_paths = [PATH_NOISE + subdir for subdir in ("train/", "test/")]
categories = [train, test]

In [22]:
# Save MIXED data
# Move every file of each split from PATH_MIXED into its destination folder.
for paths, category in zip(dir_paths, categories):
    # exist_ok=True creates the folder when missing and is a no-op otherwise,
    # replacing the separate "exists?" check before and after creation.
    os.makedirs(paths, exist_ok=True)

    for file in category:
        file_path = os.path.join(PATH_MIXED, file)
        print(f'file_path: {file_path}')
        # `paths` is an existing directory, so the file is moved into it.
        shutil.move(file_path, paths)

file_path: ../data/points/mixed/group_334.xyz
file_path: ../data/points/mixed/group_66.xyz
file_path: ../data/points/mixed/group_171.xyz
file_path: ../data/points/mixed/group_214.xyz
file_path: ../data/points/mixed/group_222.xyz
file_path: ../data/points/mixed/group_11.xyz
file_path: ../data/points/mixed/group_85.xyz
file_path: ../data/points/mixed/group_117.xyz
file_path: ../data/points/mixed/group_236.xyz
file_path: ../data/points/mixed/group_25.xyz
file_path: ../data/points/mixed/group_72.xyz
file_path: ../data/points/mixed/group_34.xyz
file_path: ../data/points/mixed/group_280.xyz
file_path: ../data/points/mixed/group_156.xyz
file_path: ../data/points/mixed/group_79.xyz
file_path: ../data/points/mixed/group_269.xyz
file_path: ../data/points/mixed/group_211.xyz
file_path: ../data/points/mixed/group_74.xyz
file_path: ../data/points/mixed/group_121.xyz
file_path: ../data/points/mixed/group_322.xyz
file_path: ../data/points/mixed/group_323.xyz
file_path: ../data/points/mixed/group_188.

file_path: ../data/points/mixed/group_33.xyz
file_path: ../data/points/mixed/group_191.xyz
file_path: ../data/points/mixed/group_289.xyz
file_path: ../data/points/mixed/group_136.xyz
file_path: ../data/points/mixed/group_308.xyz
file_path: ../data/points/mixed/group_22.xyz
file_path: ../data/points/mixed/group_59.xyz
file_path: ../data/points/mixed/group_200.xyz
file_path: ../data/points/mixed/group_195.xyz
file_path: ../data/points/mixed/group_225.xyz
file_path: ../data/points/mixed/group_76.xyz
file_path: ../data/points/mixed/group_7.xyz
file_path: ../data/points/mixed/group_242.xyz
file_path: ../data/points/mixed/group_163.xyz
file_path: ../data/points/mixed/group_132.xyz
file_path: ../data/points/mixed/group_203.xyz
file_path: ../data/points/mixed/group_5.xyz
file_path: ../data/points/mixed/group_240.xyz
file_path: ../data/points/mixed/group_16.xyz
file_path: ../data/points/mixed/group_230.xyz


In [29]:
# Save NOISE data
# Move every file of each split from PATH_NOISE into its destination folder.
for paths, category in zip(dir_noise_paths, categories):
    # exist_ok=True creates the folder when missing and is a no-op otherwise,
    # replacing the separate "exists?" check before and after creation.
    os.makedirs(paths, exist_ok=True)

    for noise_file in category:
        file_path = os.path.join(PATH_NOISE, noise_file)
        print(f'file_path: {file_path}')
        # `paths` is an existing directory, so the file is moved into it.
        shutil.move(file_path, paths)

file_path: ../data/points/noise/group_463.xyz
file_path: ../data/points/noise/group_137.xyz
file_path: ../data/points/noise/group_364.xyz
file_path: ../data/points/noise/group_291.xyz
file_path: ../data/points/noise/group_307.xyz
file_path: ../data/points/noise/group_106.xyz
file_path: ../data/points/noise/group_62.xyz
file_path: ../data/points/noise/group_199.xyz
file_path: ../data/points/noise/group_48.xyz
file_path: ../data/points/noise/group_54.xyz
file_path: ../data/points/noise/group_91.xyz
file_path: ../data/points/noise/group_196.xyz
file_path: ../data/points/noise/group_363.xyz
file_path: ../data/points/noise/group_155.xyz
file_path: ../data/points/noise/group_8.xyz
file_path: ../data/points/noise/group_122.xyz
file_path: ../data/points/noise/group_304.xyz
file_path: ../data/points/noise/group_173.xyz
file_path: ../data/points/noise/group_335.xyz
file_path: ../data/points/noise/group_126.xyz
file_path: ../data/points/noise/group_482.xyz
file_path: ../data/points/noise/group_23