In [16]:
import numpy as np
import pandas as pd
import shutil
import random

import os, sys

import generate_image_lists as gil

In [59]:
# Long-side size per label, for the labels where gen_long_dirs uses this
# value instead of its 'x<N>' substring match on the label name
# (e.g. 'arch_1x6x2' would otherwise be matched through 'x2').
hard_label_dict = {
    'arch_1x6x2': 6,
    'brick_2x2_round': 2,
    'brick_corner_1x2x2': 2,
    'corner_brick_2x2_45_outside': 2,
    'corner_plate_1x2x2': 2,
    'plate_1x1_round': 1,
    'plate_1x1_w_clip_vertical': 1,
    'plate_1x2_w_1_knob': 2,
    'roof_tile_1x2_45': 2,
    'roof_tile_1x2_inv': 2,
    'roof_tile_1x3_25': 3,
    'roof_tile_1x3_25_inv': 3,
    'roof_tile_2x2_45': 2,
    'technic_brick_1x1_w_hole': 1,
    'technic_brick_1x2_w_hole': 2,
}

def check_dir( inp_dir ):
    """Make sure the directory ``inp_dir`` exists, creating parents as needed.

    Calling it again on an existing directory is a no-op.

    Raises:
        FileExistsError: if ``inp_dir`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate os.path.exists() test, which left
    # a window in which the directory could appear between check and create.
    os.makedirs( inp_dir, exist_ok=True )

def train_test_split(
                        img_path,
                        label,
                        frac_train
                    ):
    """Copy one image into the train or the test folder of class ``label``.

    A uniform random draw between 0 and 1 is compared with ``frac_train``:
    a draw below it sends the image to the training tree, any other draw to
    the test tree.  The copy keeps the source file name (the part of
    ``img_path`` after the last '/'), prefixed with ``WHITE_DIRTY``.

    Reads the module-level names ``_TRAIN_DIR``, ``_TEST_DIR`` and
    ``WHITE_DIRTY`` at call time, so they must be set before calling.
    """
    draw = random.uniform( 0.0, 1.0 )
    split_root = _TRAIN_DIR if draw < frac_train else _TEST_DIR

    file_name = img_path.rsplit( '/', 1 )[-1]
    shutil.copy( img_path, split_root + label + '/' + WHITE_DIRTY + file_name )

# Copies images into directories w/class labels
# Will split into train/test based on input
def gen_height_dirs( 
                        inp_img_list,
                        inp_labels,
                        train_frac,
                   ):
    """Sort images into 'height_brick', 'height_plate' and 'height_other'.

    Args:
        inp_img_list: iterable of image paths.
        inp_labels:   mapping indexed by image path, giving the label string.
        train_frac:   passed to train_test_split as the training fraction.

    The class folders are created under both ``_TRAIN_DIR`` and
    ``_TEST_DIR``, the same roots train_test_split copies into.
    """
    # Make sure directories exist
    for split_dir in [_TRAIN_DIR, _TEST_DIR]:
        for dd in ['height_brick','height_plate','height_other']:
            check_dir( split_dir + dd + '/' )

    for inp_img in inp_img_list:

        true_label = inp_labels[inp_img]

        # First keyword contained in the label wins ('brick' is tried
        # before 'plate'); labels containing neither go to 'other'.
        matched = next(
            ( kw for kw in ('brick','plate') if kw in true_label ),
            'other',
        )
        train_test_split( inp_img, 'height_'+matched, train_frac )
                
# 
def gen_shape_dirs( 
                        inp_img_list,
                        inp_labels,
                        train_frac,
                  ):
    """Sort images into 'shape_corner', 'shape_round' and 'shape_square'.

    Args:
        inp_img_list: iterable of image paths.
        inp_labels:   mapping indexed by image path, giving the label string.
        train_frac:   passed to train_test_split as the training fraction.

    The class folders are created under both ``_TRAIN_DIR`` and
    ``_TEST_DIR``, the same roots train_test_split copies into.
    """
    # Make sure directories exist
    for split_dir in [_TRAIN_DIR, _TEST_DIR]:
        for dd in ['shape_corner','shape_round','shape_square']:
            check_dir( split_dir + dd + '/' )

    for inp_img in inp_img_list:

        true_label = inp_labels[inp_img]

        # First keyword contained in the label wins ('corner' is tried
        # before 'round'); labels containing neither count as 'square'.
        matched = next(
            ( kw for kw in ('corner','round') if kw in true_label ),
            'square',
        )
        train_test_split( inp_img, 'shape_'+matched, train_frac )

    
# 
def gen_short_dirs( 
                        inp_img_list,
                        inp_labels,
                        train_frac,
                  ):
    """Sort images into 'short_<N>' folders, N being one of 1, 2, 4, 6, 8.

    N is the first of those values for which the label contains the
    substring '_<N>x'.  There is no fallback class: an image whose label
    matches none of them is not copied anywhere.

    Args:
        inp_img_list: iterable of image paths.
        inp_labels:   mapping indexed by image path, giving the label string.
        train_frac:   passed to train_test_split as the training fraction.

    The class folders are created under both ``_TRAIN_DIR`` and
    ``_TEST_DIR``, the same roots train_test_split copies into.
    """
    short_sizes = ['1','2','4','6','8']

    # Make sure directories exist
    for split_dir in [_TRAIN_DIR, _TEST_DIR]:
        for size in short_sizes:
            check_dir( split_dir + 'short_' + size + '/' )

    for inp_img in inp_img_list:

        true_label = inp_labels[inp_img]

        # Sizes are tried in list order; None means no '_<N>x' was found.
        matched = next(
            ( size for size in short_sizes if ('_'+size+'x') in true_label ),
            None,
        )
        if matched is not None:
            train_test_split( inp_img, 'short_'+matched, train_frac )
                

# 
def gen_long_dirs( 
                        inp_img_list,
                        inp_labels,
                        train_frac,
                  ):
    """Sort images into 'long_<N>' folders (N in 1, 2, 3, 4, 6, 8, 10, 12).

    A label listed in ``hard_label_dict`` uses the value stored there.
    Any other label gets the first N for which it contains 'x<N>'; when no
    N matches, the image is not copied anywhere.

    Args:
        inp_img_list: iterable of image paths.
        inp_labels:   mapping indexed by image path, giving the label string.
        train_frac:   passed to train_test_split as the training fraction.

    The class folders are created under both ``_TRAIN_DIR`` and
    ``_TEST_DIR``, the same roots train_test_split copies into.
    """
    # Make sure directories exist
    for split_dir in [_TRAIN_DIR, _TEST_DIR]:
        for dd in ['1','2','3','4','6','8','10','12']:
            check_dir( split_dir + 'long_' + dd + '/' )

    for inp_img in inp_img_list:

        true_label = inp_labels[inp_img]

        # The override does not depend on the candidate size, so it is
        # resolved once up front rather than inside the size loop.
        if true_label in hard_label_dict:
            train_test_split( inp_img, 'long_'+str(hard_label_dict[true_label]), train_frac )
            continue

        # Two-digit sizes are tried first so that 'x10' / 'x12' are not
        # picked up by the shorter 'x1' pattern.
        for label in ['10','12','1','2','3','4','6','8']:
            if ( 'x'+label ) in true_label:
                train_test_split( inp_img, 'long_'+label, train_frac )
                break
                
# Runs the height, shape, short-side and long-side sorters over the same
#  images; files are copied on disk and nothing is returned
def gen_all_labels( 
                        inp_df, 
                        inp_labels, 
                        frac
                  ):
    """Apply every ``gen_*_dirs`` sorter to one set of images.

    Args:
        inp_df:     collection of image paths, forwarded unchanged to each
                    sorter (which iterates over it).
        inp_labels: label lookup indexed by image path, forwarded unchanged.
        frac:       training fraction, forwarded unchanged.

    The sorters run in the order height, shape, short, long.
    """
    sorters = ( gen_height_dirs, gen_shape_dirs, gen_short_dirs, gen_long_dirs )
    for make_dirs in sorters:
        make_dirs( inp_df, inp_labels, frac )

In [41]:
# Output root for the sorted copies.  NOTE(review): absolute, machine-specific
# path; edit it here when running elsewhere.
_OUT_DIR = '/home/sean/Insight/legos/classification/'

# Train / test trees live side by side under <_OUT_DIR>data/
_TRAIN_DIR, _TEST_DIR = [ _OUT_DIR + sub_dir for sub_dir in ( 'data/train/', 'data/test/' ) ]

# Fraction handed to the sorters as the train share of the random split
train_fraction = 0.8

In [60]:
# White image set: train_test_split reads WHITE_DIRTY as the file-name
# prefix of every copy, so it has to be set before the sorters run.
WHITE_DIRTY = 'white_'
img_list, img_labels = gil.get_white_images_labels()
gen_all_labels( inp_df=img_list, inp_labels=img_labels, frac=train_fraction )

In [61]:
# Dirty image set: same pipeline, with copies prefixed 'dirty_' so they do
# not share file names with the 'white_' copies in the class folders.
WHITE_DIRTY = 'dirty_'
img_list, img_labels = gil.get_dirty_images_labels()
gen_all_labels( inp_df=img_list, inp_labels=img_labels, frac=train_fraction )