In [1]:
# Dependencies used by the helper functions in the following cells.
import os
import numpy as np

# Record the installed pib version alongside the notebook outputs.
import pib
print("PIB version: ", pib.__version__)

PIB version:  1.0


In [2]:
# Dataset source (MNIST variants, "Deep vs. Shallow" comparison, ICML 2007):
# http://www.iro.umontreal.ca/~lisa/twiki/bin/view.cgi/Public/DeepVsShallowComparisonICML2007

# data_utils.py
def amat_to_np(amat_file):
    """Parse a whitespace-delimited `.amat` text file into feature/label arrays.

    Every non-blank line is split on whitespace and each token is converted
    with `float`. Within a row, the last value is returned as the label and
    all preceding values as the features.

    # Arguments
        amat_file: Path of the text file to read.

    # Returns
        A tuple `(x, y)` of `np.float32` arrays, where `x` is the 2-D array of
        all columns but the last and `y` is the 1-D array of the last column.

    # Raises
        ValueError: If the file contains no data rows, if a token cannot be
            parsed as a float, or if rows have differing numbers of values.
    """
    rows = []
    with open(amat_file, 'r') as f:
        for line in f:
            values = line.split()
            # Blank / whitespace-only lines (e.g. a trailing empty line) hold
            # no sample; appending [] for them would make the rows ragged and
            # break the array conversion below.
            if values:
                rows.append([float(v) for v in values])
    if not rows:
        # np.array([]) is 1-D, so the 2-D slicing below would otherwise fail
        # with an unhelpful IndexError; report the real problem instead.
        raise ValueError('no data rows found in {}'.format(amat_file))
    data = np.array(rows, dtype=np.float32)
    return data[:, :-1], data[:, -1]


def list_all_files(dir_name):
    """Recursively collect the paths of all non-directory entries under `dir_name`.

    Entries are visited in the order returned by `os.listdir`; the contents of
    a sub-directory are placed at the position where that sub-directory was
    encountered.

    # Arguments
        dir_name: Directory whose tree is traversed.

    # Returns
        A list of paths, each built by joining `dir_name` with the entry's
        location inside the tree.
    """
    collected = []
    for name in os.listdir(dir_name):
        path = os.path.join(dir_name, name)
        if not os.path.isdir(path):
            collected.append(path)
            continue
        # Directory: descend and splice its files into the running list.
        collected.extend(list_all_files(path))
    return collected

def data_prepro(data_path, target_path, verbose=True):
    """Processes mnist-variant amat into mnist-variant numpy arrays.

    Each immediate sub-directory of `data_path` is treated as one variant and
    gets a directory of the same name under `target_path`. Every file found
    (recursively) inside a variant is parsed with `amat_to_np` and stored as
    `X_<postfix>.npy` / `Y_<postfix>.npy`, where `<postfix>` is the last
    `_`-separated token of the file name before its first `.`. The postfix
    `valid` is stored under the name `train`.

    A file is skipped when both of its output arrays already exist, so a
    repeated call does not re-parse converted files. For the same reason, a
    second source file in a variant that maps to an already-written postfix
    is not processed.

    # Arguments
        data_path: A full path to the mnist-variant data directory which is organized after `./download.sh`.
        target_path: A full path to a directory where the numpy arrays will be saved.
        verbose: If true, print the name of each variant before processing it.
    """
    variants = [
        variant for variant in os.listdir(data_path)
        if os.path.isdir(os.path.join(data_path, variant))
    ]
    for variant in variants:
        if verbose:
            print(variant)
        # Mirror the variant directory inside target_path. exist_ok avoids the
        # check-then-create race of a separate os.path.exists() test.
        variant_target = os.path.join(target_path, variant)
        os.makedirs(variant_target, exist_ok=True)

        for amat in list_all_files(os.path.join(data_path, variant)):
            # Paths are built with os.path.join, so take the file name with
            # os.path.basename rather than splitting on a hard-coded '/'.
            postfix = os.path.basename(amat).split('.')[0].split('_')[-1]
            if postfix == 'valid':
                postfix = 'train'
            x_name = os.path.join(variant_target, 'X_{}.npy'.format(postfix))
            y_name = os.path.join(variant_target, 'Y_{}.npy'.format(postfix))
            # Already converted: skip the (slow) text parsing.
            if os.path.exists(x_name) and os.path.exists(y_name):
                continue
            x, y = amat_to_np(amat)
            np.save(x_name, x)
            np.save(y_name, y)

In [3]:
# Convert the raw `.amat` variants under `data_path` into `.npy` arrays under `target_path`.
data_path, target_path = 'data/mnist-variant/amat', 'data/mnist-variant/np'
data_prepro(data_path=data_path, target_path=target_path)

mnist_rotation_back_image_new
mnist_rotation_new
mnist_background_random
rectangles
mnist
convex
mnist_background_images
rectangles_images
