In [2]:
from PIL import Image 
import os  
import pickle 
import numpy as np 

In [3]:
# Raw images live in <parent of the current working directory>/data/raw.
dst_path = os.path.join(
    os.path.abspath(os.path.join(os.getcwd(), os.pardir)),
    'data',
    'raw',
)

In [4]:
# Display the resolved raw-data directory as a sanity check.
dst_path

'/media/migbernardo/Data/nova_ims/dl/dl_project/data/raw'

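Function that loads all images together with their one-hot labels and splits them, class by class, into train (60%), validation (20%) and test (20%) sets. It returns a list of three tuples of tensors, `[(img, labels), (img, labels), (img, labels)]`, in the order train, validation, test.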

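- img tensor dimensions (per split): (num_images_in_split, 64, 64, 3) when called with `size=64`

- labels tensor dimensions (per split): (num_images_in_split, 10), one one-hot column per class directory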

In [5]:
def data_gen(src_path, size):
    """Load a directory-per-class image dataset and split it 60/20/20.

    ``src_path`` must contain one sub-directory per class. Inside the class
    directory ``<label>`` the images are expected to be named
    ``<label>_1.jpg`` ... ``<label>_<n>.jpg``, where ``n`` is the number of
    entries in that directory. Class indices follow the sorted directory
    names.

    The split is done per class, in file-number order: images
    ``1..int(n * 0.6)`` go to train, the following ones up to
    ``int(n * 0.8)`` go to validation and the remainder goes to test.

    Parameters
    ----------
    src_path : str
        Directory holding one sub-directory per class.
    size : int
        Height and width (in pixels) that every image must have.

    Returns
    -------
    list of tuple of numpy.ndarray
        ``[(X_train, y_train), (X_val, y_val), (X_test, y_test)]``. Every
        ``X`` is a float64 array of shape ``(n_split, size, size, 3)`` and
        every ``y`` is a float64 one-hot array of shape
        ``(n_split, n_classes)``.

    Raises
    ------
    ValueError
        If an image is not ``size`` x ``size`` pixels.
    """
    # Class labels are the sub-directory names; plain files lying next to the
    # class directories are ignored instead of being treated as classes.
    labels = sorted(
        entry for entry in os.listdir(path=src_path)
        if os.path.isdir(os.path.join(src_path, entry))
    )
    n_classes = len(labels)

    # Work out the split boundaries of every class first, so the output arrays
    # are allocated with exactly the number of rows the per-class split fills.
    # Sizing them from the total image count can disagree with the per-class
    # rounding and either overflow the arrays or leave all-zero rows behind.
    plan = []
    for label in labels:
        cur_path = os.path.join(src_path, label)
        n_img = len(os.listdir(path=cur_path))
        plan.append((label, cur_path, n_img, int(n_img * 0.6), int(n_img * 0.8)))

    n_train = sum(train_end for _, _, _, train_end, _ in plan)
    n_val = sum(val_end - train_end for _, _, _, train_end, val_end in plan)
    n_test = sum(n_img - val_end for _, _, n_img, _, val_end in plan)

    X_train = np.zeros(shape=[n_train, size, size, 3])
    y_train = np.zeros(shape=[n_train, n_classes])
    X_val = np.zeros(shape=[n_val, size, size, 3])
    y_val = np.zeros(shape=[n_val, n_classes])
    X_test = np.zeros(shape=[n_test, size, size, 3])
    y_test = np.zeros(shape=[n_test, n_classes])

    splits = [(X_train, y_train), (X_val, y_val), (X_test, y_test)]
    filled = [0, 0, 0]  # next free row of train / val / test

    for index, (label, cur_path, n_img, train_end, val_end) in enumerate(plan):
        for i in range(1, n_img + 1):
            # Pick the destination split from the image's position in its class.
            if i <= train_end:
                which = 0
            elif i <= val_end:
                which = 1
            else:
                which = 2

            img_path = os.path.join(cur_path, '{}_{}.jpg'.format(label, i))
            # The context manager closes the file handle once the pixels are
            # copied; convert('RGB') guarantees three channels.
            with Image.open(img_path) as img:
                img_arr = np.array(img.convert('RGB'))
            if img_arr.shape != (size, size, 3):
                raise ValueError(
                    'image {} has shape {}, expected {}'.format(
                        img_path, img_arr.shape, (size, size, 3)
                    )
                )

            X, y = splits[which]
            row = filled[which]
            X[row] = img_arr
            y[row, index] = 1  # one-hot label
            filled[which] = row + 1

    # list of (images, labels) tuples: train, validation, test
    return splits

Test the function and inspect some data points

In [6]:
# Build the train / validation / test tensors from the raw 64x64 images.
data = data_gen(src_path=dst_path, size=64)

In [7]:
# First one-hot label row of the test split (data[2] is the (X_test, y_test) pair).
data[2][1][0]

array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [8]:
# Last one-hot label row of the validation split (data[1] is the (X_val, y_val) pair).
data[1][1][-1]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.])

In [18]:
# Last one-hot label row of the test split.
data[2][1][-1]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.])

In [9]:
# Shape of the test image tensor: (num_images, size, size, 3).
data[2][0].shape

(5400, 64, 64, 3)

Save and load data in pickle format for later use

In [10]:
# Persist every array of every split to '<name>.pickle' in the working
# directory. `data` is [(X_train, y_train), (X_val, y_val), (X_test, y_test)];
# one loop replaces six copy-pasted cells and keeps the same file names and
# write order.
split_arrays = {
    'X_train': data[0][0],
    'y_train': data[0][1],
    'X_val': data[1][0],
    'y_val': data[1][1],
    'X_test': data[2][0],
    'y_test': data[2][1],
}
for split_name, split_array in split_arrays.items():
    with open(split_name + '.pickle', 'wb') as f:
        pickle.dump(split_array, f)

In [19]:
# Template for reading one of the saved arrays back into memory.
# NOTE(review): 'name' looks like a placeholder -- presumably it should be one
# of the pickle files written above (e.g. X_train.pickle); confirm before running.
# Only unpickle files you created yourself: pickle.load can execute arbitrary code.
with open('name', 'rb') as f: 
    var = pickle.load(f)