### food-101 Data Generation

The food-101 dataset was downloaded and preprocessed (images were padded and resized) by running the algorithms found in https://github.com/neurodata/LLF_tidy_images. This notebook serves to take those processed images and convert them into numpy arrays that can be read and used by the progressive learning algorithms.

In [1]:
# Import all necessary packages
import numpy as np
import os
import matplotlib.pyplot as plt

In [2]:
# Locate the preprocessed dataset and list its class folders in alphabetical order.
# os.listdir returns entries in arbitrary order and may include stray non-class
# entries (e.g. .DS_Store, .ipynb_checkpoints) that would shift the class indices
# used below, so only visible sub-directories are kept before sorting.
data_dir = "food-101_dataset/images/"
foods_sorted = sorted(
    entry
    for entry in os.listdir(data_dir)
    if not entry.startswith(".") and os.path.isdir(os.path.join(data_dir, entry))
)

Start reading the image data into numpy arrays. Only the first 100 out of the 101 sorted food classes will be used in order to make it easier to split up the samples into tasks later on.

In [3]:
# Initialize data_x1 with all 1000 images from the first class.
# The images are read into a list and stacked once: growing the array with
# np.concatenate one image at a time re-copies the whole array on every iteration.
# Filenames are sorted because os.listdir returns them in arbitrary order, which
# would make the sample order differ from one machine to another.
class_dir = os.path.join(data_dir, foods_sorted[0])
food_class = sorted(os.listdir(class_dir))
data_x1 = np.stack([plt.imread(os.path.join(class_dir, food_class[i])) for i in range(1000)])

print(data_x1.shape)

(1000, 32, 32, 3)


In [4]:
# Add to the data_x1 array initialized in the previous cell block until it contains all images from the first 10 classes.
# The new images are gathered in a list and joined with a single np.concatenate call;
# concatenating once per image re-copies the whole array each time, which is what makes
# large batches so slow. Filenames are sorted so the sample order is deterministic.
batch_images = []
for j in range(1, 10):
    class_dir = os.path.join(data_dir, foods_sorted[j])
    food_class = sorted(os.listdir(class_dir))
    batch_images.extend(plt.imread(os.path.join(class_dir, food_class[i])) for i in range(1000))
data_x1 = np.concatenate([data_x1, np.stack(batch_images)])

print(data_x1.shape)

(10000, 32, 32, 3)


This two-step process (initializing an x data array with the images of one class, then concatenating the images of the next nine classes to reach a batch of 10000 images) is carried out 10 times in total, resulting in 10 numpy arrays, each containing all the images from 10 of the food-101 classes.

In [5]:
# Initialize data_x2 with all 1000 images from class 10 (first class of the second batch).
# Images are read into a list and stacked once instead of concatenating per image,
# and filenames are sorted because os.listdir returns them in arbitrary order.
class_dir = os.path.join(data_dir, foods_sorted[10])
food_class = sorted(os.listdir(class_dir))
data_x2 = np.stack([plt.imread(os.path.join(class_dir, food_class[i])) for i in range(1000)])

print(data_x2.shape)

(1000, 32, 32, 3)


In [6]:
# Extend data_x2 with the images of classes 11-19 so it holds the whole second batch.
# Images are gathered in a list and concatenated once (per-image concatenation
# re-copies the array every time); filenames are sorted for a deterministic order.
batch_images = []
for j in range(11, 20):
    class_dir = os.path.join(data_dir, foods_sorted[j])
    food_class = sorted(os.listdir(class_dir))
    batch_images.extend(plt.imread(os.path.join(class_dir, food_class[i])) for i in range(1000))
data_x2 = np.concatenate([data_x2, np.stack(batch_images)])

print(data_x2.shape)

(10000, 32, 32, 3)


In [7]:
# Initialize data_x3 with all 1000 images from class 20 (first class of the third batch).
# Images are read into a list and stacked once instead of concatenating per image,
# and filenames are sorted because os.listdir returns them in arbitrary order.
class_dir = os.path.join(data_dir, foods_sorted[20])
food_class = sorted(os.listdir(class_dir))
data_x3 = np.stack([plt.imread(os.path.join(class_dir, food_class[i])) for i in range(1000)])

print(data_x3.shape)

(1000, 32, 32, 3)


In [8]:
# Extend data_x3 with the images of classes 21-29 so it holds the whole third batch.
# Images are gathered in a list and concatenated once (per-image concatenation
# re-copies the array every time); filenames are sorted for a deterministic order.
batch_images = []
for j in range(21, 30):
    class_dir = os.path.join(data_dir, foods_sorted[j])
    food_class = sorted(os.listdir(class_dir))
    batch_images.extend(plt.imread(os.path.join(class_dir, food_class[i])) for i in range(1000))
data_x3 = np.concatenate([data_x3, np.stack(batch_images)])

print(data_x3.shape)

(10000, 32, 32, 3)


In [9]:
# Initialize data_x4 with all 1000 images from class 30 (first class of the fourth batch).
# Images are read into a list and stacked once instead of concatenating per image,
# and filenames are sorted because os.listdir returns them in arbitrary order.
class_dir = os.path.join(data_dir, foods_sorted[30])
food_class = sorted(os.listdir(class_dir))
data_x4 = np.stack([plt.imread(os.path.join(class_dir, food_class[i])) for i in range(1000)])

print(data_x4.shape)

(1000, 32, 32, 3)


In [10]:
# Extend data_x4 with the images of classes 31-39 so it holds the whole fourth batch.
# Images are gathered in a list and concatenated once (per-image concatenation
# re-copies the array every time); filenames are sorted for a deterministic order.
batch_images = []
for j in range(31, 40):
    class_dir = os.path.join(data_dir, foods_sorted[j])
    food_class = sorted(os.listdir(class_dir))
    batch_images.extend(plt.imread(os.path.join(class_dir, food_class[i])) for i in range(1000))
data_x4 = np.concatenate([data_x4, np.stack(batch_images)])

print(data_x4.shape)

(10000, 32, 32, 3)


In [11]:
# Initialize data_x5 with all 1000 images from class 40 (first class of the fifth batch).
# Images are read into a list and stacked once instead of concatenating per image,
# and filenames are sorted because os.listdir returns them in arbitrary order.
class_dir = os.path.join(data_dir, foods_sorted[40])
food_class = sorted(os.listdir(class_dir))
data_x5 = np.stack([plt.imread(os.path.join(class_dir, food_class[i])) for i in range(1000)])

print(data_x5.shape)

(1000, 32, 32, 3)


In [12]:
# Extend data_x5 with the images of classes 41-49 so it holds the whole fifth batch.
# Images are gathered in a list and concatenated once (per-image concatenation
# re-copies the array every time); filenames are sorted for a deterministic order.
batch_images = []
for j in range(41, 50):
    class_dir = os.path.join(data_dir, foods_sorted[j])
    food_class = sorted(os.listdir(class_dir))
    batch_images.extend(plt.imread(os.path.join(class_dir, food_class[i])) for i in range(1000))
data_x5 = np.concatenate([data_x5, np.stack(batch_images)])

print(data_x5.shape)

(10000, 32, 32, 3)


In [13]:
# Initialize data_x6 with all 1000 images from class 50 (first class of the sixth batch).
# Images are read into a list and stacked once instead of concatenating per image,
# and filenames are sorted because os.listdir returns them in arbitrary order.
class_dir = os.path.join(data_dir, foods_sorted[50])
food_class = sorted(os.listdir(class_dir))
data_x6 = np.stack([plt.imread(os.path.join(class_dir, food_class[i])) for i in range(1000)])

print(data_x6.shape)

(1000, 32, 32, 3)


In [14]:
# Extend data_x6 with the images of classes 51-59 so it holds the whole sixth batch.
# Images are gathered in a list and concatenated once (per-image concatenation
# re-copies the array every time); filenames are sorted for a deterministic order.
batch_images = []
for j in range(51, 60):
    class_dir = os.path.join(data_dir, foods_sorted[j])
    food_class = sorted(os.listdir(class_dir))
    batch_images.extend(plt.imread(os.path.join(class_dir, food_class[i])) for i in range(1000))
data_x6 = np.concatenate([data_x6, np.stack(batch_images)])

print(data_x6.shape)

(10000, 32, 32, 3)


In [15]:
# Initialize data_x7 with all 1000 images from class 60 (first class of the seventh batch).
# Images are read into a list and stacked once instead of concatenating per image,
# and filenames are sorted because os.listdir returns them in arbitrary order.
class_dir = os.path.join(data_dir, foods_sorted[60])
food_class = sorted(os.listdir(class_dir))
data_x7 = np.stack([plt.imread(os.path.join(class_dir, food_class[i])) for i in range(1000)])

print(data_x7.shape)

(1000, 32, 32, 3)


In [16]:
# Extend data_x7 with the images of classes 61-69 so it holds the whole seventh batch.
# Images are gathered in a list and concatenated once (per-image concatenation
# re-copies the array every time); filenames are sorted for a deterministic order.
batch_images = []
for j in range(61, 70):
    class_dir = os.path.join(data_dir, foods_sorted[j])
    food_class = sorted(os.listdir(class_dir))
    batch_images.extend(plt.imread(os.path.join(class_dir, food_class[i])) for i in range(1000))
data_x7 = np.concatenate([data_x7, np.stack(batch_images)])

print(data_x7.shape)

(10000, 32, 32, 3)


In [17]:
# Initialize data_x8 with all 1000 images from class 70 (first class of the eighth batch).
# Images are read into a list and stacked once instead of concatenating per image,
# and filenames are sorted because os.listdir returns them in arbitrary order.
class_dir = os.path.join(data_dir, foods_sorted[70])
food_class = sorted(os.listdir(class_dir))
data_x8 = np.stack([plt.imread(os.path.join(class_dir, food_class[i])) for i in range(1000)])

print(data_x8.shape)

(1000, 32, 32, 3)


In [18]:
# Extend data_x8 with the images of classes 71-79 so it holds the whole eighth batch.
# Images are gathered in a list and concatenated once (per-image concatenation
# re-copies the array every time); filenames are sorted for a deterministic order.
batch_images = []
for j in range(71, 80):
    class_dir = os.path.join(data_dir, foods_sorted[j])
    food_class = sorted(os.listdir(class_dir))
    batch_images.extend(plt.imread(os.path.join(class_dir, food_class[i])) for i in range(1000))
data_x8 = np.concatenate([data_x8, np.stack(batch_images)])

print(data_x8.shape)

(10000, 32, 32, 3)


In [19]:
# Initialize data_x9 with all 1000 images from class 80 (first class of the ninth batch).
# Images are read into a list and stacked once instead of concatenating per image,
# and filenames are sorted because os.listdir returns them in arbitrary order.
class_dir = os.path.join(data_dir, foods_sorted[80])
food_class = sorted(os.listdir(class_dir))
data_x9 = np.stack([plt.imread(os.path.join(class_dir, food_class[i])) for i in range(1000)])

print(data_x9.shape)

(1000, 32, 32, 3)


In [20]:
# Extend data_x9 with the images of classes 81-89 so it holds the whole ninth batch.
# Images are gathered in a list and concatenated once (per-image concatenation
# re-copies the array every time); filenames are sorted for a deterministic order.
batch_images = []
for j in range(81, 90):
    class_dir = os.path.join(data_dir, foods_sorted[j])
    food_class = sorted(os.listdir(class_dir))
    batch_images.extend(plt.imread(os.path.join(class_dir, food_class[i])) for i in range(1000))
data_x9 = np.concatenate([data_x9, np.stack(batch_images)])

print(data_x9.shape)

(10000, 32, 32, 3)


In [21]:
# Initialize data_x10 with all 1000 images from class 90 (first class of the tenth batch).
# Images are read into a list and stacked once instead of concatenating per image,
# and filenames are sorted because os.listdir returns them in arbitrary order.
class_dir = os.path.join(data_dir, foods_sorted[90])
food_class = sorted(os.listdir(class_dir))
data_x10 = np.stack([plt.imread(os.path.join(class_dir, food_class[i])) for i in range(1000)])

print(data_x10.shape)

(1000, 32, 32, 3)


In [22]:
# Extend data_x10 with the images of classes 91-99 so it holds the whole tenth batch.
# Images are gathered in a list and concatenated once (per-image concatenation
# re-copies the array every time); filenames are sorted for a deterministic order.
batch_images = []
for j in range(91, 100):
    class_dir = os.path.join(data_dir, foods_sorted[j])
    food_class = sorted(os.listdir(class_dir))
    batch_images.extend(plt.imread(os.path.join(class_dir, food_class[i])) for i in range(1000))
data_x10 = np.concatenate([data_x10, np.stack(batch_images)])

print(data_x10.shape)

(10000, 32, 32, 3)


In [23]:
# Combine the ten per-batch x arrays (10 classes each) into one big numpy array.
# A single concatenate call is used so the growing result is not re-copied
# by a chain of intermediate concatenations.
data_x = np.concatenate([
    data_x1, data_x2, data_x3, data_x4, data_x5,
    data_x6, data_x7, data_x8, data_x9, data_x10,
])

print(data_x.shape)

(100000, 32, 32, 3)


In [24]:
# GitHub does not allow files over 100 MB, so the x data is written out as
# three separate compressed numpy files, each holding one contiguous slice
x_file_slices = [
    ('food_101_array_data_x_1.npz', 0, 34000),
    ('food_101_array_data_x_2.npz', 34000, 68000),
    ('food_101_array_data_x_3.npz', 68000, 100000),
]
for x_file, first, last in x_file_slices:
    np.savez_compressed(x_file, data_x[first:last])

In [25]:
# Create y data containing 100 class labels: label k is repeated 1000 times for
# k = 0..99, matching the 1000-images-per-class layout of the x data.
# np.repeat builds the array in one step instead of concatenating inside a loop.
data_y = np.repeat(np.arange(100, dtype=int), 1000)

print(data_y.shape)

(100000,)


In [26]:
# Write the class labels out to a single compressed numpy file
y_file = 'food_101_array_data_y.npz'
np.savez_compressed(y_file, data_y)