In [37]:
# import all the necessary libraries
# make sure that you have installed all of them
# if not, please have a look at the README.md and the requirements.txt file

import os
import numpy as np
from matplotlib import pyplot as plt
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, Conv2D, Dense, MaxPool2D, Input, Flatten
import tensorflow as tf
from importlib import reload
from PIL import Image

# Preprocessing

In [38]:
# number of classes, and number of support / query pictures per class
n_classes = 6
n_support = 6
n_query = 8
# images requested per class: the support and query pictures plus 10 spare ones
n_images = n_support + n_query + 10

# folder names of the 6 classes
class_names = [
    'carpets_n03998194',
    'dogs_n02089867',
    'ipods_n03584254',
    'oranges_n07747607',
    'pans_n04596742',
    'toucans_n01843383',
]

# directory of each of the first n_classes classes, in the order of class_names
CLASS_LIST = [os.path.join("data_10classes", class_names[idx]) for idx in range(n_classes)]
print(CLASS_LIST)

['data_10classes/carpets_n03998194', 'data_10classes/dogs_n02089867', 'data_10classes/ipods_n03584254', 'data_10classes/oranges_n07747607', 'data_10classes/pans_n04596742', 'data_10classes/toucans_n01843383']


In [39]:
# take n_images images from all of the previously specified classes
# list_files shuffles the matched file names by default; passing a fixed seed keeps
# the random selection but makes it identical on every "Restart Kernel -> Run All"
LIST_FILES_SEED = 42
classes = [
    tf.data.Dataset.list_files(class_dir + "/*.JPEG", seed=LIST_FILES_SEED).take(n_images)
    for class_dir in CLASS_LIST[:n_classes]
]

In [40]:
# The ImageNet data contains some images with only one channel, which leads to problems during training,
# so we filter those images out in this cell.

# side length in pixels of the square pictures produced by the preprocessing (here: 105 x 105 pixels)
IMG_SIZE = 105

# define the preprocessing
def preprocess(file_path: "tf.Tensor", img_size: int = IMG_SIZE) -> "tf.Tensor":
    """Read one JPEG file, resize it to a square and scale its values by 1/255.

    Args:
        file_path: path of the JPEG file. When the function is applied through
            ``Dataset.map`` (as done below) this is a scalar string tensor, not
            a Python ``str``.
        img_size: height and width of the resized picture in pixels.

    Returns:
        A floating-point tensor of shape ``(img_size, img_size, channels)`` with
        values between 0 and 1. The decoder is not asked for a fixed number of
        channels, so ``channels`` is whatever the file contains; pictures that
        do not have 3 channels are sorted out after this function has run.
    """
    # 1. read the raw bytes of the file
    byte_img = tf.io.read_file(file_path)
    # 2. decode the jpeg (keeps the channel count stored in the file)
    img = tf.io.decode_jpeg(byte_img)
    # 3. resize the picture to img_size x img_size pixels
    img = tf.image.resize(img, [img_size, img_size])
    # 4. normalize the values so that they are between 0 and 1
    img = img / 255.0

    return img

# shape a picture must have to be kept: IMG_SIZE x IMG_SIZE pixels with 3 color channels
# (derived from IMG_SIZE so that the filter keeps working when the picture size is changed)
expected_shape = (IMG_SIZE, IMG_SIZE, 3)

# set up a list that contains, per class, only the images with 3 color channels
filtered_classes = []
for class_i in classes:
    # call the preprocessing that is defined above and materialise the dataset as a list of tensors
    class_list = list(class_i.map(preprocess))
    # keep only those arrays that have the expected shape
    filtered_classes.append([array for array in class_list if array.shape == expected_shape])

# the number of images must be the same for all classes, so the smallest filtered class
# (capped at the n_images that were requested) decides how many images every class keeps
# example: if one class contains 98 images after filtering and another one 96,
# all classes contain 96 images in the end
n_images_new = min([n_images] + [len(filtered_list) for filtered_list in filtered_classes])
# faulty is the maximum over all classes of the number of pictures that are missing
faulty = n_images - n_images_new
print(n_images_new)

# fail with a readable message instead of an opaque numpy error when a class has no usable image
if filtered_classes and n_images_new == 0:
    raise ValueError(
        "at least one class has no image of shape " + str(expected_shape) + " left after filtering"
    )

# select the first n_images_new pictures of each filtered class and stack them into one
# array of shape (n_images_new, IMG_SIZE, IMG_SIZE, 3) per class
class_res = [np.stack(filtered_class[:n_images_new]) for filtered_class in filtered_classes]

2024-01-19 16:17:04.093340: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [600]
	 [[{{node Placeholder/_0}}]]
2024-01-19 16:17:04.093558: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [600]
	 [[{{node Placeholder/_0}}]]
2024-01-19 16:17:04.137867: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [600]
	

24


2024-01-19 16:17:04.304524: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [600]
	 [[{{node Placeholder/_0}}]]
2024-01-19 16:17:04.304707: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [600]
	 [[{{node Placeholder/_0}}]]


In [41]:
# names of the folders that will later hold the query and support images:
# one name per query picture and one per support picture, numbered from 0
query_names = ['query_' + str(idx) for idx in range(n_query)]
support_names = ['support_' + str(idx) for idx in range(n_support)]

# show both lists of folder names
print(query_names)
print(support_names)

['query_0', 'query_1', 'query_2', 'query_3', 'query_4', 'query_5', 'query_6', 'query_7']
['support_0', 'support_1', 'support_2', 'support_3', 'support_4', 'support_5']


105

In [43]:
# define the output folders and save the images inside

# root folder that receives one sub-folder per class
OUTPUT_ROOT = 'preprocessed_data6'


def _save_image(image_array, folder, file_name):
    """Save one preprocessed image as a file inside ``folder``.

    Args:
        image_array: array of shape (height, width, 3) whose values were
            normalized to the range [0, 1] by the preprocessing.
        folder: directory the file is written to; created if it is missing.
        file_name: name of the image file inside ``folder``.
    """
    # create the folder (no error if it already exists)
    os.makedirs(folder, exist_ok=True)
    # scale pixel values back to the range [0, 255]; round to the nearest integer
    # because a plain astype(np.uint8) truncates and would darken pixels whose
    # scaled value ends up just below a whole number (e.g. 199.99998 -> 199)
    pixels = np.clip(np.rint(image_array * 255), 0, 255).astype(np.uint8)
    # save the image to the path
    Image.fromarray(pixels).save(os.path.join(folder, file_name))


# every class supplies n_query query images followed by n_support support images;
# check this before anything is written so that no partial output is left behind
n_required = n_query + n_support
for i in range(0, n_classes):
    if len(class_res[i]) < n_required:
        raise IndexError(
            f'class {class_names[i]} has only {len(class_res[i])} images, '
            f'but {n_required} are needed for the query and support sets'
        )

for i in range(0, n_classes):

    # to save the query images: images 0 .. n_query-1 of the class, one folder per image
    for j in range(0, n_query):
        folder_ij = os.path.join(OUTPUT_ROOT, class_names[i], query_names[j])
        _save_image(class_res[i][j], folder_ij, f'{class_names[i]}_{i}_{j}.jpeg')

    # to save the support images: the n_support images that follow the query images
    for k in range(0, n_support):
        folder_ik = os.path.join(OUTPUT_ROOT, class_names[i], support_names[k])
        _save_image(class_res[i][n_query + k], folder_ik, f'{class_names[i]}_{i}_{k}.jpeg')