In [1]:
import pandas as pd 
import numpy as np 
import os

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import MultiLabelBinarizer

In [2]:
categories = [str(x) for x in range(1,101)]
train_path = "UECFOOD100_CROP/train"
test_path =  "UECFOOD100_CROP/test"
multiple_labels_path = "labels/multiple_food.txt"
multiple_food = pd.DataFrame()

mlb = MultiLabelBinarizer(categories)

def build_multiple_food_df():
    with open(multiple_labels_path) as f:
        contents = f.read().split('\n')
        labels = []
        images = []
        for content in contents[1:]:
            label = content.split(" ")[1:]
            label = label[:len(label)-1]
            labels.append(label)
            images.append(content.split(" ")[0])

        multiple_food["image"] = images
        multiple_food["labels"] = labels

def build_category_files(df_path):
    category_files = []
    for category in categories:
        category_path = os.path.join(df_path, str(category))
        category_files = category_files + [os.path.join(str(category), i) for i in os.listdir(category_path) if os.path.isfile(os.path.join(category_path,i))]
    return category_files
        
def build_category_labels(category_files):
    category_labels = []
    for file in category_files:
        number = file.split("/")[1].split(".jpg")[0]
        category = file.split("/")[0]
        multiple = multiple_food[multiple_food['image'] == number]
        labels = []
        if len(multiple) > 0:
            labels = mlb.fit_transform(list(multiple_food[multiple_food['image'] == number]["labels"]))
        else:
            labels = mlb.fit_transform(list([[category]]))
        category_labels.append(labels[0])
    return category_labels
    




In [3]:
build_multiple_food_df()
print("Length of Multiple Food: " + str(len(multiple_food)))

Length of Multiple Food: 1174


In [4]:
# Train Images
train_category_files = build_category_files(train_path)
train_category_labels = build_category_labels(train_category_files)
train_df = pd.DataFrame(train_category_labels, columns=categories)
train_df["filename"] = train_category_files

# Test Images
test_category_files = build_category_files(test_path)
test_category_labels = build_category_labels(test_category_files)
test_df = pd.DataFrame(test_category_labels, columns=categories)
test_df["filename"] = test_category_files

In [5]:
print(train_df.shape)
print(test_df.shape)

(11514, 101)
(2902, 101)


In [6]:
train_df.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,92,93,94,95,96,97,98,99,100,filename
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1/63.jpg
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1/15259.jpg
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1/16036.jpg
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1/14621.jpg
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1/77.jpg


In [7]:
test_df.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,92,93,94,95,96,97,98,99,100,filename
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1/5109.jpg
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1/88.jpg
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1/15098.jpg
3,1,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,1/15113.jpg
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1/13947.jpg


## Image Loader and Augmentation

In [8]:
datagen=ImageDataGenerator(rescale=1./255.)
test_datagen=ImageDataGenerator(rescale=1./255.)

train_generator=datagen.flow_from_dataframe(
dataframe=train_df,
directory="./UECFOOD100_CROP/train",
x_col="filename",
y_col=categories,
batch_size=32,
seed=42,
shuffle=True,
class_mode="raw",
target_size=(128,128))

Found 11514 validated image filenames.
