# Food-101 – Mining Discriminative Components with Random Forests

- https://www.vision.ee.ethz.ch/datasets_extra/food-101/

- dataset description (taken from the URL above)

> We introduce a challenging data set of 101 food categories, with 101'000 images. For each class, 250 manually reviewed test images are provided as well as 750 training images. On purpose, the training images were not cleaned, and thus still contain some amount of noise. This comes mostly in the form of intense colors and sometimes wrong labels. All images were rescaled to have a maximum side length of 512 pixels.

# Import modules

In [None]:
import os
import json

import numpy as np
import chainer
from chainer.datasets import LabeledImageDataset
from chainercv.visualizations import vis_image
from matplotlib import pyplot as plt

from ipywidgets import interact

# Observe dataset structure

In [None]:
# specify dataset directory ("~" is expanded to the current user's home directory)
dataset_dir = os.path.expanduser("~/dataset/food-101/")

In [None]:
!tree - d - L 2 $dataset_dir

# Observe files in "meta"

In [None]:
# list the files in the "meta" sub-directory of the dataset
meta_dir = os.path.join(dataset_dir, "meta")
!ls $meta_dir

In [None]:
class_file = os.path.join(meta_dir, "classes.txt")
!head - n 10 $class_file

In [None]:
# These images are not suited for our purpose; load_annotations skips them.
BLACKLIST = [
    "lasagna/3787908",
    "steak/1340977",
    "bread_pudding/1375816",
]

# Extension appended to every image key read from the meta JSON files.
IMG_EXT = ".jpg"


def load_annotations(dataset_dir, mode):
    """Build (image path, label) pairs and class-name lookups.

    Args:
        dataset_dir: Dataset root; must contain ``meta/classes.txt``,
            ``meta/train.json`` / ``meta/test.json`` and an ``images``
            directory.
        mode: ``"train"`` selects ``meta/train.json``; any other value
            selects ``meta/test.json``.

    Returns:
        A dict with three entries:
        ``"pairs"`` -- list of ``(image_path, class_index)`` tuples, ordered
        by class index, with entries listed in ``BLACKLIST`` removed;
        ``"name2idx"`` -- class name to class index;
        ``"idx2name"`` -- class index to class name.

    Raises:
        KeyError: if a class from ``classes.txt`` is missing in the JSON file.
    """
    meta_dir = os.path.join(dataset_dir, "meta")
    class_file = os.path.join(meta_dir, "classes.txt")

    # Read the class names line by line. np.genfromtxt returns a 0-d array
    # for a file with a single line, which cannot be enumerated; a plain
    # list works for any number of classes. Blank lines are skipped.
    with open(class_file, "r") as f:
        food_names = [line.strip() for line in f if line.strip()]

    # The class index is the (0-based) position of the name in classes.txt.
    idx2name = dict(enumerate(food_names))
    name2idx = {name: idx for idx, name in idx2name.items()}

    json_name = "train.json" if mode == "train" else "test.json"
    json_file = os.path.join(meta_dir, json_name)
    with open(json_file, "r") as f:
        food2path = json.load(f)

    # Build the lookup set once instead of scanning the list per image.
    blacklist = frozenset(BLACKLIST)

    pairs = []
    for food_id, food_name in idx2name.items():
        for path in food2path[food_name]:
            if path in blacklist:
                continue
            full_path = os.path.join(dataset_dir, "images", path + IMG_EXT)
            pairs.append((full_path, food_id))

    return {
        "pairs": pairs,
        "name2idx": name2idx,
        "idx2name": idx2name,
    }


class Food101Dataset(LabeledImageDataset):
    """Labeled image dataset built from the Food-101 meta files.

    The (path, label) pairs handed to ``LabeledImageDataset`` come from
    ``load_annotations``; the class-name lookups it returns are kept on the
    instance as ``idx2name`` and ``name2idx``, next to the requested ``mode``.
    """

    def __init__(self, dataset_dir, mode="train"):
        loaded = load_annotations(dataset_dir, mode)
        super(Food101Dataset, self).__init__(loaded["pairs"])
        self.mode = mode
        self.name2idx = loaded["name2idx"]
        self.idx2name = loaded["idx2name"]
        
def get_food101_dataset(dataset_dir, mode="train"):
    """Create a ``Food101Dataset`` for ``dataset_dir`` using the given ``mode``."""
    dataset = Food101Dataset(dataset_dir, mode=mode)
    return dataset

# Visualize the Food-101 data set

In [None]:
# build the training split once; visualize() below reads it as a global
food_dataset = get_food101_dataset(dataset_dir,mode="train")

def visualize(i):
    """Plot example ``i`` of ``food_dataset`` with its class name as the title."""
    img, food_idx = food_dataset.get_example(i)
    # look the label up before creating the figure, as a plain Python int key
    title = food_dataset.idx2name[int(food_idx)]
    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)
    axes.set_title(title)
    vis_image(img, axes)
    
# draw 100 random example indices (repeats are possible) and browse them interactively
sample = np.random.randint(low=0, high=len(food_dataset), size=100)
interact(visualize, i=sample)