# Initialization
As a first step, we mount the Google Drive directory. Then, to speed up the overall computation, we copy the **food** and **distractor** datasets into the Colab machine and unzip them together into a single directory.

 

In [None]:
# Mount Google Drive at /content/gdrive so that the project files stored on
# Drive (dataset archive, notebooks, saved model) can be accessed by path.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
!pip install import-ipynb

Collecting import-ipynb
  Downloading https://files.pythonhosted.org/packages/63/35/495e0021bfdcc924c7cdec4e9fbb87c88dd03b9b9b22419444dc370c8a45/import-ipynb-0.1.3.tar.gz
Building wheels for collected packages: import-ipynb
  Building wheel for import-ipynb (setup.py) ... [?25l[?25hdone
  Created wheel for import-ipynb: filename=import_ipynb-0.1.3-cp36-none-any.whl size=2976 sha256=2ac7a080bb7b28cf908523f8e9ecaa3f1170fa0e201d672ec0250b45c048cc0d
  Stored in directory: /root/.cache/pip/wheels/b4/7b/e9/a3a6e496115dffdb4e3085d0ae39ffe8a814eacc44bbf494b5
Successfully built import-ipynb
Installing collected packages: import-ipynb
Successfully installed import-ipynb-0.1.3


In [None]:
# Unzipping dataset: copy the archive from Drive into the current working
# directory, extract it quietly (-q), then remove the local copy of the archive.
# NOTE(review): SETS_DIR is '/content/food-101/', so this cell presumably has to
# run while the working directory is still /content (before the %cd cell) — confirm.
!cp '/content/gdrive/MyDrive/[MIRCV]FoodWebSearch/deployment/food_dis102.zip' .
!unzip -q food_dis102.zip
!rm food_dis102.zip

In [None]:
# Move into the project's notebooks folder on Drive.
# NOTE(review): presumably needed so that `utils.ipynb` can be imported from the
# current directory by import_ipynb — confirm.
%cd "/content/gdrive/MyDrive/[MIRCV]FoodWebSearch/notebooks"

/content/gdrive/MyDrive/[MIRCV]FoodWebSearch/notebooks


In [None]:
import import_ipynb
from utils import ids_and_labels_from_file
from utils import preprocess
import tensorflow as tf
import numpy as np
from os import listdir
import sklearn
from sklearn import preprocessing

# Root directory of the images read by image_dataset_from_directory and by
# ids_and_labels_from_file.
SETS_DIR = '/content/food-101/'

# Number of images per batch of the input dataset.
BATCH_SIZE = 256

importing Jupyter notebook from utils.ipynb


To speed up the application we choose to use the GPU provided by the machine.

In [None]:
# Check hardware acceleration. tf.test.gpu_device_name() returns an empty
# string when no GPU is available, so a GPU is reported only if one was found.
device_name = tf.test.gpu_device_name()
if device_name:
    print('Found GPU: ' , device_name)
else:
    print('WARNING: no GPU found - feature extraction will run on CPU and be much slower.')

Found GPU:  /device:GPU:0


# Datasets

For the dataset we collect all the files. We set the **shuffle** parameter to `False` in order to maintain the same order of the files as in the original directory. As expected, the aggregate dataset has 126000 files, divided into 102 labels (101 for the original food dataset, and one for the distractor dataset).

In [None]:
# Build a batched tf.data pipeline over every image found under SETS_DIR,
# resized to 224x224. Shuffling is turned off so that the files are always
# yielded in the same order.
dataset = tf.keras.preprocessing.image_dataset_from_directory(
    directory=SETS_DIR,
    batch_size=BATCH_SIZE,
    image_size=(224, 224),
    shuffle=False,
    seed=123,
)

Found 126000 files belonging to 102 classes.


# Retrieving ids
Now we need to generate a structure that contains the ids of all the images. As the unique identifier of an image, we use the original name of its file in the datasets.

To this aim, we defined a function **ids_and_labels_from_file()** in the **utils** file, in order to get the ids in alphanumerical order.

In [None]:
# Generation of the ids: only the first of the three values returned by
# ids_and_labels_from_file is needed here.
ids,_,_ = ids_and_labels_from_file(SETS_DIR)

print(len(ids))
# Show only a small sample: printing every id floods the notebook output.
print(ids[:10])

Found 126000 files belonging to 102 classes.
126000
['1005649.jpg', '1011328.jpg', '101251.jpg', '1014775.jpg', '1026328.jpg', '1028787.jpg', '1034399.jpg', '103801.jpg', '1038694.jpg', '1043283.jpg', '1047447.jpg', '1050519.jpg', '1057749.jpg', '1057810.jpg', '1068632.jpg', '1072416.jpg', '1074856.jpg', '1074942.jpg', '1076891.jpg', '1077610.jpg', '1077964.jpg', '1088809.jpg', '1097378.jpg', '110043.jpg', '1103795.jpg', '1106961.jpg', '1109597.jpg', '1111062.jpg', '1112300.jpg', '1112838.jpg', '1113017.jpg', '1121884.jpg', '112378.jpg', '1133267.jpg', '1142597.jpg', '1147371.jpg', '1154371.jpg', '1158360.jpg', '1159801.jpg', '1165004.jpg', '1166116.jpg', '1166210.jpg', '116697.jpg', '116705.jpg', '1167170.jpg', '1174241.jpg', '1174330.jpg', '1174949.jpg', '1177254.jpg', '1180600.jpg', '118237.jpg', '1184568.jpg', '1185445.jpg', '1185654.jpg', '1191665.jpg', '1196628.jpg', '1199851.jpg', '1200079.jpg', '1214326.jpg', '1215650.jpg', '1218767.jpg', '1220194.jpg', '1230465.jpg', '1232311.

# Extracting features
We use a map function to apply the pre-processing step to all the images.
Then we call the "mobilenetv2.predict()" function on the dataset obtained to extract the features, saving them in **features**.

In [None]:
# Pre-processing: apply `preprocess` (imported from utils) to every element of
# the dataset; deterministic=True keeps the elements in their original order.
# NOTE: `dataset` is overwritten here, so re-running this cell alone would apply
# the pre-processing a second time — re-create the dataset first.
dataset = dataset.map(preprocess, deterministic=True)

In [None]:
# Extracting features with MobileNetV2 pre-trained on ImageNet.
# include_top=False drops the classification head and pooling='avg' applies
# global average pooling to the output of the last convolutional block.
mobilenetv2 = tf.keras.applications.MobileNetV2(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=(224, 224, 3)
)

# The dataset is already batched (BATCH_SIZE), so `batch_size` must not be
# passed to predict(): Keras documents it as not applicable to dataset inputs.
features = mobilenetv2.predict(dataset, verbose=1)

# Sanity check: ids and features are saved as two parallel arrays, so there
# must be exactly one feature vector per id.
assert len(features) == len(ids), (
    f'got {len(features)} feature vectors for {len(ids)} ids')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5


# Saving as two numpy files
We generate two **.npy** files, one for the ids and one for the features.

In [None]:
# Persist the results as two separate .npy files on Drive:
# one holding the image ids, the other the MobileNetV2 features.
np.save(file='/content/gdrive/MyDrive/[MIRCV]FoodWebSearch/deployment/mn_id.npy', arr=ids)
np.save(file='/content/gdrive/MyDrive/[MIRCV]FoodWebSearch/deployment/mn_features.npy', arr=features)


# Fine-tuned features extraction



As the last step, we extract the features from our fine-tuned model.

In [None]:
# Load the fine-tuned classifier saved on Drive.
classifier = tf.keras.models.load_model('/content/gdrive/MyDrive/[MIRCV]FoodWebSearch/deployment/food_classifier.h5')

# Remove the classifier from the model: the feature extractor shares the
# classifier's input and ends at the 'dense_hidden' layer.
model = tf.keras.Model(inputs=classifier.input, outputs=classifier.get_layer('dense_hidden').output)

# The dataset is already batched (BATCH_SIZE), so `batch_size` must not be
# passed to predict(): Keras documents it as not applicable to dataset inputs.
features_finetuned = model.predict(dataset, verbose=1)

# Sanity check: ids and features are saved as two parallel arrays, so there
# must be exactly one feature vector per id.
assert len(features_finetuned) == len(ids), (
    f'got {len(features_finetuned)} feature vectors for {len(ids)} ids')



In [None]:
# Persist the results as two separate .npy files on Drive:
# one holding the image ids, the other the fine-tuned model features.
np.save(file='/content/gdrive/MyDrive/[MIRCV]FoodWebSearch/deployment/ft_id.npy', arr=ids)
np.save(file='/content/gdrive/MyDrive/[MIRCV]FoodWebSearch/deployment/ft_features.npy', arr=features_finetuned)