In [None]:
import requests
import tarfile
from pathlib import Path

# Creating directories
data_path = Path("data")

if data_path.is_dir():
  print("Directory already exists, skipping creation.")
else:
  print("Creating directory...")
  data_path.mkdir(parents = True, exist_ok = True)
  print(f"Directory '{data_path}' was created.")

# Writing data to the directory
link_to_data = "http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz"
archive_path = data_path / "food101.tar.gz"

if archive_path.is_file():
  # The archive is large; delete the file by hand to force a fresh download.
  print(f"'{archive_path}' already exists, skipping download.")
else:
  # Download into a temporary '.part' file so that an interrupted transfer
  # never leaves a truncated archive under the final name.
  partial_path = data_path / "food101.tar.gz.part"
  print(f"Downloading '{link_to_data}'...")
  with requests.get(link_to_data, stream = True, timeout = 60) as response:
    # Fail loudly on HTTP errors instead of saving an error page as the archive.
    response.raise_for_status()
    with open(partial_path, "wb") as f:
      # Stream in 1 MiB chunks rather than holding the whole archive in memory.
      for chunk in response.iter_content(chunk_size = 1024 * 1024):
        f.write(chunk)
  partial_path.replace(archive_path)
  print("Download finished.")

In [None]:
# Extracting data from 'tar.gz' file
file_name = "data/food101.tar.gz"
print(f"Extracting {file_name}...")
# The context manager closes the archive even if extraction fails part-way.
# NOTE(review): extractall() trusts the member paths stored in the archive;
# only run this on an archive obtained from a trusted source.
with tarfile.open(file_name) as tar:
  tar.extractall()
print(f"Finished extracting {file_name}.")

In [None]:
# Root of the extracted dataset and its two sub-directories
directory_name = Path("/kaggle/working/food-101")
image_path, meta_path = (directory_name / sub for sub in ("images", "meta"))

# Every entry of the 'food-101/meta' directory, sorted by path.
# glob("*") does not filter anything out; unsupported file types are
# skipped later, when the files are read.
meta_filenames_list = sorted(meta_path.glob("*"))
meta_filenames_list

In [None]:
import json
# Parsed contents of every supported meta file, in the same order as
# meta_filenames_list. Files that are neither '.json' nor '.txt' are skipped.
meta_files_data = []

for file_name in meta_filenames_list:
  name = str(file_name)
  if name.endswith(".json"):
    # 'with' guarantees the handle is closed (the bare open() call leaked it).
    with open(file_name) as f:
      data = json.load(f)
    meta_files_data.append(data)
  elif name.endswith(".txt"):
    with open(file_name, 'r') as f:
      # One entry per line, surrounding whitespace removed.
      data = [line.strip() for line in f]
    meta_files_data.append(data)

# The unpacking below is positional: it relies on the sorted order of
# meta_filenames_list and on exactly six supported files being present
# (a different count raises ValueError).
class_names, labels, \
test_data_paths_dict, test_data_paths_lst, \
train_data_paths_dict, train_data_paths_lst \
= meta_files_data

In [None]:
# Candidate 'train' / 'test' sub-directories under the images folder.
# NOTE(review): neither name is referenced again in the visible notebook;
# the data generators read from `data_dir` directly — confirm these are needed.
train_path = image_path / "train"
test_path = image_path / "test"

In [None]:
# Collect every '.jpg' located exactly one directory level below image_path.
image_paths_list = list(image_path.glob("*/*.jpg"))
# Display the first path and the total count (IndexError if nothing matched).
image_paths_list[0], len(image_paths_list)

In [None]:
# Peek at the first five entries of class_names.
class_names[:5]

In [None]:
from tensorflow.keras.applications import EfficientNetV2L
from tensorflow.keras.applications import EfficientNetB2 as transf_model

# Pre-trained convolutional base without its ImageNet classification head.
# The `classes` argument is only honoured when include_top=True, so it is not
# passed here; the number of output classes is set by the Dense head that is
# stacked on top of this base.
model = transf_model(weights = 'imagenet',
                     include_top = False,
                     input_shape = (224, 224, 3))

In [None]:
# Freeze every layer of the base network except its last five, so only
# those five (plus any layers added on top) are updated during training.
for base_layer in model.layers[:-5]:
  base_layer.trainable = False

In [None]:
from tensorflow import keras
from keras.models import Sequential
from keras.layers import *

# Classification head stacked on top of the pre-trained base:
# pooled features -> 512-unit ReLU layer -> dropout -> softmax over classes.
head_layers = [
    GlobalAveragePooling2D(),
    Dense(512, activation = 'relu'),
    Dropout(0.2),
    Dense(len(class_names), activation = 'softmax'),
]

my_model = Sequential([model, *head_layers])

my_model.summary()

In [None]:
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import img_to_array
from keras.utils import load_img
import tensorflow as tf

# --- Configuration ---
BATCH_SIZE = 32
SEED = 10
test_size = 0.25      # fraction of the images reserved for validation
input_size = 224      # images are resized to input_size x input_size
data_dir = "/kaggle/working/food-101/images"

# Options common to the training and validation directory iterators.
flow_options = dict(
    color_mode = 'rgb',
    batch_size = BATCH_SIZE,
    target_size = (input_size, input_size),
    class_mode = 'categorical',
    seed = SEED,
)

# Training images are augmented on the fly.
train_datagen = ImageDataGenerator(
    rotation_range = 20,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    horizontal_flip = True,
    vertical_flip = True,
    brightness_range = (0.5, 1.5),
    zoom_range = [0.75, 1.25],
    channel_shift_range = 75.0,
    validation_split = test_size,
)

train_gen = train_datagen.flow_from_directory(
    data_dir,
    shuffle = True,
    subset = 'training',
    **flow_options,
)

# Validation images get no augmentation; the same validation_split is used so
# the 'validation' subset is carved out with the same fraction as above.
test_datagen = ImageDataGenerator(validation_split = test_size)

test_gen = test_datagen.flow_from_directory(
    data_dir,
    shuffle = False,
    subset = 'validation',
    **flow_options,
)

In [None]:
from keras.metrics import *

from keras.optimizers import Adam
from keras.losses import CategoricalCrossentropy

# Metrics reported for every epoch, alongside the loss.
metrics = ['accuracy', Precision(), Recall()]

# Categorical cross-entropy matches the one-hot ('categorical') labels
# produced by the data generators.
my_model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = CategoricalCrossentropy(),
    metrics = metrics,
)

In [None]:
from tensorflow import keras

def scheduler(epoch, lr, hold_epochs=3, decay_rate=0.1):
  """Learning-rate schedule: hold the rate, then decay it exponentially.

  Args:
    epoch: Index of the epoch about to start (the first epoch of a fit is 0).
    lr: Learning rate in effect before this epoch.
    hold_epochs: Number of initial epochs during which `lr` is left unchanged.
    decay_rate: Every later epoch multiplies `lr` by exp(-decay_rate).

  Returns:
    `lr` unchanged while `epoch < hold_epochs`; otherwise the decayed rate
    as a plain Python float.
  """
  import math  # local import keeps this cell self-contained

  if epoch < hold_epochs:
    return lr
  # math.exp yields a Python float, so the schedule never hands a tensor
  # back to the callback.
  return float(lr * math.exp(-decay_rate))

# Wrap the schedule function in a Keras callback so it can be passed to fit().
callback = keras.callbacks.LearningRateScheduler(scheduler)

In [None]:
# Display the optimizer's current learning rate, rounded to 5 decimal places.
round(my_model.optimizer.lr.numpy(), 5)

In [None]:
# First training round: 5 epochs on train_gen, validated on test_gen, with
# the learning-rate scheduler callback attached.
history_1 = my_model.fit(train_gen,
            epochs = 5,
            validation_data = test_gen,
            callbacks=[callback])

## Creating function for saving model

In [None]:
from tensorflow import keras
import pickle as pkl
from pathlib import Path

def save_model_results(model, model_save_name, model_no, prev_history=None,
                       save_dir="/kaggle/working"):
    """Pickle a model's weights, optimizer weights and training history.

    Three files are written into
    `<save_dir>/<model_save_name>_<model_no>_results/`:
    `model_<model_no>`, `opitimizer_<model_no>` and `history_<model_no>`.

    Args:
        model: Trained model exposing `history` (the result of its latest
            fit), `get_weights()` and `optimizer.get_weights()`.
        model_save_name: Base name used for the results directory.
        model_no: Run number appended to the directory and file names.
        prev_history: Optional dict with keys 'history' and 'epochs', as
            returned by an earlier call. When given, the latest fit is
            appended to it so the saved history is cumulative.
        save_dir: Directory in which the results directory is created.

    Returns:
        dict with 'history' (metric name -> list of per-epoch values) and
        'epochs' (list of epoch indices). The lists are new objects, so
        neither `model.history` nor `prev_history` is modified or aliased.

    Note:
        The files are pickles; only unpickle them from a trusted source.
    """
    # === Build the (cumulative) results dict ===
    latest = model.history
    if prev_history is None:
        model_dict = {
            'history': {key: list(values) for key, values in latest.history.items()},
            'epochs': list(latest.epoch),
        }
    else:
        model_dict = {
            'history': {key: prev_history['history'][key] + latest.history[key]
                        for key in prev_history['history']},
            'epochs': list(prev_history['epochs']),
        }
        # Epoch indices restart at 0 on every fit; continue the running count.
        first_new_epoch = len(model_dict['epochs'])
        model_dict['epochs'].extend(
            range(first_new_epoch, first_new_epoch + len(latest.epoch)))

    # === Create the results directory ===
    RESULTS_PATH = Path(save_dir) / f"{model_save_name}_{model_no}_results"

    if RESULTS_PATH.is_dir():
        print("Path already exists. Skipping creation.")
    else:
        RESULTS_PATH.mkdir(parents = True, exist_ok = True)
        print(f"Directory {RESULTS_PATH} created.")

    # === Write the artefacts ===
    # The misspelt 'opitimizer' prefix is kept on purpose so the file names
    # stay identical to those of archives that were already produced.
    artefacts = (
        ("model", "model weights", model.get_weights()),
        ("opitimizer", "optimizer weights", model.optimizer.get_weights()),
        ("history", "training history", model_dict),
    )
    for prefix, label, payload in artefacts:
        target = RESULTS_PATH / f"{prefix}_{model_no}"
        print(f"Saving the {label} to: {target}.")
        with open(target, 'wb') as f:
            pkl.dump(payload, f)

    return model_dict

## Saving the Model

In [None]:
# Save the results of the first training round (model_no = 1).
# The returned history dict is not kept here.
save_model_results(model = my_model,
                      model_save_name = "effnetb2_model",
                      model_no = 1)

!zip -r /kaggle/working/effnetb2_model_1_results.zip /kaggle/working/effnetb2_model_1_results

In [None]:
import os
import shutil

# Delete files to save some space. Make sure to download the zipfile before running this cell
# The existence checks make the cell safe to re-run once the files are gone.
if os.path.isdir("/kaggle/working/effnetb2_model_1_results"):
  shutil.rmtree("/kaggle/working/effnetb2_model_1_results")
if os.path.isfile("/kaggle/working/effnetb2_model_1_results.zip"):
  os.remove("/kaggle/working/effnetb2_model_1_results.zip")

In [None]:
# Snapshot of the most recent fit: per-epoch metric values and epoch indices.
prev_history = {
    'history': my_model.history.history,
    'epochs': my_model.history.epoch,
}

prev_history

## Running for 5 more epochs

In [None]:
from tensorflow import keras

def scheduler(epoch, lr, hold_epochs=0, decay_rate=0.1):
  """Learning-rate schedule: hold the rate, then decay it exponentially.

  With the default `hold_epochs=0` there is no hold phase, so the rate is
  decayed on every epoch of this round.

  Args:
    epoch: Index of the epoch about to start (the first epoch of a fit is 0).
    lr: Learning rate in effect before this epoch.
    hold_epochs: Number of initial epochs during which `lr` is left unchanged.
    decay_rate: Every later epoch multiplies `lr` by exp(-decay_rate).

  Returns:
    `lr` unchanged while `epoch < hold_epochs`; otherwise the decayed rate
    as a plain Python float.
  """
  import math  # local import keeps this cell self-contained

  if epoch < hold_epochs:
    return lr
  # math.exp yields a Python float, so the schedule never hands a tensor
  # back to the callback.
  return float(lr * math.exp(-decay_rate))

# Wrap the schedule function in a Keras callback so it can be passed to fit().
callback = keras.callbacks.LearningRateScheduler(scheduler)

In [None]:
# Display the optimizer's current learning rate, rounded to 5 decimal places.
round(my_model.optimizer.lr.numpy(), 5)

In [None]:
# Second training round: 5 more epochs on the same generators, using the
# scheduler callback defined for this round.
history_2 = my_model.fit(train_gen,
            epochs = 5,
            validation_data = test_gen,
            callbacks=[callback])

### Testing

In [None]:
def combine_histories(model, prev_history=None):
    """Merge the history of a model's latest fit with an earlier history.

    Args:
        model: Model whose `history` attribute holds the latest fit; it must
            expose `history.history` (metric name -> list of per-epoch
            values) and `history.epoch` (list of epoch indices).
        prev_history: Optional dict with keys 'history' and 'epochs'
            describing the earlier training. When None, only the latest fit
            is returned.

    Returns:
        dict with 'history' and 'epochs'. With `prev_history`, every metric
        list is the earlier values followed by the latest ones, and the epoch
        indices continue counting from where `prev_history` stopped. The
        lists are new objects; the inputs are not modified.

    Raises:
        KeyError: if a metric of `prev_history` is missing from the latest fit.
    """
    latest = model.history
    if prev_history is None:
        return {
            'history': {key: list(values) for key, values in latest.history.items()},
            'epochs': list(latest.epoch),
        }

    earlier = prev_history['history']
    model_dict = {
        # The earlier values live under prev_history['history'][key];
        # indexing prev_history[key] directly raised KeyError.
        'history': {key: earlier[key] + latest.history[key] for key in earlier},
        'epochs': list(prev_history['epochs']),
    }
    # Epoch indices restart at 0 on every fit; continue the running count.
    first_new_epoch = len(model_dict['epochs'])
    model_dict['epochs'].extend(
        range(first_new_epoch, first_new_epoch + len(latest.epoch)))

    return model_dict

In [None]:
# Merge the latest fit of my_model with prev_history and display the result.
new_history = combine_histories(my_model, prev_history)
new_history

## Saving

In [None]:
# Save the results of the second round (model_no = 2); the returned dict
# appends this round's history to prev_history.
prev_history_2 = save_model_results(model = my_model,
                   model_save_name = "effnetb2_model",
                   model_no = 2,
                   prev_history = prev_history)

!zip -r /kaggle/working/effnetb2_model_2_results.zip /kaggle/working/effnetb2_model_2_results

In [None]:
# Display the history dict returned by the save of run 2.
prev_history_2

In [None]:
# Delete files to save some space. Make sure to download the zipfile before running this cell
# The existence checks make the cell safe to re-run once the files are gone.
if os.path.isdir("/kaggle/working/effnetb2_model_2_results"):
  shutil.rmtree("/kaggle/working/effnetb2_model_2_results")
if os.path.isfile("/kaggle/working/effnetb2_model_2_results.zip"):
  os.remove("/kaggle/working/effnetb2_model_2_results.zip")

## Running for 5 more epochs

In [None]:
from tensorflow import keras

def scheduler(epoch, lr, hold_epochs=2, decay_rate=0.1):
  """Learning-rate schedule: hold the rate, then decay it exponentially.

  Args:
    epoch: Index of the epoch about to start (the first epoch of a fit is 0).
    lr: Learning rate in effect before this epoch.
    hold_epochs: Number of initial epochs during which `lr` is left unchanged.
    decay_rate: Every later epoch multiplies `lr` by exp(-decay_rate).

  Returns:
    `lr` unchanged while `epoch < hold_epochs`; otherwise the decayed rate
    as a plain Python float.
  """
  import math  # local import keeps this cell self-contained

  if epoch < hold_epochs:
    return lr
  # math.exp yields a Python float, so the schedule never hands a tensor
  # back to the callback.
  return float(lr * math.exp(-decay_rate))

# Wrap the schedule function in a Keras callback so it can be passed to fit().
callback = keras.callbacks.LearningRateScheduler(scheduler)

In [None]:
# Display the optimizer's current learning rate, rounded to 6 decimal places.
round(my_model.optimizer.lr.numpy(), 6)

In [None]:
# Third training round: 5 more epochs on the same generators, using the
# scheduler callback defined for this round.
history_3 = my_model.fit(train_gen,
            epochs = 5,
            validation_data = test_gen,
            callbacks=[callback])

In [None]:
# Save the results of the third round (model_no = 3); the returned dict
# appends this round's history to prev_history_2.
prev_history_3 = save_model_results(model = my_model,
                   model_save_name = "effnetb2_model",
                   model_no = 3,
                   prev_history = prev_history_2)

!zip -r /kaggle/working/effnetb2_model_3_results.zip /kaggle/working/effnetb2_model_3_results

In [None]:
# Delete files to save some space. Make sure to download the zipfile before running this cell
# The existence checks make the cell safe to re-run once the files are gone.
if os.path.isdir("/kaggle/working/effnetb2_model_3_results"):
  shutil.rmtree("/kaggle/working/effnetb2_model_3_results")
if os.path.isfile("/kaggle/working/effnetb2_model_3_results.zip"):
  os.remove("/kaggle/working/effnetb2_model_3_results.zip")

In [None]:
# Display the history dict returned by the save of run 3.
prev_history_3

## Running for 5 more epochs

In [None]:
from tensorflow import keras

def scheduler(epoch, lr, hold_epochs=0, decay_rate=0.1):
  """Learning-rate schedule: hold the rate, then decay it exponentially.

  With the default `hold_epochs=0` there is no hold phase, so the rate is
  decayed on every epoch of this round.

  Args:
    epoch: Index of the epoch about to start (the first epoch of a fit is 0).
    lr: Learning rate in effect before this epoch.
    hold_epochs: Number of initial epochs during which `lr` is left unchanged.
    decay_rate: Every later epoch multiplies `lr` by exp(-decay_rate).

  Returns:
    `lr` unchanged while `epoch < hold_epochs`; otherwise the decayed rate
    as a plain Python float.
  """
  import math  # local import keeps this cell self-contained

  if epoch < hold_epochs:
    return lr
  # math.exp yields a Python float, so the schedule never hands a tensor
  # back to the callback.
  return float(lr * math.exp(-decay_rate))

# Wrap the schedule function in a Keras callback so it can be passed to fit().
callback = keras.callbacks.LearningRateScheduler(scheduler)

In [None]:
# Display the optimizer's current learning rate, rounded to 6 decimal places.
round(my_model.optimizer.lr.numpy(), 6)

In [None]:
# Fourth training round: 5 more epochs on the same generators, using the
# scheduler callback defined for this round.
history_4 = my_model.fit(train_gen,
            epochs = 5,
            validation_data = test_gen,
            callbacks=[callback])

In [None]:
# Save the results of the fourth round (model_no = 4); the returned dict
# appends this round's history to prev_history_3.
prev_history_4 = save_model_results(model = my_model,
                   model_save_name = "effnetb2_model",
                   model_no = 4,
                   prev_history = prev_history_3)

!zip -r /kaggle/working/effnetb2_model_4_results.zip /kaggle/working/effnetb2_model_4_results

In [None]:
# Display the history dict returned by the save of run 4.
prev_history_4

In [None]:
from tensorflow import keras

def scheduler(epoch, lr, hold_epochs=0, decay_rate=0.3):
  """Learning-rate schedule: hold the rate, then decay it exponentially.

  With the default `hold_epochs=0` there is no hold phase, so the rate is
  decayed on every epoch of this round, by the default factor exp(-0.3).

  Args:
    epoch: Index of the epoch about to start (the first epoch of a fit is 0).
    lr: Learning rate in effect before this epoch.
    hold_epochs: Number of initial epochs during which `lr` is left unchanged.
    decay_rate: Every later epoch multiplies `lr` by exp(-decay_rate).

  Returns:
    `lr` unchanged while `epoch < hold_epochs`; otherwise the decayed rate
    as a plain Python float.
  """
  import math  # local import keeps this cell self-contained

  if epoch < hold_epochs:
    return lr
  # math.exp yields a Python float, so the schedule never hands a tensor
  # back to the callback.
  return float(lr * math.exp(-decay_rate))

# Wrap the schedule function in a Keras callback so it can be passed to fit().
callback = keras.callbacks.LearningRateScheduler(scheduler)

In [None]:
# Display the optimizer's current learning rate, rounded to 6 decimal places.
round(my_model.optimizer.lr.numpy(), 6)

In [None]:
# Fifth training round: 2 more epochs on the same generators, using the
# scheduler callback defined for this round.
history_5 = my_model.fit(train_gen,
            epochs = 2,
            validation_data = test_gen,
            callbacks=[callback])

In [None]:
# Save the results of the fifth round (model_no = 5); the returned dict
# appends this round's history to prev_history_4.
prev_history_5 = save_model_results(model = my_model,
                   model_save_name = "effnetb2_model",
                   model_no = 5,
                   prev_history = prev_history_4)

!zip -r /kaggle/working/effnetb2_model_5_results.zip /kaggle/working/effnetb2_model_5_results

In [None]:
# Display the history dict returned by the save of run 5.
prev_history_5