# Nutrition5k | Data Preparation and Training

Dataset: [https://github.com/google-research-datasets/Nutrition5k](https://github.com/google-research-datasets/Nutrition5k)

Paper: [https://arxiv.org/abs/2103.03375](https://arxiv.org/abs/2103.03375)



In [None]:
!pip install timm loguru

In [None]:
# import modules
import torch
import pandas as pd
from pathlib import Path
from PIL import Image
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import SGD, Adam
from torch.utils.data import Dataset, DataLoader
from matplotlib import pyplot as plt
from PIL import Image
import numpy as np
from timm import create_model
from timm.utils.model import get_state_dict, unwrap_model
from datetime import datetime
from tqdm import tqdm
from random import seed, shuffle
seed(2024)

In [None]:
# check whether gsutil (Google Cloud Storage CLI) is installed
!gsutil --version

In [None]:
# download annotation data
!mkdir -p nutrition5k_dataset/metadata
!gsutil -m cp -r "gs://nutrition5k_dataset/nutrition5k_dataset/metadata/dish_metadata_cafe1.csv" ./nutrition5k_dataset/metadata/
!mkdir -p nutrition5k_dataset/dish_ids/splits
!gsutil -m cp -r "gs://nutrition5k_dataset/nutrition5k_dataset/dish_ids/dish_ids_all.txt" ./nutrition5k_dataset/dish_ids/
!gsutil -m cp -r "gs://nutrition5k_dataset/nutrition5k_dataset/dish_ids/splits/rgb_train_ids.txt" ./nutrition5k_dataset/dish_ids/splits/
!gsutil -m cp -r "gs://nutrition5k_dataset/nutrition5k_dataset/dish_ids/splits/rgb_test_ids.txt" ./nutrition5k_dataset/dish_ids/splits/

In [None]:
# split original train set into train and validation subsplits (80/20)
path_splits = Path("./nutrition5k_dataset/dish_ids/splits")

with open(path_splits / "rgb_train_ids.txt") as fp:
  # splitlines() plus the emptiness filter drops the blank entry that a
  # trailing newline would otherwise add, so no empty "dish id" gets shuffled
  # into one of the subsplits.
  train_ids = [line.strip() for line in fp.read().splitlines() if line.strip()]

print(len(train_ids))
cutoff = int(len(train_ids) * 0.8)
shuffle(train_ids)

print(f"Train/Train Split: {cutoff}, Train/Valid: Split: {len(train_ids)-cutoff}")

# Both files use the same layout: one id per line, no trailing newline.
# "\n".join guarantees that regardless of how long each subsplit is.
with open(path_splits / "rgb_train_train_ids.txt", "w") as fp:
  fp.write("\n".join(train_ids[:cutoff]))

with open(path_splits / "rgb_train_val_ids.txt", "w") as fp:
  fp.write("\n".join(train_ids[cutoff:]))

In [None]:
# download image data
!mkdir -p nutrition5k_dataset/imagery/realsense_overhead
!gsutil -m cp -r "gs://nutrition5k_dataset/nutrition5k_dataset/imagery/realsense_overhead" ./nutrition5k_dataset/imagery/

In [None]:
# pytorch data classes / custom transforms
class N5kRealSense(Dataset):
    """Nutrition5k RealSense overhead RGB images paired with dish-level labels.

    Each item is ``(image, target)``: ``image`` is the dish's ``rgb.png``
    (forced to RGB mode) and ``target`` holds columns 1-5 of the metadata CSV
    row whose first column equals the dish id.

    Args:
        path_imagery: Directory with one sub-directory per dish id, each
            containing an ``rgb.png``.
        path_labels_csv: Header-less metadata CSV; column 0 is the dish id,
            columns 1-5 are used as the target.
        path_split_txt: Text file with one dish id per line.
        transform: Optional callable applied to the PIL image.
        target_transform: Optional callable applied to the target array.

    Raises:
        NotADirectoryError: If ``path_imagery`` is not a directory.
        FileNotFoundError: If a dish directory contains no ``rgb.png``.
    """

    def __init__(self, path_imagery, path_labels_csv, path_split_txt, transform=None, target_transform=None):
        self.path_imagery = Path(path_imagery)
        # Explicit raises instead of `assert`: asserts vanish under `python -O`.
        if not self.path_imagery.is_dir():
            raise NotADirectoryError(f"Imagery directory not found: {self.path_imagery}")

        dish_id_to_image_path = {}
        for path_dish in self.path_imagery.glob("*"):
            if not path_dish.is_dir():
                # Stray files next to the dish folders are not dishes.
                continue
            path_img = path_dish / "rgb.png"
            if not path_img.is_file():
                raise FileNotFoundError(f"Missing RGB image: {path_img}")
            dish_id_to_image_path[path_dish.name] = str(path_img)
        self.dish_id_to_image_path = dish_id_to_image_path

        self.labels = pd.read_csv(path_labels_csv, usecols=range(6), header=None, index_col=0)
        labelled_ids = set(self.labels.index)

        with open(path_split_txt, "r") as fp:
            _split_ids = fp.read().split("\n")

        # Keep only ids that have BOTH an image and a label row. Without the
        # label check, an id missing from the CSV would only surface as a
        # KeyError inside __getitem__, in the middle of an epoch.
        self.split_ids = [
            _split_id
            for _split_id in _split_ids
            if _split_id in dish_id_to_image_path and _split_id in labelled_ids
        ]

        print(f"Split size: {len(self.split_ids)} (orginal: {len(_split_ids)})")

        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of usable dish ids in this split."""
        return len(self.split_ids)

    def __getitem__(self, idx):
        """Return the ``(image, target)`` pair for the ``idx``-th dish of the split."""
        # get next dish id from split list
        dish_id = self.split_ids[idx]

        # get image for this dish_id; force 3 channels so the downstream
        # 3-channel Normalize also works for PNGs stored with alpha/palette
        path_image = self.dish_id_to_image_path[dish_id]
        image = Image.open(path_image).convert("RGB")

        # get label for this dish_id
        target = self.labels.loc[dish_id].to_numpy()

        if self.transform:
            image = self.transform(image)

        if self.target_transform:
            target = self.target_transform(target)

        return image, target


class NumpyToTensor:
    """Transform that wraps a numpy.ndarray as a tensor with a new leading axis of size 1."""

    def __call__(self, np_array):
        # from_numpy shares memory with the array; unsqueeze only adds a view axis.
        as_tensor = torch.from_numpy(np_array)
        return torch.unsqueeze(as_tensor, dim=0)

    def __repr__(self):
        return f"{type(self).__name__}()"


class StandardizeTensor:
    """Standardize a tensor with fixed statistics: ``(tensor - mean) / std``.

    ``mean`` and ``std`` are reshaped to ``(1, 1, -1)`` before broadcasting
    against the input, and axis 1 of the result is squeezed away.
    """

    def __init__(self, mean, std):
        self.mean, self.std = mean, std

    def __call__(self, tensor):
        if not isinstance(tensor, torch.Tensor):
            raise TypeError('Input tensor should be a torch tensor. Got {}.'.format(type(tensor)))

        # Build broadcastable (1, 1, n) versions of both statistics.
        shift, scale = (
            torch.tensor(stat).reshape(1, 1, -1) for stat in (self.mean, self.std)
        )
        return ((tensor - shift) / scale).squeeze(1)

    def __repr__(self):
        return type(self).__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)


def de_transform(x, mean=[255, 215, 12.7, 19.4, 18.0], std=[220, 161, 13.5, 21.6, 20]):
    """Undo target standardization by computing ``x * std + mean``.

    ``mean`` and ``std`` are converted to numpy arrays and broadcast against
    ``x`` along its last axis.
    """
    scale = np.array(std)
    offset = np.array(mean)
    return x * scale + offset


In [None]:
# prepare for data loaders
# Augmentations run on the [0, 1] tensor produced by ToTensor and BEFORE
# Normalize. ColorJitter's hue shift goes through an RGB -> HSV conversion that
# assumes values in [0, 1]; applying it after mean/std normalization (which
# yields negative values) distorts the colours instead of jittering them.
input_transform_train = transforms.Compose([
    transforms.Resize(255),
    transforms.ToTensor(),
    transforms.ColorJitter(hue=0.05),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.GaussianBlur(kernel_size=(5,5), sigma=(0.01, 0.3)),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Validation/test images get the same resize and normalization, without augmentation.
input_transform_valid = transforms.Compose([
    transforms.Resize(255),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# total_calories, total_mass, total_fat, total_carb, total_protein
# as stated in the paper
mean_paper = [255, 215, 12.7, 19.4, 18.0]
std_paper = [220, 161, 13.5, 21.6, 20]
# Targets are standardized with the statistics above; de_transform() inverts this.
target_transform = transforms.Compose([
    NumpyToTensor(),
    StandardizeTensor(mean=mean_paper, std=std_paper)
])

# Arguments shared by all three splits; only the split file and the image
# transform differ between train, validation and test.
_common = dict(
    path_imagery="./nutrition5k_dataset/imagery/realsense_overhead",
    path_labels_csv="./nutrition5k_dataset/metadata/dish_metadata_cafe1.csv",
    target_transform=target_transform,
)

train_set = N5kRealSense(
    path_split_txt="./nutrition5k_dataset/dish_ids/splits/rgb_train_train_ids.txt",
    transform=input_transform_train,
    **_common,
)
valid_set = N5kRealSense(
    path_split_txt="./nutrition5k_dataset/dish_ids/splits/rgb_train_val_ids.txt",
    transform=input_transform_valid,
    **_common,
)
test_set = N5kRealSense(
    path_split_txt="./nutrition5k_dataset/dish_ids/splits/rgb_test_ids.txt",
    transform=input_transform_valid,
    **_common,
)

# Only the training loader shuffles; evaluation order stays fixed.
train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_set, batch_size=32, shuffle=False)
test_loader = DataLoader(test_set, batch_size=32, shuffle=False)

In [None]:
# init model
# model_name: timm architecture identifier passed to create_model() below.
model_name = "tiny_vit_5m_224"  # good modern vision transformer, others: efficientnet_b0, paper baseline: inception_resnet_v2
# custom: True wraps a headless timm backbone in CustomModel; False lets timm
# attach its own 5-output head.
custom = False

class CustomModel(nn.Module):
    """Feature backbone followed by a two-layer head with 5 outputs.

    Args:
        base_model: Module used as feature extractor. The last dimension of
            its output must match the head's input width, which is taken from
            ``base_model.num_features`` when that attribute exists and
            defaults to 320 otherwise.
    """

    def __init__(self, base_model):
        super().__init__()
        # Use the module that was passed in rather than a global `backbone`,
        # so the class works wherever it is instantiated.
        self.backbone = base_model
        in_features = getattr(base_model, "num_features", 320)
        self.fc1 = nn.Linear(in_features, 32)
        self.fc2 = nn.Linear(32, 5)

    def forward(self, x):
        """Return the 5 head outputs for input batch ``x``."""
        x = self.backbone(x)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

if not custom:
  # Let timm attach its own 5-output head.
  model = create_model(model_name, pretrained=True, num_classes=5)
else:
  # num_classes=0 yields a headless feature extractor, see
  # https://huggingface.co/docs/timm/feature_extraction
  backbone = create_model(model_name, pretrained=True, num_classes=0)
  model = CustomModel(backbone)

In [None]:
# train model
# Prefer the GPU when CUDA is usable; otherwise run on the CPU.
_has_cuda = torch.cuda.is_available()
device = torch.device("cuda" if _has_cuda else "cpu")
print(device)
model = model.to(device)
loss_fn = nn.L1Loss()  # nn.MSELoss()
optimizer = Adam(model.parameters(), lr=0.001)

def train_one_epoch(model, loader):
  """Train `model` for one pass over `loader` and return the mean per-sample loss.

  Uses the module-level `device`, `loss_fn` and `optimizer`.

  Args:
    model: Network to optimise; put into training mode here.
    loader: Iterable of `(input, target)` batches supporting `len()`.

  Returns:
    The loss averaged over all samples seen, or NaN if `loader` yields
    no batches.
  """
  model.train()
  loss_sum = 0.0
  n_samples = 0
  loss_avg = float("nan")  # stays NaN when the loader is empty
  pbar = tqdm(loader, total=len(loader))
  for inputs, target in pbar:
    batch_size = inputs.size(0)
    inputs = inputs.float().to(device)
    target = target.float().to(device)
    # add a singleton axis 1 so the output matches the target layout
    output = model(inputs).unsqueeze(1)
    loss = loss_fn(output, target)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # loss_fn yields a per-batch mean (assumes reduction="mean", the
    # nn.L1Loss/nn.MSELoss default), so weight it by the batch size before
    # dividing by the sample count; otherwise the average is ~1/batch_size
    # of the real value.
    loss_sum += loss.item() * batch_size
    n_samples += batch_size
    loss_avg = loss_sum / n_samples
    pbar.set_description(f'Train Loss: {loss_avg:.06f}')

  return loss_avg

def valid_one_epoch(model, loader):
  """Evaluate `model` on `loader` without gradients; return the mean per-sample loss.

  Uses the module-level `device` and `loss_fn`.

  Args:
    model: Network to evaluate; put into eval mode here.
    loader: Iterable of `(input, target)` batches supporting `len()`.

  Returns:
    The loss averaged over all samples seen, or NaN if `loader` yields
    no batches.
  """
  model.eval()
  loss_sum = 0.0
  n_samples = 0
  loss_avg = float("nan")  # stays NaN when the loader is empty
  with torch.no_grad():
    pbar = tqdm(loader, total=len(loader))
    for inputs, target in pbar:
      batch_size = inputs.size(0)
      inputs = inputs.float().to(device)
      target = target.float().to(device)
      # add a singleton axis 1 so the output matches the target layout
      output = model(inputs).unsqueeze(1)
      loss = loss_fn(output, target)

      # loss_fn yields a per-batch mean (assumes reduction="mean"), so weight
      # it by the batch size to obtain a true per-sample average.
      loss_sum += loss.item() * batch_size
      n_samples += batch_size
      loss_avg = loss_sum / n_samples
      pbar.set_description(f'Valid Loss: {loss_avg:.06f}')

  return loss_avg

epochs = 15
best_valid_loss_avg = np.inf
ts = datetime.now().strftime('%Y%m%d-%H%M%S')
# Create the run directory up front so the per-epoch log can always be written,
# even if no epoch improves on the initial best (e.g. the loss is NaN).
path_run = Path(f"./runs/{ts}-{model_name}")
path_run.mkdir(exist_ok=True, parents=True)
for e in range(epochs):
  print(f"\n\n{e}/{epochs}")
  train_loss_avg = train_one_epoch(model, train_loader)
  valid_loss_avg = valid_one_epoch(model, valid_loader)
  print(f"\nTrain/Valid Loss: {train_loss_avg:.06f}/{valid_loss_avg:.06f}")
  if valid_loss_avg < best_valid_loss_avg:
    # NOTE: this is a reference to the live model, not a snapshot; the
    # checkpoint written below is the durable copy of the best weights.
    best_model = model
    best_valid_loss_avg = valid_loss_avg
    path_save = path_run / f"best-{e:03d}.pth"
    save_state = get_state_dict(model, unwrap_model)
    torch.save(save_state, path_save)
  # one CSV row per epoch: epoch index, train loss, validation loss
  with open(path_run / "logs.csv", "a") as fp:
    fp.write(f"{e},{train_loss_avg},{valid_loss_avg}\n")

In [None]:
# re-create model from checkpoint
def load_model(path_checkpoint, CustomModelClass=None, model_name="tiny_vit_5m_224"):
  """Rebuild a model and load its weights from `path_checkpoint`.

  Args:
    path_checkpoint: Path to a state dict saved with `torch.save`.
    CustomModelClass: Optional wrapper class taking a backbone as its only
      constructor argument. When given, a headless backbone is created, wrapped,
      and the checkpoint is loaded into the wrapper.
    model_name: timm architecture identifier; must match the architecture
      the checkpoint was trained with.

  Returns:
    The model with the checkpoint weights loaded.
  """
  if CustomModelClass:
    # The checkpoint belongs to the wrapper (backbone + head), so build a fresh
    # headless backbone and load the weights into the wrapped model only.
    backbone = create_model(model_name, in_chans=3, num_classes=0)
    custom_model = CustomModelClass(backbone)
    # map_location lets a GPU-trained checkpoint load on a CPU-only machine
    state_dict = torch.load(path_checkpoint, map_location="cpu")
    custom_model.load_state_dict(state_dict)
    return custom_model

  return create_model(model_name, in_chans=3, num_classes=5, checkpoint_path=path_checkpoint)

# execute model in inference mode
def run_inference(model, loader):
  """Run `model` over `loader` and collect predictions and targets.

  Uses the module-level `device` for the forward pass.

  Args:
    model: Network to run; put into eval mode here.
    loader: Iterable of `(input, target)` batches supporting `len()`.

  Returns:
    `(outputs, targets)` as float32 numpy arrays. Outputs get a singleton
    axis 1 inserted after the batch axis; targets are returned in the layout
    the loader provides. An empty loader gives two arrays of shape (0, 1, 5).
  """
  model.eval()
  outputs, targets = [], []
  with torch.no_grad():
    for batch_input, batch_target in tqdm(loader, total=len(loader)):
      batch_input = batch_input.float().to(device)
      # add a singleton axis 1 so outputs line up with the target layout
      batch_output = model(batch_input).unsqueeze(1)

      # Collect per-batch results on the CPU. Concatenating afterwards returns
      # exactly the samples that were seen (no uninitialised rows) and works
      # for any number of model outputs.
      outputs.append(batch_output.float().cpu())
      targets.append(batch_target.float().cpu())

  if not outputs:
    empty = np.empty((0, 1, 5), dtype=np.float32)
    return empty, empty.copy()

  return torch.cat(outputs).numpy(), torch.cat(targets).numpy()

# load model
# NOTE(review): the checkpoint path is hard-coded while results are written
# next to `path_save` from the training cell; confirm both refer to the same run.
model_from_checkpoint = load_model("/content/runs/20240321-231807-tiny_vit_5m_224/best-013.pth")
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model_from_checkpoint = model_from_checkpoint.to(device)

# evaluate model
test_loss_avg = valid_one_epoch(model_from_checkpoint, test_loader)

# generate inference results
outputs, targets = run_inference(model_from_checkpoint, test_loader)

# de-normalize outputs (back from standardized units) and drop singleton axes
outputs_transformed = de_transform(outputs).squeeze()
targets_transformed = de_transform(targets).squeeze()
print(outputs.shape, targets.shape)

# save results
for _fname, _array in (
    ("best-outputs.npy", outputs_transformed),
    ("best-targets.npy", targets_transformed),
):
  with open(path_save.parent / _fname, "wb") as fp:
    np.save(fp, _array)

In [None]:
# verify npy files
# Read both arrays back from disk and print their shapes as a sanity check.
_outputs = np.load(Path(path_save.parent, "best-outputs.npy"))
print(_outputs.shape)

_targets = np.load(Path(path_save.parent, "best-targets.npy"))
print(_targets.shape)