# Food Scoring based on Semantic Image Segmentation


## Setup
This notebook provides the segmentation/classification model, the Nutrition5k test set, as well as some realistic data collected by our team members (and team members' friends/families) throughout the day.

*   `data_real` contains our own collected data
*   `nutrition5k` contains the Nutrition5k dataset test split




In [None]:
!pip install openfoodfacts

In [None]:
# imports

import csv
import os
import sys
import ast
import tensorflow as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt
from scipy import stats
import numpy as np
import tarfile
import os
import pandas as pd
import openfoodfacts
import json
import pandas as pd
import matplotlib.patches as mpatches
from PIL import Image
import requests
import torch
from transformers import DPTForDepthEstimation, DPTFeatureExtractor
from tqdm import tqdm

In [None]:
# Loaded segmentation models keyed by extraction folder, so the SavedModel is
# read from disk once instead of once per processed image.
_SEGMENTER_CACHE = {}


def _load_segmenter(model_archive, model_dir):
    """Extract the model archive if necessary and return the cached hub layer."""
    if model_dir not in _SEGMENTER_CACHE:
        if not os.path.exists(model_dir):
            with tarfile.open(model_archive, 'r:gz') as tar:
                # NOTE(review): extractall() trusts the member paths stored in
                # the archive; only use this with a model archive you trust.
                tar.extractall(path=model_dir)
            print("Model extracted")

        _SEGMENTER_CACHE[model_dir] = hub.KerasLayer(
            model_dir, signature_outputs_as_dict=True
        )
        print("Model loaded")
    return _SEGMENTER_CACHE[model_dir]


def computeVolume(filename):
    """Run the food segmentation model on a single image file.

    Despite its name, this function does not compute a volume: it reads the
    image, resizes it to 513x513, scales pixel values to [0, 1] and feeds it
    through the locally extracted segmentation model.

    Args:
        filename: Path to the image file to segment.

    Returns:
        A ``(segmentation_probs, segmentation_mask)`` tuple holding the first
        (and only) batch element of the model outputs
        ``food_group_segmenter:semantic_probabilities`` and
        ``food_group_segmenter:semantic_predictions``.
    """
    model_filename = '/content/mobile-food-segmenter.tar.gz'
    extracted_folder_path = 'extracted_model'

    # Force a 3-channel result so RGBA and grayscale files are converted
    # rather than passed to the model with the wrong channel count, and
    # disable animation expansion so GIFs yield a single 3-D frame instead of
    # a 4-D stack that would break the resize/batch steps below.
    image = tf.image.decode_image(
        tf.io.read_file(filename), channels=3, expand_animations=False
    )
    image = tf.image.resize(image, [513, 513])
    image = image / 255.0  # Normalize to [0, 1]
    print("Image loaded")

    # Add the leading batch dimension expected by the model call.
    image_batch = tf.expand_dims(image, 0)

    segmenter = _load_segmenter(model_filename, extracted_folder_path)
    results = segmenter(image_batch)
    print("Model used")

    segmentation_probs = results['food_group_segmenter:semantic_probabilities'][0]
    segmentation_mask = results['food_group_segmenter:semantic_predictions'][0]

    return segmentation_probs, segmentation_mask

In [None]:
def _iter_csv_records(path):
    """Yield the non-blank data rows of a CSV file, skipping its header row."""
    # newline='' is what the csv module requires so quoted fields containing
    # line breaks are parsed correctly; an explicit encoding keeps the result
    # independent of the platform default.
    with open(path, 'r', newline='', encoding='utf-8') as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # Skip the header row (tolerates an empty file)
        for row in reader:
            # Blank lines (commonly a trailing one) come back as empty rows
            # and would otherwise break the tuple unpacking in the caller.
            if any(cell.strip() for cell in row):
                yield row


def read_csv_to_dict(file_path, example_path):
    """Load the label map and the example-food table from two CSV files.

    Args:
        file_path: CSV with a header row followed by rows of
            ``key, label, serving_size``; ``label`` may hold several names
            separated by ``|``.
        example_path: CSV with a header row followed by rows of
            ``key, example1, example2, example3``.

    Returns:
        A tuple ``(data_dict, serving_sizes, example_foods)`` where
        ``data_dict`` maps the integer key to the list of label names,
        ``serving_sizes`` maps the integer key to the serving size exactly as
        written in the file (a string), and ``example_foods`` maps the integer
        key to the list of three example strings.

    Raises:
        ValueError: If a non-blank row has the wrong number of columns or a
            key that is not an integer.
    """
    data_dict = {}
    serving_sizes = {}
    example_foods = {}

    for key, label, serving_size in _iter_csv_records(file_path):
        data_dict[int(key)] = label.split('|')
        serving_sizes[int(key)] = serving_size

    for key, ex1, ex2, ex3 in _iter_csv_records(example_path):
        example_foods[int(key)] = [ex1, ex2, ex3]

    return data_dict, serving_sizes, example_foods

In [None]:
def get_top_k_labels(segmentation_mask, k=3):
    """Return the k most frequent food labels found in a segmentation mask.

    Args:
        segmentation_mask: NumPy array of integer class labels.
        k: Maximum number of labels to return.

    Returns:
        NumPy array of at most ``k`` labels, ordered from most to least
        frequent. Labels 0, 23, 24 and 25 are never returned.
    """
    # Classes that are not food: background, food containers, cutlery.
    ignored_labels = [0, 23, 24, 25]

    # Tally how many pixels carry each distinct label.
    label_ids, pixel_counts = np.unique(
        segmentation_mask.flatten(), return_counts=True
    )

    # Drop the ignored classes from both parallel arrays.
    is_food = ~np.isin(label_ids, ignored_labels)
    label_ids, pixel_counts = label_ids[is_food], pixel_counts[is_food]

    # Negating the counts turns the ascending argsort into a descending one.
    most_frequent_first = np.argsort(-pixel_counts)
    return label_ids[most_frequent_first[:k]]

In [None]:
from scipy.ndimage import center_of_mass
import matplotlib.image as mpimg

def visualize_segmentation_mask(segmentation_mask, filename, k=3):
    """Show an image next to an overlay of its k most frequent food segments.

    The left panel shows the image resized to 513x513; the right panel shows
    the same image with the top-k segments drawn on top and each segment's
    label number printed at its center of mass.

    Args:
        segmentation_mask: Integer label mask as a TensorFlow tensor or NumPy
            array. NOTE(review): assumed to be a 2-D 513x513 mask matching the
            resized image — confirm against the model output.
        filename: Path to the image file the mask was computed from.
        k: Number of most frequent food labels to display (default 3).
    """
    # Load and preprocess the image the same way it is prepared for the model.
    # channels=3 converts RGBA/grayscale inputs to RGB, and
    # expand_animations=False keeps GIFs as a single 3-D frame so the image
    # can be resized and displayed.
    image = tf.image.decode_image(
        tf.io.read_file(filename), channels=3, expand_animations=False
    )
    image = tf.image.resize(image, [513, 513])
    image = image / 255.0  # Normalize to [0, 1]
    print("Image loaded")

    # get_top_k_labels() and the NumPy calls below need a NumPy array.
    if isinstance(segmentation_mask, tf.Tensor):
        segmentation_mask = segmentation_mask.numpy()

    # Get the top k labels from the segmentation mask
    top_k_labels = get_top_k_labels(segmentation_mask, k)

    # Boolean mask selecting only pixels that belong to a top k label
    top_k_mask = np.isin(segmentation_mask, top_k_labels)

    fig, ax = plt.subplots(1, 2, figsize=(10, 5))  # Side-by-side view

    # Display the original image on the left
    ax[0].imshow(image)
    ax[0].set_title('Original Image')
    ax[0].axis('off')

    # Display the segmented image on the right, filtered by top k labels.
    # Multiplying by the boolean mask zeroes every pixel outside the top k.
    ax[1].imshow(image, alpha=0.5)  # Original image slightly transparent
    ax[1].imshow(top_k_mask * segmentation_mask, cmap='jet', alpha=0.8)
    ax[1].set_title(f'Top {k} Food Groups')
    ax[1].axis('off')

    unique_segments = np.unique(segmentation_mask[top_k_mask])
    for segment in unique_segments:
        # Find the center of mass for each top k segment
        centroid = center_of_mass(segmentation_mask == segment)

        # center_of_mass returns (row, col); text() expects (x, y).
        ax[1].text(centroid[1], centroid[0], str(segment), color='white', ha='center', va='center')

    plt.show()

In [None]:
# Module-level Open Food Facts client used by fetch_data_from_api().
# User-Agent is mandatory
api = openfoodfacts.API(user_agent="Grocerly/Prototype")


def fetch_data_from_api(query):
    """Search Open Food Facts for *query* and return the top hit's nutrients.

    Args:
        query: Search term passed to the Open Food Facts text search.

    Returns:
        The ``(calories, carbs, fat, protein, sugar, salt)`` tuple produced by
        ``extract_nutrient_data`` for the search results.

    Raises:
        IndexError: If the search response contains no products.
    """
    print(f"Query term: {query}")
    response = api.product.text_search(query) or {}
    products = response.get("products")

    # Fail with a message naming the query instead of an opaque
    # "list index out of range" / "NoneType is not subscriptable" raised
    # further down when the search comes back empty.
    if not products:
        raise IndexError(f"Open Food Facts returned no products for query {query!r}")

    # TODO: when the product's NOVA group is not 1, recommend healthier
    # alternatives (earlier drafts called recommend_healthy_suggestions here).
    return extract_nutrient_data(products)


def extract_nutrient_data(api_obj):
    """Extract per-100g nutrient values from the first product in *api_obj*.

    Only the first product is inspected. Any nutrient that is absent or not
    numeric is reported as ``0.0`` so callers can safely sum the results;
    many Open Food Facts entries omit some of these fields.

    Args:
        api_obj: Non-empty sequence of product dicts; the nutrient values are
            read from the first product's ``"nutriments"`` mapping.

    Returns:
        A tuple ``(calories, carbs, fat, protein, sugar, salt)`` of floats.
        ``calories`` is ``energy_100g`` divided by 4.184 (kJ to kcal); the
        other values are the ``carbohydrates_100g``, ``fat_100g``,
        ``proteins_100g``, ``sugars_100g`` and ``salt_100g`` entries.

    Raises:
        IndexError: If *api_obj* is empty or ``None``.
    """
    if not api_obj:
        raise IndexError("extract_nutrient_data() needs at least one product")

    # "nutriments" may be missing or null on sparse product entries.
    nutriments = api_obj[0].get("nutriments") or {}

    def per_100g(key):
        # float(None) raises TypeError, float("n/a") raises ValueError; both
        # mean "no usable value", which is reported as 0.0.
        try:
            return float(nutriments.get(key))
        except (TypeError, ValueError):
            return 0.0

    calories = per_100g("energy_100g") / 4.184  # energy_100g is in kJ
    carbs = per_100g("carbohydrates_100g")
    fat = per_100g("fat_100g")
    protein = per_100g("proteins_100g")
    sugar = per_100g("sugars_100g")
    salt = per_100g("salt_100g")

    return calories, carbs, fat, protein, sugar, salt


In [None]:
# Driver cell: segment every image in the data folder, look up one example
# food per detected food group on Open Food Facts, and print the summed
# per-100g nutrient values for each image.
directory_path = '/content/data_real/'

label_dict, serving_sizes, example_foods = read_csv_to_dict(
    '/content/seefood_mobile_food_segmenter_V1_labelmap.csv',
    '/content/example_foods.csv',
)


def _as_number(value):
    """Return *value* as a float, or 0.0 when it is missing or non-numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0.0


# os.listdir() returns entries in arbitrary order; sort for reproducible runs.
for filename in sorted(os.listdir(directory_path)):
    image_path = os.path.join(directory_path, filename)
    if not os.path.isfile(image_path):
        # Skip sub-directories entirely, including the totals printout, which
        # would otherwise show stale values from the previous image.
        continue
    print(filename)

    total_cals = 0
    total_protein = 0
    total_carbs = 0
    total_fat = 0
    total_salt = 0
    total_sugar = 0

    # The bar only brackets the segmentation call, the one slow local step.
    with tqdm(total=100, desc="Computing Volume") as pbar:
        class_probabilities, mask = computeVolume(image_path)
        pbar.update(100)

    visualize_segmentation_mask(mask, image_path)
    top_labels = get_top_k_labels(mask.numpy(), k=3)

    # TODO: Improve presentation
    print("Top labels:")
    for top_label in top_labels:
        label_id = int(top_label)  # plain int for dictionary lookups
        # Retrieve the value from label_dict or display a message if not found
        label_value = label_dict.get(label_id, "Label not found")
        print(f"{label_value}")

        candidates = example_foods.get(label_id)
        if not candidates:
            print(f"No example foods for label {label_id}; skipping nutrition lookup")
            continue

        # API-Call: pick one example food at random. Calling choice() without
        # a size yields a single element rather than a one-element array, so
        # the query is sent (and printed) as a plain string.
        query = str(np.random.choice(candidates))
        calories, carbs, fat, protein, sugar, salt = fetch_data_from_api(query)

        # Nutrients can come back as non-numeric placeholders; count those
        # as zero instead of failing on the addition.
        total_cals += _as_number(calories)
        total_carbs += _as_number(carbs)
        total_fat += _as_number(fat)
        total_protein += _as_number(protein)
        total_sugar += _as_number(sugar)
        total_salt += _as_number(salt)

    print(f"Totals for meal: {total_cals} kcal, {total_carbs} C, {total_protein} P, {total_fat} F, {total_sugar} sugar, {total_salt} salt")

