In [1]:
from product_info.api_fetcher import get_product_info

product_info = get_product_info(3017620422003)

In [2]:
product_info.keys()



In [3]:
product_info['abbreviated_product_name']

'Nutella t.400'

In [4]:
product_info['nutriments']

{'carbohydrates': 57.5,
 'carbohydrates_100g': 57.5,
 'carbohydrates_serving': 8.62,
 'carbohydrates_unit': 'g',
 'carbohydrates_value': 57.5,
 'carbon-footprint-from-known-ingredients_product': 135,
 'carbon-footprint-from-known-ingredients_serving': 5.07,
 'energy': 2252,
 'energy-kcal': 539,
 'energy-kcal_100g': 539,
 'energy-kcal_serving': 80.8,
 'energy-kcal_unit': 'kcal',
 'energy-kcal_value': 539,
 'energy-kcal_value_computed': 533.3,
 'energy-kj': 2252,
 'energy-kj_100g': 2252,
 'energy-kj_serving': 338,
 'energy-kj_unit': 'kJ',
 'energy-kj_value': 2252,
 'energy-kj_value_computed': 2227.9,
 'energy_100g': 2252,
 'energy_serving': 338,
 'energy_unit': 'kJ',
 'energy_value': 2252,
 'fat': 30.9,
 'fat_100g': 30.9,
 'fat_serving': 4.63,
 'fat_unit': 'g',
 'fat_value': 30.9,
 'fruits-vegetables-legumes-estimate-from-ingredients_100g': 0,
 'fruits-vegetables-legumes-estimate-from-ingredients_serving': 0,
 'fruits-vegetables-nuts-estimate-from-ingredients_100g': 13,
 'fruits-vegetabl

In [5]:
product_info['additives_tags']

['en:e322', 'en:e322i']

In [6]:
product_info['ingredients_text_en']

'blueberries, sugar, pectin, citric acid'

In [7]:
import pandas as pd

text = product_info.get('ingredients_text_en', '')
additives = product_info.get('additives_tags', [])
for additive in additives:
    text += ' ' + additive[3:]

nutriments = product_info["nutriments"]
data = {
    'energy-kcal_100g': nutriments.get('energy-kcal_100g', 0),
    'saturated-fat_100g': nutriments.get('saturated-fat_100g', 0),
    'trans-fat_100g': nutriments.get('trans-fat_100g', 0),
    'cholesterol_100g': nutriments.get('cholesterol_100g', 0),
    'sugars_100g': nutriments.get('sugars_100g', 0),
    'fiber_100g': nutriments.get('fiber_100g', 0),
    'proteins_100g': nutriments.get('proteins_100g', 0),
    'sodium_100g': nutriments.get('sodium_100g', 0),
    'calcium_100g': nutriments.get('calcium_100g', 0),
    'iron_100g': nutriments.get('iron_100g', 0),
    'other_carbohydrates_100g': nutriments.get('carbohydrates_100g', 0) - nutriments.get('sugars_100g', 0) - nutriments.get('fiber_100g', 0),
    'other_fat_100g': nutriments.get('fat_100g', 0) - nutriments.get('saturated-fat_100g', 0) - nutriments.get('trans-fat_100g', 0),
    'text': text
    }
# url = "https://smartfoodscan-805490564375.europe-west1.run.app/predict" # old url
# Make a DataFrame
input_dataframe = pd.DataFrame(data, index=[0])

input_dataframe

Unnamed: 0,energy-kcal_100g,saturated-fat_100g,trans-fat_100g,cholesterol_100g,sugars_100g,fiber_100g,proteins_100g,sodium_100g,calcium_100g,iron_100g,other_carbohydrates_100g,other_fat_100g,text
0,539,10.6,0,0,56.3,0,6.3,0.0428,0,0,1.2,20.3,"blueberries, sugar, pectin, citric acid e322 e..."


In [8]:
numerical_cols = input_dataframe.drop(columns=['text']).columns.tolist()

In [11]:
import pickle

# Load the scaler
with open("../../models/robust_scaler.pkl", "rb") as f:
    robust_scaler = pickle.load(f)

In [13]:
# Load TinyBERT tokenizer and model

from transformers import AutoTokenizer, AutoModel

tokenizer = AutoTokenizer.from_pretrained("prajjwal1/bert-tiny")
model = AutoModel.from_pretrained("prajjwal1/bert-tiny")

In [14]:
input_dataframe_scaled = robust_scaler.transform(input_dataframe[numerical_cols])
input_dataframe_scaled

array([[ 0.83      ,  1.20448179,  0.        ,  0.        ,  2.28558477,
        -0.38888889,  0.07139153, -0.50981169, -0.34951456, -0.38626609,
        -0.23940847,  1.42133816]])

In [15]:
import torch

def get_bert_embeddings(texts):
    inputs = tokenizer(texts, padding=True, truncation=True, max_length=128, return_tensors="pt") # Change max_length to 128
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state[:, 0, :].numpy()  # Take [CLS] token embeddings

In [None]:
import numpy as np

# Convert text to embeddings
input_dataframe_text = np.array([get_bert_embeddings(text) for text in input_dataframe['text']])
input_dataframe_text.shape

(1, 1, 128)

In [26]:
input_dataframe_text = input_dataframe_text.squeeze()
input_dataframe_text.shape

(128,)

In [27]:
input_dataframe_scaled.shape

(1, 12)