In [16]:
import pandas as pd
import numpy as np

# Load full CSV
df = pd.read_csv("../data/nutrition_filtered.csv", usecols=[0, 1, 7], header=0, names=["dish_id", "calories", "text"])

# Load train/test dish_ids
with open("../data/train_filtered.txt") as f:
    train_ids = set(line.strip() for line in f)

with open("../data/test_filtered.txt") as f:
    test_ids = set(line.strip() for line in f)

#print(df["dish_id"].head(3))
#print(list(train_ids)[:5])

# Filter based on dish_id column (which is named "dish_id")
train_df = df[df["dish_id"].isin(train_ids)].copy()
test_df  = df[df["dish_id"].isin(test_ids)].copy()

# Add image path column
train_df["image_path"] = train_df["dish_id"].apply(lambda x: f"../data/images/{x}.png")
#print(train_df["calories"].head(5))
test_df["image_path"]  = test_df["dish_id"].apply(lambda x: f"../data/images/{x}.png")



In [17]:
import tensorflow as tf

IMG_SIZE = (224, 224)

text_vectorizer = tf.keras.layers.TextVectorization(output_mode='int', output_sequence_length=20)
text_vectorizer.adapt(train_df["text"])

def preprocess(inputs, label):
    path, text = inputs
    img = tf.io.read_file(path)
    img = tf.image.decode_png(img, channels=3)
    img = tf.image.random_flip_left_right(img)
    img = tf.image.random_brightness(img, 0.1)
    img = tf.image.random_contrast(img, 0.9, 1.1)
    img = tf.image.resize(img, IMG_SIZE)
    img = img / 255.0  # normalize

    text = text_vectorizer(text)
    
    return (img, text), label

def make_dataset(df, batch_size=16, shuffle=True):
    paths = df["image_path"].values
    texts = df["text"].values
    labels = df["calories"].values.astype("float32")

    ds = tf.data.Dataset.from_tensor_slices(((paths, texts), labels))
    ds = ds.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)

    if shuffle:
        ds = ds.shuffle(buffer_size=len(df))

    ds = ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return ds

# create train and validation datasets
train_ds = make_dataset(train_df, batch_size=16, shuffle=True)
val_ds = make_dataset(test_df, batch_size=16, shuffle=False)

In [None]:
# 1. Imports for model
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras import layers, Model, Input, optimizers, regularizers
#import tensorflow_hub as hub

# Image branch
img_input = Input(shape=(224, 224, 3))
base_model = MobileNetV2(include_top=False, weights="imagenet", pooling="avg")
base_model.trainable = False
img_features = base_model(img_input)

# Text branch (e.g., embedded sequence)
vocab_size = len(text_vectorizer.get_vocabulary())
emb_dim = 128
text_input = Input(shape=(20,), dtype='int32', name='text')
text_emb = layers.Embedding(vocab_size, emb_dim)(text_input)
text_features = layers.GlobalAveragePooling1D()(text_emb)

# Project to common dimension
proj_img = layers.Dense(256)(img_features)
proj_text = layers.Dense(256)(text_features)
proj_img = layers.Lambda(lambda x: tf.expand_dims(x, axis=1))(proj_img)
proj_text = layers.Lambda(lambda x: tf.expand_dims(x, axis=1))(proj_text)

# Cross-attention: image attends to text
cross_attended = layers.MultiHeadAttention(
    num_heads=4, key_dim=64
)(query=proj_img, value=proj_text, key=proj_text)

# Flatten and fuse
fused = layers.Flatten()(cross_attended)
combined = layers.Concatenate()([fused, img_features, text_features])
x = layers.Dense(128, activation='relu')(combined)
x = layers.Dropout(0.3)(x)
x = layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(1e-4))(x)
x = layers.Dropout(0.3)(x)
output = layers.Dense(1)(x)

model = Model(inputs=[img_input, text_input], outputs=output)
model.compile(optimizer=optimizers.Adam(), loss='mse', metrics=['mae'])
model.summary()

  base_model = MobileNetV2(include_top=False, weights="imagenet", pooling="avg")


In [19]:
from sklearn.metrics import mean_absolute_error, r2_score

callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor='mae', patience=4, restore_best_weights=True)
#    tf.keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5, patience=2, min_lr=1e-6)
]

# 1. Train (no intermediate validation)
history = model.fit(
    train_ds,
    epochs=10
)

# 2. Get predictions + true labels from val_ds
y_true = []
y_pred = []

for (batch_imgs, batch_texts), batch_labels in val_ds:
    preds = model([batch_imgs, batch_texts], training=False)        # shape (batch,1)
    y_true.extend(batch_labels.numpy().flatten())   # actual calories
    y_pred.extend(preds.numpy().flatten())          # predicted calories

y_true = np.array(y_true)
y_pred = np.array(y_pred)

# 3. Metrics
mae = mean_absolute_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)

print(f"Final validation MAE: {mae:.2f} kcal")
print(f"Final validation R²: {r2:.3f}")

Epoch 1/10


[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 188ms/step - loss: 55433.2148 - mae: 168.5428
Epoch 2/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 184ms/step - loss: 31541.9062 - mae: 107.9624
Epoch 3/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 190ms/step - loss: 20210.8164 - mae: 95.8887
Epoch 4/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 189ms/step - loss: 20366.0371 - mae: 91.0422
Epoch 5/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 192ms/step - loss: 16737.1289 - mae: 85.5654
Epoch 6/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 195ms/step - loss: 18412.9004 - mae: 82.7229
Epoch 7/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 196ms/step - loss: 16109.6709 - mae: 82.0897
Epoch 8/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 194ms/step - loss: 11956.7822 - mae: 77.2210
Epoch 9/10
[1m176/176[0