# In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model, Input, optimizers, regularizers
from tensorflow.keras.applications import MobileNetV2
from sklearn.metrics import mean_absolute_error, r2_score

# Load the first two CSV columns as (dish_id, calories), replacing the file's
# own header row with these names. dish_id is forced to str so that it compares
# equal to the IDs read as text from the fold file even when the IDs look
# numeric (pandas would otherwise infer an integer dtype and no ID would match).
df = pd.read_csv(
    "../data/nutrition_filtered.csv",
    usecols=[0, 1],
    header=0,
    names=["dish_id", "calories"],
    dtype={"dish_id": str},
)
# Derive the image location of every dish from its ID.
df["image_path"] = df["dish_id"].apply(lambda x: f"../data/images/{x}.png")

# Metric accumulators; presumably meant to collect one value per fold, although
# nothing in the code shown here appends to them.
mae_scores = []
r2_scores = []

print("\n=== Fold 1 ===")
# The fold file lists the held-out dish IDs, one per line.
with open("../data/fold_1.txt") as fold_file:
    test_ids = {row.strip() for row in fold_file}
# Every dish that is not held out is used for training.
train_ids = set(df["dish_id"]).difference(test_ids)

in_train = df["dish_id"].isin(train_ids)
in_test = df["dish_id"].isin(test_ids)
train_df = df.loc[in_train].copy()
test_df = df.loc[in_test].copy()

# Spatial size (height, width) every image is resized to.
IMG_SIZE = (224, 224)


def preprocess(path, label, augment=True):
    """Load one PNG from disk and turn it into an image tensor.

    Args:
        path: Scalar string tensor holding the path of a PNG file.
        label: Target value paired with the image; returned unchanged.
        augment: When true (the default, which keeps the previous behaviour
            for two-argument callers), apply a random horizontal flip and
            small random brightness/contrast changes. Pass ``False`` for
            evaluation data so that predictions are reproducible.

    Returns:
        Tuple ``(image, label)`` where ``image`` is a float tensor resized to
        ``IMG_SIZE`` and divided by 255.
    """
    img = tf.io.read_file(path)
    img = tf.image.decode_png(img, channels=3)
    if augment:
        # Augmentation runs on the decoded image, before resizing/scaling.
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_brightness(img, 0.1)
        img = tf.image.random_contrast(img, 0.9, 1.1)
    img = tf.image.resize(img, IMG_SIZE)
    img = img / 255.0
    return img, label


def make_dataset(df, batch_size=16, shuffle=True, augment=None):
    """Build a batched ``tf.data`` pipeline of (image, calories) pairs.

    Args:
        df: DataFrame with an ``image_path`` and a ``calories`` column.
        batch_size: Number of examples per batch.
        shuffle: Whether to shuffle the examples.
        augment: Whether to apply random image augmentation. ``None`` (the
            default) follows ``shuffle``, so a training pipeline is augmented
            while an unshuffled evaluation pipeline is left untouched.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, labels)`` batches.
    """
    if augment is None:
        augment = shuffle

    paths = df["image_path"].values
    labels = df["calories"].values.astype("float32")
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    if shuffle:
        # Shuffle before decoding: the buffer then holds path strings rather
        # than len(df) decoded float images.
        ds = ds.shuffle(buffer_size=len(df))
    ds = ds.map(
        lambda path, label: preprocess(path, label, augment=augment),
        num_parallel_calls=tf.data.AUTOTUNE,
    )
    return ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

# The evaluation pipeline is left unshuffled so that its predictions come back
# in test_df row order; only the training pipeline is shuffled.
val_ds = make_dataset(test_df, shuffle=False)
train_ds = make_dataset(train_df, shuffle=True)

# Building of Neural Net
# ImageNet-pretrained MobileNetV2 without its classifier head; pooling='avg'
# reduces its feature map to one vector per image. The backbone is frozen, so
# only the regression head defined below is trained.
base_model = MobileNetV2(input_shape=(*IMG_SIZE, 3), include_top=False, weights='imagenet', pooling='avg')
base_model.trainable = False

inputs = Input(shape=(*IMG_SIZE, 3))
# The input pipeline scales pixels to [0, 1], whereas the pretrained
# MobileNetV2 weights expect inputs in [-1, 1]; map one range onto the other.
x = layers.Rescaling(scale=2.0, offset=-1.0)(inputs)
# training=False keeps the frozen backbone in inference mode.
x = base_model(x, training=False)
# Regression head: two 128-unit ReLU layers with dropout, then one linear unit
# that outputs the calorie estimate.
x = layers.Dense(128, activation='relu')(x)
x = layers.Dropout(0.3)(x)
x = layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(1e-4))(x)
x = layers.Dropout(0.3)(x)
outputs = layers.Dense(1)(x)
model = Model(inputs, outputs)
# Optimise mean squared error; mean absolute error is tracked as a metric.
model.compile(optimizer=optimizers.Adam(), loss='mse', metrics=['mae'])

# Train silently for a fixed number of epochs.
model.fit(train_ds, epochs=10, verbose=0)

# Testing stage
# val_ds is built without shuffling, so the predictions line up row by row
# with the targets and IDs taken from test_df.
dish_ids = test_df["dish_id"].values
y_true = test_df["calories"].values.astype(float)
raw_preds = model.predict(val_ds, verbose=0)
preds = raw_preds.reshape(-1)  # one scalar prediction per test row

# Metrics
r2 = r2_score(y_true, preds)
mae = mean_absolute_error(y_true, preds)
print(f"MAE: {mae:.2f} kcal | R²: {r2:.4f}")

# Save per-dish predictions and absolute errors to a CSV file for later
# statistical analysis.
abs_error = np.abs(y_true - preds)
columns = {
    "dish_id": dish_ids,
    "y_true": y_true,
    "y_pred": preds,
    "abs_error": abs_error,
}
out = pd.DataFrame(columns)
out.to_csv("../data/unimodal_errors.csv", index=False)

# Final summary of the metrics computed above.
print("\n=== Results ===")
print(f"MAE = {mae:.2f} kcal | R² = {r2:.4f}")
