In [1]:
import os, json, math, random, pathlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import (ModelCheckpoint, ReduceLROnPlateau,
                                        EarlyStopping, CSVLogger)
from tensorflow.keras.metrics import TopKCategoricalAccuracy
from tensorflow.keras.applications import efficientnet

from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix

# Optional mixed precision if you have a GPU (comment out if it causes issues on CPU-only)
# try:
#     tf.keras.mixed_precision.set_global_policy("mixed_float16")
# except Exception:
#     pass

SEED = 42
IMG_SIZE = 300   # EfficientNetB3 default
BATCH_SIZE = 24  # adjust down if you hit OOM
VAL_FRACTION = 0.1
EPOCHS_HEAD = 8
EPOCHS_FT = 15
DATA_ROOT = "../data/raw/food-101/images"
TRAIN_CSV = "../data/clean/train_cleaned.csv"
TEST_CSV  = "../data/clean/test_cleaned.csv"
LABEL_MAP_JSON = "../data/clean/label_map.json"
MODEL_HEAD = "../data/clean/effb3_head.keras"
MODEL_FT   = "../data/clean/effb3_finetuned.keras"
CSV_LOG    = "../data/clean/training_log.csv"

np.random.seed(SEED)
tf.random.set_seed(SEED)

print("TF version:", tf.__version__)
print("Mixed precision:", tf.keras.mixed_precision.global_policy())
print("GPU available:", tf.config.list_physical_devices('GPU'))

TF version: 2.17.0
Mixed precision: <DTypePolicy "float32">
GPU available: []


In [2]:
# Load CSVs
train_df = pd.read_csv(TRAIN_CSV)
test_df  = pd.read_csv(TEST_CSV)

# Build absolute image paths from image_name
def make_path(row):
    # image_name already like "label/12345" (without .jpg)
    return os.path.join(DATA_ROOT, f"{row['image_name']}.jpg")

train_df["image_path"] = train_df.apply(make_path, axis=1)
test_df["image_path"]  = test_df.apply(make_path, axis=1)

# Quick sanity checks
print("Train rows:", len(train_df), " | Test rows:", len(test_df))
missing_train = (~train_df["image_path"].apply(os.path.exists)).sum()
missing_test  = (~test_df["image_path"].apply(os.path.exists)).sum()
print(f"Missing train images: {missing_train} | Missing test images: {missing_test}")

# Filter out any missing files (should be zero ideally)
if missing_train > 0:
    train_df = train_df[train_df["image_path"].apply(os.path.exists)].reset_index(drop=True)
if missing_test > 0:
    test_df = test_df[test_df["image_path"].apply(os.path.exists)].reset_index(drop=True)

# Encode labels from TRAIN ONLY (fit on train labels)
le = LabelEncoder()
train_df["label_idx"] = le.fit_transform(train_df["label"])
num_classes = len(le.classes_)
print("Num classes:", num_classes)

# Map test labels -> idx (labels should be a subset of training labels)
test_df["label_idx"] = le.transform(test_df["label"])
label_map = {str(cls): int(idx) for cls, idx in zip(le.classes_, le.transform(le.classes_))}
with open(LABEL_MAP_JSON, "w") as f:
    json.dump(label_map, f)
print("Saved label_map to:", LABEL_MAP_JSON)
print("Sample label_map items:", list(label_map.items())[:8])

Train rows: 83250  | Test rows: 25250
Missing train images: 0 | Missing test images: 0
Num classes: 101
Saved label_map to: ../data/clean/label_map.json
Sample label_map items: [('apple_pie', 0), ('baby_back_ribs', 1), ('baklava', 2), ('beef_carpaccio', 3), ('beef_tartare', 4), ('beet_salad', 5), ('beignets', 6), ('bibimbap', 7)]


In [3]:
# Build Stratified train/val split on the training data
sss = StratifiedShuffleSplit(n_splits=1, test_size=VAL_FRACTION, random_state=SEED)
train_idx, val_idx = next(sss.split(train_df["image_path"], train_df["label_idx"]))

train_df_s = train_df.iloc[train_idx].reset_index(drop=True)
val_df_s   = train_df.iloc[val_idx].reset_index(drop=True)

print("Train split:", train_df_s.shape, "Val split:", val_df_s.shape)
print("Label distribution (train) top-5:\n", train_df_s["label"].value_counts().head())
print("Label distribution (val)   top-5:\n", val_df_s["label"].value_counts().head())

Train split: (74925, 14) Val split: (8325, 14)
Label distribution (train) top-5:
 label
churros             1350
baklava             1350
mussels             1350
huevos_rancheros    1350
tiramisu            1350
Name: count, dtype: int64
Label distribution (val)   top-5:
 label
churros         150
creme_brulee    150
mussels         150
tiramisu        150
falafel         150
Name: count, dtype: int64


In [4]:
# EfficientNet preprocess
preprocess_fn = efficientnet.preprocess_input  # scales to [-1,1]

# Keras native augmentation layers (fast, on-GPU)
data_augment = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.08),
    layers.RandomZoom(0.1),
])

def decode_resize(path, label_idx, augment=False):
    img = tf.io.read_file(path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, [IMG_SIZE, IMG_SIZE], antialias=True)
    img = preprocess_fn(img)
    if augment:
        img = data_augment(img)
    return img, label_idx

def make_ds(df, batch, shuffle=False, augment=False, cache=True):
    ds = tf.data.Dataset.from_tensor_slices((df["image_path"].values, df["label_idx"].values))
    if shuffle:
        ds = ds.shuffle(min(len(df), 10_000), seed=SEED, reshuffle_each_iteration=True)
    ds = ds.map(lambda p, y: decode_resize(p, y, augment=augment), num_parallel_calls=tf.data.AUTOTUNE)
    if cache:
        ds = ds.cache()
    ds = ds.batch(batch).prefetch(tf.data.AUTOTUNE)
    return ds

train_ds = make_ds(train_df_s, BATCH_SIZE, shuffle=True, augment=True)
val_ds   = make_ds(val_df_s,   BATCH_SIZE, shuffle=False, augment=False)
test_ds  = make_ds(test_df,    BATCH_SIZE, shuffle=False, augment=False)

# Peek
for x_batch, y_batch in train_ds.take(1):
    print("Batch:", x_batch.shape, y_batch.shape)

Batch: (8, 260, 260, 3) (8,)


2025-11-05 23:42:42.545254: W tensorflow/core/kernels/data/cache_dataset_ops.cc:913] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
2025-11-05 23:42:42.547823: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [5]:
# Compute class weights (optional but helpful for imbalanced classes)
counts = train_df_s["label_idx"].value_counts().sort_index().values
max_count = counts.max()
class_weights = {i: float(max_count / c) for i, c in enumerate(counts)}
print("Example class_weights (first 10):", dict(list(class_weights.items())[:10]))

Example class_weights (first 10): {0: 2.0, 1: 2.0, 2: 1.0, 3: 2.0, 4: 2.0, 5: 2.0, 6: 2.0, 7: 2.0, 8: 2.0, 9: 2.0}


In [6]:
base = efficientnet.EfficientNetB3(include_top=False, weights="imagenet",
                                   input_shape=(IMG_SIZE, IMG_SIZE, 3))
base.trainable = False  # freeze

inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
x = base(inputs, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.35)(x)
# IMPORTANT: in mixed precision, set dtype for the final Dense to float32 for numerical stability
outputs = layers.Dense(num_classes, activation="softmax", dtype="float32")(x)
model = Model(inputs, outputs, name="Food101_EfficientNetB3")

from tensorflow.keras.metrics import SparseTopKCategoricalAccuracy

model.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy", SparseTopKCategoricalAccuracy(k=5, name="top5")]
)

model.summary()

In [7]:
for x, y in train_ds.take(1):
    print("x:", x.shape, " y:", y.shape, " dtype:", y.dtype)

x: (8, 260, 260, 3)  y: (8,)  dtype: <dtype: 'int64'>


2025-11-05 23:42:43.333099: W tensorflow/core/kernels/data/cache_dataset_ops.cc:913] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
2025-11-05 23:42:43.343012: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [None]:
callbacks_head = [
    ModelCheckpoint(MODEL_HEAD, monitor="val_accuracy", save_best_only=True, verbose=1),
    ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=2, verbose=1),
    EarlyStopping(monitor="val_loss", patience=4, restore_best_weights=True, verbose=1),
    CSVLogger(CSV_LOG, append=False)
]

hist_head = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS_HEAD,
    class_weight=class_weights,
    callbacks=callbacks_head
)

model.save(MODEL_HEAD.replace(".keras", "_last.keras"))
print("Head training complete. Saved:", MODEL_HEAD)

Epoch 1/8
[1m4458/9366[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1m24:38[0m 301ms/step - accuracy: 0.3537 - loss: 5.0075 - top5: 0.6158

In [None]:
# Unfreeze last ~100 layers for fine-tuning (tune this number if needed)
for layer in base.layers[-100:]:
    if not isinstance(layer, layers.BatchNormalization):
        layer.trainable = True

# Compile with lower LR
model.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy", TopKCategoricalAccuracy(k=5, name="top5")]
)

callbacks_ft = [
    ModelCheckpoint(MODEL_FT, monitor="val_accuracy", save_best_only=True, verbose=1),
    ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=2, verbose=1),
    EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True, verbose=1),
    CSVLogger(CSV_LOG, append=True)
]

hist_ft = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS_FT,
    class_weight=class_weights,
    callbacks=callbacks_ft
)

model.save(MODEL_FT.replace(".keras", "_last.keras"))
print("Fine-tuning complete. Saved:", MODEL_FT)

In [None]:
def plot_curves(history_list, keys=("accuracy","val_accuracy")):
    plt.figure(figsize=(6,4))
    for h in history_list:
        plt.plot(h.history[keys[0]], label=f"train {keys[0]}")
        plt.plot(h.history[keys[1]], label=f"val {keys[1]}")
    plt.legend(); plt.title(" / ".join(keys)); plt.xlabel("Epoch"); plt.ylabel("Score"); plt.show()

plot_curves([hist_head, hist_ft], ("accuracy", "val_accuracy"))
plot_curves([hist_head, hist_ft], ("top5", "val_top5"))
plot_curves([hist_head, hist_ft], ("loss", "val_loss"))

In [None]:
test_metrics = model.evaluate(test_ds, verbose=1)
print("\nTest metrics [loss, top1 acc, top5 acc]:", test_metrics)

In [None]:
# Collect preds/targets (may take a few minutes on CPU)
y_true = []
y_pred = []
for xb, yb in test_ds:
    logits = model.predict(xb, verbose=0)
    y_true.extend(yb.numpy().tolist())
    y_pred.extend(np.argmax(logits, axis=1).tolist())

print("Classes:", len(le.classes_))
print(classification_report(y_true, y_pred, target_names=le.classes_, digits=3)[:2000])  # truncated print

In [None]:
cm = confusion_matrix(y_true, y_pred, labels=list(range(num_classes)))
cm_df = pd.DataFrame(cm, index=le.classes_, columns=le.classes_)
cm_df.to_csv("../data/clean/confusion_matrix.csv")
print("Saved confusion_matrix.csv")

In [None]:
from rapidfuzz import process as rf_process, fuzz as rf_fuzz
import re
from PIL import Image

USDA_CSV = "../data/raw/usda_food_data.csv"
usda_df = pd.read_csv(USDA_CSV)

def normalize_text(s):
    s = str(s).lower()
    s = re.sub(r'[^a-z\s]', ' ', s)
    s = re.sub(r'\s+', ' ', s).strip()
    return s

usda_df["normalized_description"] = usda_df["description"].astype(str).apply(normalize_text)
usda_names = usda_df["normalized_description"].tolist()

def find_nutrition_for_label(food_label, score_cutoff=75):
    q = food_label.replace("_", " ")
    match = rf_process.extractOne(q, usda_names, scorer=rf_fuzz.token_sort_ratio, score_cutoff=score_cutoff)
    if match:
        hit = usda_df.iloc[usda_df.index[usda_df["normalized_description"] == match[0]][0]]
        return {
            "description": hit.get("description"),
            "calories": hit.get("calories"),
            "protein": hit.get("protein"),
            "fat": hit.get("fat"),
            "carbohydrates": hit.get("carbohydrates"),
            "score": match[1]
        }
    return None

# Demo: predict a few random test images
samples = random.sample(test_df["image_path"].tolist(), 5)
for p in samples:
    # Load + predict
    img_raw = tf.io.read_file(p)
    img = tf.image.decode_jpeg(img_raw, channels=3)
    img_resized = tf.image.resize(img, [IMG_SIZE, IMG_SIZE])
    img_pp = efficientnet.preprocess_input(img_resized)
    pred = model.predict(tf.expand_dims(img_pp, 0), verbose=0)
    pred_idx = int(np.argmax(pred, axis=1)[0])
    pred_label = le.inverse_transform([pred_idx])[0]

    print(f"\nImage: {p}")
    print("Predicted:", pred_label)
    info = find_nutrition_for_label(pred_label)
    if info:
        print("USDA match:", info["description"], f"(score={info['score']})")
        print(f"Calories: {info['calories']} | Protein: {info['protein']} | Fat: {info['fat']} | Carbs: {info['carbohydrates']}")
    else:
        print("No close USDA match found.")

    # Show image
    try:
        display(Image.open(p))
    except Exception:
        pass