## Multimodal Early Fusion NN
#### Complex model: BiLSTM + MultiHeadAttention + EfficientNet + Residual Dense Blocks

In [2]:
import os
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, classification_report

import tensorflow as tf
from tensorflow.keras.layers import (
    Input, Embedding, Bidirectional, LSTM, Dense, Dropout, Concatenate,
    GlobalAveragePooling1D, GlobalMaxPooling1D, BatchNormalization, Add,
    SpatialDropout1D, MultiHeadAttention, LayerNormalization
)
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input as eff_preprocess

In [3]:
# Load the cleaned dataset and cast the target column to int in one pass.
# astype(int) raises if the column holds non-numeric values.
df = pd.read_csv("FoodFactsCleaned.csv").astype({"nutriscore_letter": int})

In [4]:
# Free-text columns; they are later joined into one document per row.
TEXT_COLS = [
    "brand_cleaned",
    "allergens_cleaned",
    "ingredients_text_cleaned",
    "countries_cleaned",
    "additives_cleaned",
]

# Columns fed to the tabular branch. The order here fixes the feature order
# of the tabular matrix, so keep it stable.
TABULAR_COLS = [
    "nova_group",
    "fat_100g",
    "saturated_fat_100g",
    "carbohydrates_100g",
    "sugars_100g",
    "fiber_100g",
    "proteins_100g",
    "contains_palm_oil",
    "vegetarian_status",
    "vegan_status",
    "nutrient_level_fat",
    "nutrient_level_saturated_fat",
    "nutrient_level_sugars",
    "nutrient_level_salt",
    "ecoscore_grade",
    "ecoscore_score",
    "carbon_footprint_100g",
    "additives_count",
    "sugar_ratio",
    "energy_density",
    "protein_ratio",
    "macro_balance",
    "healthy_score",
    "log_energy_kcal_100g",
    "log_salt_100g",
]

# Label column and the column holding the image file path for each row.
TARGET_COL = "nutriscore_letter"
IMAGE_COL = "image_160_path"

# Seed shared by every random split.
RANDOM_STATE = 42
# NOTE(review): the split cell passes test_size=0.15 literally and does not
# read TEST_SIZE -- confirm whether this constant is still needed.
TEST_SIZE = 0.2

# Text tokenization: vocabulary cap and fixed sequence length (in tokens).
MAX_WORDS = 30_000
MAX_LEN = 200

# Image settings: size passed to tf.image.resize when loading images.
IMG_SIZE = (160, 160)

In [7]:
# Every column this notebook reads from df: text, tabular, target and image path.
needed = TEXT_COLS + TABULAR_COLS + [TARGET_COL, IMAGE_COL]

# Fail fast with a single readable error that lists *all* absent columns,
# instead of a KeyError on whichever missing column happens to be hit first.
missing_cols = [col for col in needed if col not in df.columns]
if missing_cols:
    raise KeyError(f"df is missing required columns: {missing_cols}")

# Ensure text columns are strings (missing values become empty strings)
for c in TEXT_COLS:
    df[c] = df[c].fillna("").astype(str)

# Concatenate text into one space-separated document per row
df["text_concat"] = df[TEXT_COLS].agg(" ".join, axis=1)

# Prepare arrays: one per modality, all row-aligned with y
X_text = df["text_concat"].values
X_tab  = df[TABULAR_COLS].values.astype(np.float32)
# NOTE(review): astype(str) turns a missing path into the literal string "nan";
# confirm IMAGE_COL has no missing values before images are loaded.
X_img  = df[IMAGE_COL].astype(str).values
y = df[TARGET_COL].values

# Number of distinct label values present in the data
num_classes = len(np.unique(y))
print("Classes:", num_classes)

Classes: 5


In [8]:
# ---- Global split: Train / Val / Test ----
# Stage 1: hold out 15% of all rows as the test set, stratified on the labels.
(
    X_text_tv, X_text_te,
    X_tab_tv, X_tab_te,
    X_img_tv, X_img_te,
    y_tv, y_te,
) = train_test_split(
    X_text, X_tab, X_img, y,
    test_size=0.15,
    random_state=RANDOM_STATE,
    stratify=y,
)

# Stage 2: carve the validation set out of the remaining train+val rows.
# 0.1765 of the remaining 85% ≈ 15% of the total.
(
    X_text_tr, X_text_val,
    X_tab_tr, X_tab_val,
    X_img_tr, X_img_val,
    y_tr, y_val,
) = train_test_split(
    X_text_tv, X_tab_tv, X_img_tv, y_tv,
    test_size=0.1765,
    random_state=RANDOM_STATE,
    stratify=y_tv,
)

print("Train:", len(y_tr), "Val:", len(y_val), "Test:", len(y_te))

Train: 3596 Val: 771 Test: 771


In [9]:
# Build the vocabulary from the training split only, so validation and test
# text never influence the word index.
tokenizer = Tokenizer(
    oov_token="<OOV>",
    num_words=MAX_WORDS,
)
tokenizer.fit_on_texts(X_text_tr)

def tokenize_and_pad(texts):
    """Encode texts as fixed-length integer sequences.

    Maps each text to token ids with the module-level ``tokenizer``, then pads
    or truncates every sequence at its end ("post") to ``MAX_LEN`` ids.

    Args:
        texts: Iterable of strings to encode.

    Returns:
        The array returned by ``pad_sequences``, one row per input text.
    """
    return pad_sequences(
        tokenizer.texts_to_sequences(texts),
        maxlen=MAX_LEN,
        padding="post",
        truncating="post",
    )

# Encode the three splits with the same fitted tokenizer.
X_text_tr_pad, X_text_val_pad, X_text_te_pad = (
    tokenize_and_pad(split_texts)
    for split_texts in (X_text_tr, X_text_val, X_text_te)
)

# Vocabulary size is capped at MAX_WORDS; the +1 presumably accounts for
# index 0, which Keras reserves and never assigns to a word.
vocab_size = min(len(tokenizer.word_index) + 1, MAX_WORDS)


In [10]:
# Fit the scaler on the training split only, then reuse the fitted scaler for
# validation and test so they are transformed with the training statistics.
scaler = StandardScaler()
X_tab_tr_sc = scaler.fit_transform(X_tab_tr).astype(np.float32)
X_tab_val_sc, X_tab_te_sc = (
    scaler.transform(held_out).astype(np.float32)
    for held_out in (X_tab_val, X_tab_te)
)

# Number of tabular features (columns of the scaled training matrix).
tab_dim = X_tab_tr_sc.shape[1]


In [14]:
def load_image(path):
    """Load one image file as a preprocessed float32 tensor.

    Reads the file at ``path``, decodes it to 3 channels, resizes it to
    ``IMG_SIZE``, casts to float32 and applies the EfficientNet
    ``preprocess_input`` function.

    Args:
        path: Path of the image file, as accepted by ``tf.io.read_file``.

    Returns:
        The preprocessed image tensor.
    """
    raw_bytes = tf.io.read_file(path)
    # expand_animations=False is passed so animated inputs are not expanded
    # into a stack of frames.
    decoded = tf.io.decode_image(raw_bytes, channels=3, expand_animations=False)
    resized = tf.image.resize(decoded, IMG_SIZE)
    return eff_preprocess(tf.cast(resized, tf.float32))
