In [2]:
import numpy as np
from tensorflow import keras
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Embedding, Input, Flatten


2025-07-06 14:33:11.306279: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-06 14:33:11.324539: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-07-06 14:33:11.454938: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-07-06 14:33:11.566081: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751805191.661189    8724 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751805191.68

In [3]:
# Toy restaurant-review dataset stored column-wise: position i in each of the
# four lists belongs to the same review.
reviews_data = {
    # Free-text body of each review
    "review_text": [
        "The food was great, but it took forever to get seated.",
        "The tacos were life changing.",
        "This food made me question the presence of my taste buds.",
    ],
    # Categorical feature: which meal the review is about
    "meal_type": ["lunch", "dinner", "dinner"],
    # Numeric feature: total of the bill
    "meal_total": [50, 75, 60],
    # Rating attached to each review
    "rating": [4, 5, 1],
}
""" 
The multimodal input design pattern addresses the challenge of representing different types of data or data with complex structures.
The key idea is to concatenate all available data representations, such as text, images, numerical features or categorical variables into a single combined input for a model.
This approach enables machine learning models to make use of diverse information sources, within a unified framework.
"""

"""

In a restaurant review example, the model can use both numerical and categorical metadata about the meal

- Categorical data: meal type (breakfast, lunch, dinner) => one-hot encoded [0, 0, 1] for dinner
- Numerical data: meal total (price) (30.5 euro)
- Combined feature vector => concatenate the one-hot vector and the numerical vector: [0, 0, 1, 30.5]
"""


# Define the input layers
""" 
What is a Flatten Layer?
========================
The Flatten layer reshapes multi-dimensional data into a single vector for each input sample.
This is often needed before:
- Concatenating with other features
- Passing data to dense layers

How It Works - Example:
----------------------
Let's say after an Embedding layer you have:
- Output shape: (batch_size, 4, 3)
- Meaning: For each sample, you have 4 tokens, each represented by a 3-dimensional embedding

Before Flattening:
# One sample looks like this (a 4x3 matrix):
# [
#   [0.1, 0.2, 0.3],   # Token 1
#   [0.4, 0.5, 0.6],   # Token 2
#   [0.7, 0.8, 0.9],   # Token 3
#   [1.0, 1.1, 1.2]    # Token 4
# ]

After Flattening:
# The same sample becomes a single vector:
# [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2]
# Length: 4 × 3 = 12

Why Use It?
-----------
The Flatten layer is crucial because:
1. Dense layers act on the last axis only - to get one output vector per sample (rather than one per token), each sample must first be a 1D vector
2. Feature concatenation - when combining different data types (text, numerical, categorical), 
   you need everything in the same format
3. Simplifies architecture - converts complex tensor shapes into simple vectors

Example Usage:
-------------
# For images (28x28 grayscale)
layers.Flatten(input_shape=(28, 28))
# This converts a 28x28 matrix into a 784-element vector

Key Insight:
-----------
The Flatten layer converts each sample from a matrix or tensor into a single vector, 
making it easier to combine with other features.
"""








In [9]:

# ──────────────────────────────────────────────────────────────────────────────
# Text branch: turn every review into a fixed-length row of integer token IDs
# ──────────────────────────────────────────────────────────────────────────────
# num_words caps the token IDs emitted by texts_to_sequences: only the
# num_words - 1 most frequent words are kept, rarer words are dropped.
vocab_size = 50
tokenize = keras.preprocessing.text.Tokenizer(num_words=vocab_size)
# Learn the word → integer-ID mapping from the review corpus
tokenize.fit_on_texts(reviews_data["review_text"])
# (tokenize.index_word[token_id] maps an ID back to its word - handy for spot checks)

# Every review is brought to exactly this many tokens; shorter ones are
# filled with 0s at the end ("post").
max_sequence_len = 20
reviews_train = keras.preprocessing.sequence.pad_sequences(
    tokenize.texts_to_sequences(reviews_data["review_text"]),
    maxlen=max_sequence_len,
    padding="post",
)

# Show the padded ID matrix: one row per review
print(reviews_train)

# ──────────────────────────────────────────────────────────────────────────────
# Tabular branch: one-hot “meal_type” plus the numeric “meal_total”
# ──────────────────────────────────────────────────────────────────────────────
possible_meal_vocab = ['breakfast', 'lunch', 'dinner']
one_hot_meals = []
for meal in reviews_data['meal_type']:
    # .index raises ValueError if the meal is not in the vocabulary
    hot_position = possible_meal_vocab.index(meal)
    # 1 in the slot of this meal, 0 everywhere else
    one_hot_arr = [int(slot == hot_position) for slot in range(len(possible_meal_vocab))]
    one_hot_meals.append(one_hot_arr)

# Glue the one-hot columns and the price column together, row by row
meal_one_hot_matrix = np.array(one_hot_meals)                             # (batch, 3)
meal_total_column = np.expand_dims(reviews_data["meal_total"], axis=1)    # (batch, 1)
tabular_features = np.concatenate(
    (meal_one_hot_matrix, meal_total_column), axis=1                      # (batch, 4)
)
print("one hot meal:", one_hot_meals)

# Show the assembled tabular feature matrix
print(tabular_features)


[[ 1  2  3  4  5  6  7  8  9 10 11  0  0  0  0  0  0  0  0  0]
 [ 1 12 13 14 15  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [16  2 17 18 19  1 20 21 22 23 24  0  0  0  0  0  0  0  0  0]]
one hot meal: [[0, 1, 0], [0, 0, 1], [0, 0, 1]]
[[ 0  1  0 50]
 [ 0  0  1 75]
 [ 0  0  1 60]]


In [None]:

# ──────────────────────────────────────────────────────────────────────────────
# Define inputs and embedding for the NLP branch
# ──────────────────────────────────────────────────────────────────────────────
batch_size = len(reviews_data['review_text'])  # number of samples (does NOT size any layer)

# Input layer for text sequences of length max_sequence_len (20)
embedding_input = Input(shape=(max_sequence_len,))
# Embedding layer: maps each token ID to a 64-dim dense vector.
# input_dim is the size of the lookup table and must be larger than the biggest
# token ID. The tokenizer was created with num_words=vocab_size, so IDs lie in
# [0, vocab_size - 1] (0 is the padding value) and vocab_size rows are enough.
# Sizing the table by batch_size (3) would make IDs such as 24 out of range.
embedding_layer = Embedding(input_dim=vocab_size, output_dim=64)(embedding_input)
# The embedding output is rank 3: (batch, 20, 64). Flatten it to one vector per
# sample, (batch, 20 * 64) = (batch, 1280), so it can be joined with the rank-2
# tabular branch below.
embedding_flat = Flatten()(embedding_layer)

# ──────────────────────────────────────────────────────────────────────────────
# Define inputs and dense transform for the tabular branch
# ──────────────────────────────────────────────────────────────────────────────
tabular_input = Input(shape=(tabular_features.shape[1],))  # (batch, 4)
# Dense layer to process combined one-hot + numeric features -> (batch, 32)
tabular_layer = Dense(32, activation='relu')(tabular_input)

# ──────────────────────────────────────────────────────────────────────────────
# Merge the two branches and build the final model
# ──────────────────────────────────────────────────────────────────────────────
# Concatenate flattened text embeddings (batch, 1280) with the tabular output
# (batch, 32) along the feature axis -> (batch, 1312)
merged_input = keras.layers.concatenate([embedding_flat, tabular_layer])
# Add a hidden dense layer
merged_dense = Dense(16, activation='relu')(merged_input)
# Final output: single continuous prediction (e.g. rating)
output = Dense(1)(merged_dense)

# Instantiate the Model with two inputs and one output
model = Model(inputs=[embedding_input, tabular_input], outputs=output)

# Preview architecture: input/output shapes and parameter counts


In [None]:
# Preview the model architecture: summary() prints one row per layer with its
# output shape and parameter count, followed by the parameter totals.
model.summary()