In [None]:
import numpy as np
from tensorflow import keras
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Embedding, Input, Flatten


In [None]:
# Toy dataset: three restaurant reviews with parallel per-review columns.
# Index i of every list describes the same review.
reviews_data = {
    # Free-text review body
    "review_text": [
        "The food was great, but it took forever to get seated.",
        "The tacos were life changing.",
        "This food made me question the presence of my taste buds.",
    ],
    # Categorical feature: which meal the review is about
    "meal_type": [
        "lunch",
        "dinner",
        "dinner",
    ],
    # Numeric feature: bill total for the meal
    "meal_total": [
        50,
        75,
        60,
    ],
    # Star rating given by the reviewer
    "rating": [
        4,
        5,
        1,
    ],
}


In [None]:

# ──────────────────────────────────────────────────────────────────────────────
# Text branch: raw review strings -> fixed-length integer sequences
# ──────────────────────────────────────────────────────────────────────────────
# Hyperparameters for the text pipeline, declared together up front.
# NOTE: per the Keras docs, Tokenizer(num_words=N) keeps the N - 1 most
# frequent words, so emitted token IDs lie in [1, vocab_size - 1].
vocab_size = 50
# Every review is padded (or truncated) to exactly this many token IDs.
max_sequence_len = 20

# Fit the word -> index mapping on the review corpus.
tokenize = keras.preprocessing.text.Tokenizer(num_words=vocab_size)
tokenize.fit_on_texts(reviews_data["review_text"])

# Encode each review as a list of token IDs, then right-pad ("post") with
# zeros so that all rows share the same length.
reviews_train = keras.preprocessing.sequence.pad_sequences(
    tokenize.texts_to_sequences(reviews_data["review_text"]),
    maxlen=max_sequence_len,
    padding="post",
)

# Show the resulting (num_reviews, max_sequence_len) integer matrix.
print(reviews_train)

# ──────────────────────────────────────────────────────────────────────────────
# Tabular branch: one-hot "meal_type" plus the numeric "meal_total"
# ──────────────────────────────────────────────────────────────────────────────
# Fixed category order; a category's position is its one-hot column.
possible_meal_vocab = ['breakfast', 'lunch', 'dinner']

# Resolve each review's meal to its column position. list.index raises
# ValueError for a meal that is not in the vocabulary.
meal_positions = [
    possible_meal_vocab.index(meal) for meal in reviews_data['meal_type']
]

# Build one row per review: 1 in the matching column, 0 everywhere else.
one_hot_meals = [
    [int(column == position) for column in range(len(possible_meal_vocab))]
    for position in meal_positions
]

# Turn the bill totals into a (batch, 1) column so they can sit next to
# the (batch, 3) one-hot block.
meal_total_column = np.asarray(reviews_data["meal_total"]).reshape(-1, 1)

# Final feature matrix: (batch, 3) one-hot + (batch, 1) total -> (batch, 4)
tabular_features = np.concatenate(
    (np.array(one_hot_meals), meal_total_column),
    axis=1,
)

# Show the assembled tabular feature matrix
print(tabular_features)


In [None]:

# ──────────────────────────────────────────────────────────────────────────────
# Define inputs and embedding for the NLP branch
# ──────────────────────────────────────────────────────────────────────────────
# Number of samples in the toy dataset. Informational only: it is NOT a model
# dimension and must not be used to size any layer.
batch_size = len(reviews_data['review_text'])

# Input layer for padded token-ID sequences of length max_sequence_len
embedding_input = Input(shape=(max_sequence_len,))
# Embedding layer: maps each token ID to a 64-dim dense vector.
# input_dim must be strictly greater than the largest token ID. The tokenizer
# was built with num_words=vocab_size, so IDs fall in [0, vocab_size - 1]
# (0 is the padding value), which makes vocab_size the correct table size.
embedding_layer = Embedding(input_dim=vocab_size, output_dim=64)(embedding_input)
# The embedding output is rank 3: (batch, max_sequence_len, 64). Flatten it to
# (batch, max_sequence_len * 64) so it can be concatenated with the rank-2
# tabular branch below.
embedding_flat = Flatten()(embedding_layer)

# ──────────────────────────────────────────────────────────────────────────────
# Define inputs and dense transform for the tabular branch
# ──────────────────────────────────────────────────────────────────────────────
# One input unit per tabular column (one-hot meal type + meal total)
tabular_input = Input(shape=(tabular_features.shape[1],))
# Dense layer to process combined one-hot + numeric features -> (batch, 32)
tabular_layer = Dense(32, activation='relu')(tabular_input)

# ──────────────────────────────────────────────────────────────────────────────
# Merge the two branches and build the final model
# ──────────────────────────────────────────────────────────────────────────────
# Concatenate flattened text embeddings (batch, max_sequence_len * 64) with the
# tabular branch output (batch, 32) along the feature axis.
merged_input = keras.layers.concatenate([embedding_flat, tabular_layer])
# Add a hidden dense layer
merged_dense = Dense(16, activation='relu')(merged_input)
# Final output: single continuous prediction (e.g. rating)
output = Dense(1)(merged_dense)

# Instantiate the Model with two inputs (text, tabular) and one output
model = Model(inputs=[embedding_input, tabular_input], outputs=output)

# The architecture (layer output shapes and parameter counts) is printed by
# model.summary() in the next cell.


In [None]:
# Preview the model architecture: prints one row per layer with its output
# shape and parameter count, followed by the total parameter counts.
model.summary()