In [7]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LSTM, Dense, Input, Embedding, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.compose import ColumnTransformer

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [8]:
def ReadData():
    return pd.read_excel("/content/drive/MyDrive/Project/food_table.xlsx")
data = ReadData()

# Preprocess text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data['name'])
vocab_size = len(tokenizer.word_index) + 1

# Convert text to sequences
sequences = tokenizer.texts_to_sequences(data['name'])
max_sequence_len = max(len(seq) for seq in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_len)

In [9]:
# Prepare input-output pairs
X = padded_sequences
y_num = data[['calories', 'carb', 'fat', 'protein', 'size']].values

# Apply standard scaling to numerical features
scaler = StandardScaler()
y_num = scaler.fit_transform(y_num)

# Convert categorical target variable to numerical representation
label_encoder = LabelEncoder()
y_cat = label_encoder.fit_transform(data['type_size'])

X_train, X_val, y_num_train, y_num_val, y_cat_train, y_cat_val = train_test_split(
    X, y_num, y_cat, test_size=0.2, random_state=42)

In [39]:
# Step 2: LSTM Model with Multiple Outputs
input_layer = Input(shape=(max_sequence_len,))
embedding_layer = Embedding(vocab_size, 100)(input_layer)
lstm_layer = LSTM(128)(embedding_layer)
dense_layer1 = Dense(64, activation='relu')(lstm_layer)
# Numerical output
num_output = Dense(5, activation='linear', name='numerical_output')(dense_layer1)
# Categorical output
cat_output = Dense(len(label_encoder.classes_), activation='softmax', name='categorical_output')(dense_layer1)


k_folds = 5
# Create lists to store the accuracy scores for numerical and categorical outputs
num_scores = []
cat_scores = []

# Perform K-Fold cross-validation
kfold = KFold(n_splits=k_folds, shuffle=True)

for train_indices, test_indices in kfold.split(X_train):
    # Split the data into training and testing sets for each fold
    X_train_fold, X_test_fold = X_train[train_indices], X_train[test_indices]
    y_train_num_fold, y_train_cat_fold = y_num_train[train_indices], y_cat_train[train_indices]
    y_test_num_fold, y_test_cat_fold = y_num_train[test_indices], y_cat_train[test_indices]

      # Build and compile the model for each fold
    model = Model(inputs=input_layer, outputs=[num_output, cat_output])
    model.compile(loss=['mean_squared_error', 'sparse_categorical_crossentropy'],
                  optimizer='adam',
                  metrics={'numerical_output': 'accuracy', 'categorical_output': 'accuracy'})
    
    # Train the model on the current fold
    model.fit(X_train_fold, [y_train_num_fold, y_train_cat_fold], epochs=50, batch_size=32)
    
    # Evaluate the model on the test fold
    scores = model.evaluate(X_test_fold, [y_test_num_fold, y_test_cat_fold])
  


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/5

In [45]:
user_input = input('Enter a sentence: ')
input_seq = tokenizer.texts_to_sequences([user_input])
input_seq = pad_sequences(input_seq, maxlen=max_sequence_len)
num_output_pred, cat_output_pred = model.predict(input_seq)

# Rescale the numerical output
num_output_pred = scaler.inverse_transform(num_output_pred.reshape(1, -1)).reshape(-1)
# Convert categorical output to original labels
cat_output_pred = label_encoder.inverse_transform(np.argmax(cat_output_pred, axis=1))

print("Calories: ", num_output_pred[0])
print("Carb: ", num_output_pred[1])
print("Fat: ", num_output_pred[2])
print("Protein: ", num_output_pred[3])
print("Size: ", num_output_pred[4])
print("Type_Size: ", cat_output_pred[0])

Enter a sentence: مكرونه بالصوص الابيض
Calories:  201.04868
Carb:  39.9992
Fat:  5.7482204
Protein:  6.909592
Size:  1.0804374
Type_Size:  كوب
