# Training Model AI Generated Menu Morning Box

In [13]:
# Import Library

# Pandas and Numpy are used for data manipulation and preprocessing
import pandas as pd
import numpy as np

# OneHotEncoder and MultiLabelBinarizer for encoding categorical variables and splitting the data into training and testing sets
from sklearn.preprocessing import OneHotEncoder, MultiLabelBinarizer
from sklearn.model_selection import train_test_split

# TensorFlow with Keras for building the neural network model
import tensorflow as tf
from tensorflow.keras import layers

In [18]:
# Load Dataset
data = pd.read_csv('Dataset.csv', sep=';')


In [19]:
print(data.head())

   id survey_1 survey_2 survey_3 survey_4 survey_5  \
0   1        A        B        A        A        B   
1   2        B        A        B        B        A   
2   3        A        B        B        A        B   
3   4        B        A        A        B        A   
4   5        A        B        A        B        B   

                     survey_6                monday               tuesday  \
0    Telur, Kacang - kacangan  3tqsB3n37DmaFcFq2v6X  82PPspsOvyVK034pq52a   
1            Seafood, Kedelai  QqfyjPHylFOFrx82jA2O  T6aI1OQQaFx8BzDDtnE6   
2               Ikan, Seafood  y4RgqqYAHBCnGxBMkZTS  y6MzIGk0oHsAEDJRCbBE   
3              Telur, Kedelai  Gd3js09RMKe02enHWPya  LOK0YSMFj5c5ov2IvxM2   
4  Kacang - kacangan, Seafood  uC3UbmTGj3ymYIjd7PCa  uKPyjZSkUy1y349n9uhu   

              wednesday              thursday                friday  \
0  B96GahULQbBIPVBC3LHj  FL7IqcGuEfgZcAGnMAqW  Gd3js09RMKe02enHWPya   
1  WtCirfXQ0ZK99opIGTaG  uC3UbmTGj3ymYIjd7PCa  uKPyjZSkUy1y349n9uhu   

In [20]:
column_names = ['survey_1', 'survey_2', 'survey_3', 'survey_4', 'survey_5']

for column in column_names:
    if column in data.columns:
        data[column] = data[column].apply(lambda x: 0 if x == 'A' else 1)
    else:
        print(f"Column '{column}' not found in DataFrame.")


In [21]:
# Convert survey answers to numeric
data[['survey_1', 'survey_2', 'survey_3', 'survey_4', 'survey_5']] = data[[
    'survey_1','survey_2', 'survey_3', 'survey_4', 'survey_5']
    ].applymap(lambda x: 0 if x == 'A' else 1)


In [24]:
# One-hot encoding for allergy column
# The code below is for preprocessing the allergy column of the dataset

# Convert non-string values to empty strings and remove the square brackets and double quotes
data['survey_6'] = data['survey_6'].apply(lambda x: x.strip(
    '[]').replace('"', '').split(',') if isinstance(x, str) else '')

# Initialize the MultiLabelBinarizer
mlb = MultiLabelBinarizer()

# Apply one-hot encoding to the allergy column using MultiLabelBinarizer
encoded_allergies = mlb.fit_transform(data['survey_6'])

# Create column names
allergy_columns = ['allergy_' + col for col in mlb.classes_]

# Convert the encoded allergies array to a DataFrame and concat
encoded_allergies_df = pd.DataFrame(encoded_allergies, columns=allergy_columns)
data = pd.concat([data, encoded_allergies_df], axis=1)


In [25]:
# Create target labels
target_columns = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
y = data[target_columns]


In [26]:
# Encode target labels
encoder = OneHotEncoder()
y_encoded = encoder.fit_transform(y).toarray()

In [27]:
# Split data into training and testing sets
X = data[['survey_1', 'survey_2', 'survey_3', 'survey_4', 'survey_5'] + allergy_columns].values
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

In [28]:
# Build and train the model
# Create a sequential model, which is a linear stack of layers
model = tf.keras.Sequential([
    # Add a dense layer with 64 neurons, ReLU activation function, and input shape matching the number of features in X_train
    layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    # Add another dense layer with 64 neurons and ReLU activation function
    layers.Dense(64, activation='relu'),
    # Add the output layer with neurons equal to the number of categories in y_encoded and a softmax activation function
    layers.Dense(y_encoded.shape[1], activation='softmax')
])


In [29]:
# Configure the model for training by specifying the optimizer, loss function, and metric to be used
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model using the training data (X_train, y_train), validating it against the test data (X_test, y_test)
# The training process will run for 20 epochs, with a batch size of 32 samples
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=20, batch_size=32)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [30]:
# Make predictions
sample_input = np.array([X_test[0]])  # Replace with your input data
predictions = model.predict(sample_input)
recommendations = encoder.inverse_transform(predictions)




In [31]:
# Print recommendations
day_names = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
recommended_meals = {day: meal for day, meal in zip(day_names, recommendations[0])}
print(recommended_meals)


{'monday': 'FL7IqcGuEfgZcAGnMAqW', 'tuesday': 'T6aI1OQQaFx8BzDDtnE6', 'wednesday': 'zMAK1zOqQH3ayYtVfjTZ', 'thursday': 'uC3UbmTGj3ymYIjd7PCa', 'friday': 'T6aI1OQQaFx8BzDDtnE6', 'saturday': 'xlCldGyo9qpLZeJkvpFL', 'sunday': 'WtCirfXQ0ZK99opIGTaG'}
