# Food Preference

## Persiapan Data

In [85]:
import pandas as pd

In [86]:
# Baca data dari file CSV
file_path = 'Food_Preference.csv'
df = pd.read_csv(file_path)
df.head()

Unnamed: 0,Timestamp,Participant_ID,Gender,Nationality,Age,Food,Juice,Dessert
0,2019/05/07 2:59:13 PM GMT+8,FPS001,Male,Indian,24,Traditional food,Fresh Juice,Maybe
1,2019/05/07 2:59:45 PM GMT+8,FPS002,Female,Indian,22,Western Food,Carbonated drinks,Yes
2,2019/05/07 3:00:05 PM GMT+8,FPS003,Male,Indian,31,Western Food,Fresh Juice,Maybe
3,2019/05/07 3:00:11 PM GMT+8,FPS004,Female,Indian,25,Traditional food,Fresh Juice,Maybe
4,2019/05/07 3:02:50 PM GMT+8,FPS005,Male,Indian,27,Traditional food,Fresh Juice,Maybe


In [87]:
columns_to_drop = ['Timestamp']
df = df.drop(columns=columns_to_drop)

In [88]:
df.head()

Unnamed: 0,Participant_ID,Gender,Nationality,Age,Food,Juice,Dessert
0,FPS001,Male,Indian,24,Traditional food,Fresh Juice,Maybe
1,FPS002,Female,Indian,22,Western Food,Carbonated drinks,Yes
2,FPS003,Male,Indian,31,Western Food,Fresh Juice,Maybe
3,FPS004,Female,Indian,25,Traditional food,Fresh Juice,Maybe
4,FPS005,Male,Indian,27,Traditional food,Fresh Juice,Maybe


In [89]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 288 entries, 0 to 287
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Participant_ID  288 non-null    object
 1   Gender          284 non-null    object
 2   Nationality     288 non-null    object
 3   Age             288 non-null    int64 
 4   Food            288 non-null    object
 5   Juice           288 non-null    object
 6   Dessert         288 non-null    object
dtypes: int64(1), object(6)
memory usage: 15.9+ KB


In [90]:
df.isnull().sum()

Participant_ID    0
Gender            4
Nationality       0
Age               0
Food              0
Juice             0
Dessert           0
dtype: int64

In [91]:
df.dropna(subset=['Gender'], inplace=True)

In [92]:
df.isnull().sum()

Participant_ID    0
Gender            0
Nationality       0
Age               0
Food              0
Juice             0
Dessert           0
dtype: int64

In [93]:
df.duplicated().sum()

0

## Pemrosesan Data

In [94]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [95]:
label_encoder = LabelEncoder()
df['Gender'] = label_encoder.fit_transform(df['Gender'])
df['Nationality'] = label_encoder.fit_transform(df['Nationality'])
df['Food'] = label_encoder.fit_transform(df['Food'])
df['Juice'] = label_encoder.fit_transform(df['Juice'])
df['Dessert'] = label_encoder.fit_transform(df['Dessert'])

In [96]:
# Mengonversi kolom 'Participant_ID' menjadi numerik menggunakan LabelEncoder
label_encoder_participant = LabelEncoder()
df['Participant_ID'] = label_encoder_participant.fit_transform(df['Participant_ID'])

In [97]:
# Membagi data menjadi data latih dan data uji
train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

## Pembuatan dan Pelatihan Model

In [98]:
# Membangun model collaborative filtering dengan TensorFlow
embedding_size = 10

In [99]:
user_input = tf.keras.layers.Input(shape=(1,), name='user_input')
food_input = tf.keras.layers.Input(shape=(1,), name='food_input')

In [100]:
user_embedding = tf.keras.layers.Embedding(input_dim=df['Participant_ID'].nunique(), output_dim=embedding_size, input_length=1)(user_input)
food_embedding = tf.keras.layers.Embedding(input_dim=df['Food'].nunique(), output_dim=embedding_size, input_length=1)(food_input)

In [101]:
user_vecs = tf.keras.layers.Flatten()(user_embedding)
food_vecs = tf.keras.layers.Flatten()(food_embedding)

In [102]:
prod = tf.keras.layers.Dot(axes=1)([user_vecs, food_vecs])

In [103]:
model = tf.keras.models.Model(inputs=[user_input, food_input], outputs=prod)
model.compile(optimizer='adam', loss='mean_squared_error')

In [104]:
# Melatih model
model.fit([train_data['Participant_ID'], train_data['Food']], train_data['Dessert'], epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x1649bb97710>

## Evaluasi Model

In [105]:
# Evaluasi model
test_loss = model.evaluate([test_data['Participant_ID'], test_data['Food']], test_data['Dessert'])
print(f'Test Loss: {test_loss}')

Test Loss: 2.053056240081787


## Prediksi

In [106]:
# Membuat prediksi untuk partisipan baru
new_participant = pd.DataFrame({'Participant_ID': ['FPS003'], 'Food': ['Traditional food']})
new_participant['Food'] = label_encoder.transform(new_participant['Food'])
prediction = model.predict([label_encoder.transform(new_participant['Participant_ID']), new_participant['Food']])
print(f'Prediction for new participant: {prediction}')

ValueError: y contains previously unseen labels: 'Traditional food'

## Ekspor Model

In [107]:
# Simpan model ke dalam format HDF5
model.save("path/to/model.h5")

  saving_api.save_model(


In [108]:
# Simpan model ke dalam format SavedModel
model.save("path/to/saved_model")

INFO:tensorflow:Assets written to: path/to/saved_model\assets


INFO:tensorflow:Assets written to: path/to/saved_model\assets
