In [13]:
import json
import pandas as pd
import re
import nltk
import tensorflow as tf
import numpy as np
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from gensim.models import Word2Vec
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Load dataset
file_path = 'all_data_resep.csv'
data = pd.read_csv(file_path)

# Tambahkan threshold untuk kolom 'Loves'
threshold = 100
data['Label'] = (data['Loves'] >= threshold).astype(int)

In [14]:
# Bersihkan teks
nltk.download('wordnet')
nltk.download('stopwords')

def clean_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    words = text.split("--")
    stop_words = set(stopwords.words('indonesian'))
    words = [word for word in words if word not in stop_words]
    lemmatizer = WordNetLemmatizer()
    words = [lemmatizer.lemmatize(word) for word in words]
    return ' '.join(words)

data['Title'].apply(clean_text)
data['Cleaned_Text'] = data['Title'].apply(clean_text)
data.head(3)

[nltk_data] Downloading package wordnet to C:\Users\Wildan
[nltk_data]     Septian\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to C:\Users\Wildan
[nltk_data]     Septian\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Unnamed: 0,Title,Ingredients,Steps,Loves,URL,Label,Cleaned_Text
0,Ayam Woku Manado,1 Ekor Ayam Kampung (potong 12)--2 Buah Jeruk ...,Cuci bersih ayam dan tiriskan. Lalu peras jeru...,1,/id/resep/4473027-ayam-woku-manado,0,ayam woku manado
1,Ayam goreng tulang lunak,1 kg ayam (dipotong sesuai selera jangan kecil...,"Haluskan bumbu2nya (BaPut, ketumbar, kemiri, k...",1,/id/resep/4471956-ayam-goreng-tulang-lunak,0,ayam goreng tulang lunak
2,Ayam cabai kawin,1/4 kg ayam--3 buah cabai hijau besar--7 buah ...,Panaskan minyak di dalam wajan. Setelah minyak...,2,/id/resep/4473057-ayam-cabai-kawin,0,ayam cabai kawin


In [15]:
# Menghapus emoji 
def clean_emoji(text):
  if text is not None and isinstance(text, str):
    emoji_pattern = re.compile("["
        u"\U0001F600-\U0001F64F"
        u"\U0001F300-\U0001F5FF"
        u"\U0001F680-\U0001F6FF"
        u"\U0001F700-\U0001F77F"
        u"\U0001F780-\U0001F7FF"
        u"\U0001F800-\U0001F8FF"
        u"\U0001F900-\U0001F9FF"
        u"\U0001FA00-\U0001FA6F"
        u"\U0001FA70-\U0001FAFF"
        u"\U00002702-\U000027B0"
        u"\U000024C2-\U0001F251"
        "]+", flags=re.UNICODE)
    return emoji_pattern.sub(r'', text)

for col in ['Ingredients', 'Title', 'Steps']:
    data[col] = data[col].fillna('').apply(clean_emoji)
data.head(1)

Unnamed: 0,Title,Ingredients,Steps,Loves,URL,Label,Cleaned_Text
0,Ayam Woku Manado,1 Ekor Ayam Kampung (potong 12)--2 Buah Jeruk ...,Cuci bersih ayam dan tiriskan. Lalu peras jeru...,1,/id/resep/4473027-ayam-woku-manado,0,ayam woku manado


In [16]:
# Initialize TF-IDF Vectorizer
tfidf_vectorizer = TfidfVectorizer(max_features=500)
tfidf_matrix = tfidf_vectorizer.fit_transform(data['Cleaned_Text'])

tfidf_tensor = tf.convert_to_tensor(tfidf_matrix.toarray(), dtype=tf.float32)

In [17]:
# Tokenisasi data teks yang sudah dibersihkan
data['Tokenized_Text'] = data['Cleaned_Text'].apply(lambda x: x.split())

word2vec_model = Word2Vec(
    sentences=data['Tokenized_Text'], 
    vector_size=100, 
    window=5, 
    min_count=1, 
    workers=4, 
    sg=1
)

In [18]:
# Fungsi untuk menghitung rata-rata embedding sebuah dokumen
def document_embedding(text, model, vector_size=100):
    words = text.split()
    word_vectors = [model.wv[word] for word in words if word in model.wv]
    if len(word_vectors) > 0:
        return np.mean(word_vectors, axis=0)
    else:
        return np.zeros(vector_size)

data['Embedding'] = data['Cleaned_Text'].apply(
    lambda x: document_embedding(x, word2vec_model, vector_size=100)
)
data.head()

Unnamed: 0,Title,Ingredients,Steps,Loves,URL,Label,Cleaned_Text,Tokenized_Text,Embedding
0,Ayam Woku Manado,1 Ekor Ayam Kampung (potong 12)--2 Buah Jeruk ...,Cuci bersih ayam dan tiriskan. Lalu peras jeru...,1,/id/resep/4473027-ayam-woku-manado,0,ayam woku manado,"[ayam, woku, manado]","[-0.071575575, 0.27144256, 0.10997706, -0.0608..."
1,Ayam goreng tulang lunak,1 kg ayam (dipotong sesuai selera jangan kecil...,"Haluskan bumbu2nya (BaPut, ketumbar, kemiri, k...",1,/id/resep/4471956-ayam-goreng-tulang-lunak,0,ayam goreng tulang lunak,"[ayam, goreng, tulang, lunak]","[-0.035983205, 0.29427776, 0.092676245, -0.054..."
2,Ayam cabai kawin,1/4 kg ayam--3 buah cabai hijau besar--7 buah ...,Panaskan minyak di dalam wajan. Setelah minyak...,2,/id/resep/4473057-ayam-cabai-kawin,0,ayam cabai kawin,"[ayam, cabai, kawin]","[-0.087103866, 0.2198242, 0.09560595, -0.02957..."
3,Ayam Geprek,250 gr daging ayam (saya pakai fillet)--Secuku...,Goreng ayam seperti ayam krispi--Ulek semua ba...,10,/id/resep/4473023-ayam-geprek,0,ayam geprek,"[ayam, geprek]","[-0.102970526, 0.3273179, 0.13214366, -0.08680..."
4,Minyak Ayam,400 gr kulit ayam & lemaknya--8 siung bawang p...,Cuci bersih kulit ayam. Sisihkan--Ambil 50 ml ...,4,/id/resep/4427438-minyak-ayam,0,minyak ayam,"[minyak, ayam]","[-0.10683087, 0.32586378, 0.12193802, -0.07292..."


In [19]:
# Pembagian data
X = data['Cleaned_Text']  
y = data['Label']        

# Membagi data menjadi training dan testing
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Menggunakan TF-IDF untuk representasi teks
tfidf_vectorizer = TfidfVectorizer(max_features=500)
tfidf_matrix_train = tfidf_vectorizer.fit_transform(X_train)
tfidf_matrix_val = tfidf_vectorizer.transform(X_val)

# Ubah ke TensorFlow tensor
X_train_tensor = tf.convert_to_tensor(tfidf_matrix_train.toarray(), dtype=tf.float32)
X_val_tensor = tf.convert_to_tensor(tfidf_matrix_val.toarray(), dtype=tf.float32)
y_train_tensor = tf.convert_to_tensor(y_train.values, dtype=tf.float32)  
y_val_tensor = tf.convert_to_tensor(y_val.values, dtype=tf.float32)  

In [20]:
# Model Neural Network dengan Regularization, Dropout, dan Learning Rate Scheduler
def create_model(input_shape):
    model = models.Sequential()
    
    # Layer pertama (Dense dengan L2 Regularization dan Dropout)
    model.add(layers.Dense(512, activation='relu', input_shape=input_shape,
                           kernel_regularizer=regularizers.l2(0.01)))  
    model.add(layers.Dropout(0.5))  
    
    # Layer kedua
    model.add(layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    model.add(layers.Dropout(0.5))
    
    # Layer output
    model.add(layers.Dense(1, activation='sigmoid')) 
    
    # Kompilasi model dengan optimizer dan loss function
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), 
                  loss='binary_crossentropy', 
                  metrics=['accuracy'])
    
    return model

model = create_model((X_train_tensor.shape[1],))  

early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-5)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [21]:
# Melatih model
history = model.fit(
    X_train_tensor,  
    y_train_tensor,  
    epochs=10, 
    batch_size=32, 
    validation_data=(X_val_tensor, y_val_tensor),  
    callbacks=[early_stopping, lr_scheduler]  
)

Epoch 1/10
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 10ms/step - accuracy: 0.9589 - loss: 2.0026 - val_accuracy: 0.9904 - val_loss: 0.0628 - learning_rate: 0.0010
Epoch 2/10
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.9907 - loss: 0.0634 - val_accuracy: 0.9904 - val_loss: 0.0629 - learning_rate: 0.0010
Epoch 3/10
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.9905 - loss: 0.0624 - val_accuracy: 0.9904 - val_loss: 0.0680 - learning_rate: 0.0010
Epoch 4/10
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.9928 - loss: 0.0472 - val_accuracy: 0.9904 - val_loss: 0.0555 - learning_rate: 2.0000e-04
Epoch 5/10
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.9915 - loss: 0.0519 - val_accuracy: 0.9904 - val_loss: 0.0553 - learning_rate: 2.0000e-04
Epoch 6/10
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

In [22]:
# Contoh data kategori
data['Category'] = ["Ayam", "Ikan", "Sapi", "Udang", "Ayam", "Kambing", "Tahu", "Tempe"] * (len(data) // 8) + ["Ayam", "Ikan", "Sapi", "Udang", "Ayam", "Kambing", "Tahu", "Tempe"][:len(data) % 8]

In [23]:
def recommend_recipes_with_category(query, category, word2vec_model, data, top_n=10, vector_size=100):
    # Filter dataset berdasarkan kategori hewan
    data_filtered = data[data['Category'] == category]

    if data_filtered.empty:
        return [{"error": "Tidak ada menu tersedia untuk kategori ini."}]
    
    # Preprocess query
    query_cleaned = clean_text(query)
    
    # Generate embedding untuk query
    query_embedding = document_embedding(query_cleaned, word2vec_model, vector_size=vector_size)
    
    # Hitung cosine similarity antara query dan semua embedding dalam dataset yang difilter
    doc_embeddings = np.array(data_filtered['Embedding'].tolist())
    similarities = cosine_similarity([query_embedding], doc_embeddings)[0]
    
    # Ambil top N rekomendasi berdasarkan similarity
    top_indices = similarities.argsort()[-top_n:][::-1] 
    recommended_recipes = data_filtered.iloc[top_indices]
    
    # Urutkan berdasarkan 'Loves' secara descending
    recommended_recipes = recommended_recipes.sort_values(by=['Loves'], ascending=False)
    
    # Convert hasil ke JSON format serializable
    recommendations_json = recommended_recipes[['Title', 'Ingredients', 'Steps', 'Loves']].to_dict(orient='records')
    
    return recommendations_json

#contoh query
query = "ayam goreng"
# Kategori hewan dari akun pengguna
user_category = "Ayam"  
recommended_recipes_json = recommend_recipes_with_category(query, user_category, word2vec_model, data, top_n=10)

# Simpan hasil rekomendasi
with open('recommended_recipes_by_category.json', 'w', encoding='utf-8') as json_file:
    json.dump(recommended_recipes_json, json_file, ensure_ascii=False, indent=4)

# Tampilkan hasil
print(json.dumps(recommended_recipes_json, ensure_ascii=False, indent=4))

[
    {
        "Title": "Ayam Goreng Lengkuas",
        "Ingredients": "2 ekor ayam kampung potong 4 bagian--(1 kg ayam negri)--100 gr lengkuas, parut kasar--3 lembar daun salam--4 lembar daun jeruk, robek--Bumbu halus :--6 butir bawang merah--6 butir bawang putih--1 sdm ketumbar--3 butir kemiri--1 ruas kunyit--1 ruas jahe--1 Batang serai ambil putihnya--secukupnya Garam--",
        "Steps": "Cuci bersih ayam, sisihkan--Rendam ayam dengan bumbu halus dan parutan lengkuas kurleb 30 menit--Lalu beri air secukupnya lalu masukkan daun salam dan daun jeruk, kemudian presto (ungkep ayam biasa bisa) sampai empuk--Angkat ayam, saring sisa bumbu lengkuas tiriskan--Goreng ayam lalu sisihkan, goreng bumbu lengkuas lalu tiriskan di tisu makan agar tidak berminyak--Taburkan bumbu lengkuas ke atas ayam goreng, sajikan--",
        "Loves": 76
    },
    {
        "Title": "Ayam goreng laos",
        "Ingredients": "1/2 ekor ayam potong--3 daun jeruk--3 daun salam--2 batang serai--Bumbu halus :--2 ri

In [24]:
# #Save model h5
model.save("recommended_recipes_by_category.h5")

