In [1]:
import pandas as pd
import numpy as np
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.cluster import KMeans

In [2]:
# Read the raw recipe table from the CSV file in the working directory
data = pd.read_csv(filepath_or_buffer='dataset.csv')

In [3]:
# Rename the raw CSV headers to the short lowercase names used by every later cell.
# The renamed frame is rebound to `data` instead of mutating it with
# inplace=True, which pandas discourages and which cannot be chained.
data = data.rename(columns={
    'Unnamed: 0': 'id',
    'Title': 'name',
    'Ingredients': 'ingredients',
    'Instructions': 'steps',
    'Image_Name': 'image',
})

In [4]:
# Keep only the relevant columns.
# .copy() makes new_data an independent frame: later cells add columns to it,
# and assigning into a bare slice of `data` raises SettingWithCopyWarning.
new_data = data[['id', 'name', 'ingredients', 'steps', 'image']].copy()

In [5]:
# Add a placeholder `rating` column holding a pseudo-random integer from 1 to 5 (inclusive).
# A dedicated seeded generator keeps the ratings identical on every
# Restart & Run All without touching the global `random` state.
# .assign() returns a new frame, so nothing is written into a slice of `data`.
rating_rng = random.Random(42)
new_data = new_data.assign(
    rating=[rating_rng.randint(1, 5) for _ in range(len(new_data))]
)

In [6]:
# Add a placeholder `cluster` column (0 or 1) and a human-readable label for it.
# The value is drawn at random - it is not the output of a clustering algorithm.
# A seeded Generator makes the draw reproducible without changing NumPy's
# global random state; .assign() avoids writing into a slice of `data`.
cluster_rng = np.random.default_rng(42)
new_data = new_data.assign(
    cluster=cluster_rng.choice([0, 1], size=len(new_data), replace=True)
)
new_data = new_data.assign(
    cluster_label=new_data['cluster'].map({0: 'biasa saja', 1: 'favorit'})
)

In [7]:
# Convert the ingredient text into numeric TF-IDF features (English stop words dropped).
# astype('U') coerces every cell to a unicode string before vectorizing.
ingredient_texts = new_data['ingredients'].values.astype('U')
tfidf = TfidfVectorizer(stop_words='english')
X_ingredients = tfidf.fit_transform(ingredient_texts)

In [8]:
# **Use K-Means on the ingredient TF-IDF features to form food categories**
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto', so leaving it unset gives version-dependent clusters (and a
# FutureWarning on the releases in between).
# .assign() returns a new frame, so nothing is written into a slice of `data`.
num_clusters = 5
kmeans = KMeans(n_clusters=num_clusters, n_init=10, random_state=42)
new_data = new_data.assign(category_num=kmeans.fit_predict(X_ingredients))

In [9]:
# **Display name for each K-Means cluster id (0-4), in cluster-id order**
# NOTE(review): K-Means cluster ids are arbitrary, so these names only make sense
# if each cluster was inspected first. The sample output further down shows a
# chicken stew labelled 'Dessert' - verify the names against the cluster contents.
category_mapping = dict(enumerate([
    "Meat-Based",
    "Vegetarian",
    "Dessert",
    "Bakery",
    "Italian",
]))

In [10]:
# Translate the numeric cluster id into its category name.
# Ids missing from category_mapping become NaN.
# .assign() returns a new frame, so nothing is written into a slice of `data`
# (which would raise SettingWithCopyWarning).
new_data = new_data.assign(category=new_data['category_num'].map(category_mapping))


In [11]:
# **Naive Bayes classifier that predicts the category from the ingredients**
# Features are the ingredient TF-IDF matrix; targets are the numeric K-Means
# category ids, not the category names.
model_category = MultinomialNB()
model_category.fit(X=X_ingredients, y=new_data['category_num'])


In [12]:
# **Features for the name-based Naive Bayes recommender**
# Recipe names are coerced to unicode strings and vectorized with TF-IDF,
# keeping at most 10,000 terms and dropping English stop words.
name_texts = new_data['name'].values.astype('U')
cv = TfidfVectorizer(stop_words='english', max_features=10000)
X_name = cv.fit_transform(name_texts)

In [13]:
# Every recipe is its own class: the label of row i is simply i, so a predicted
# class can be used as a positional row index into new_data.
y = np.arange(new_data.shape[0])
model_recommendation = MultinomialNB()
model_recommendation.fit(X_name, y)

In [14]:
# **Search by category name and show a random selection**
def search_by_category(category_name, N=5, random_state=None):
    """Print up to ``N`` randomly chosen recipes of one category.

    Args:
        category_name: Value compared for exact equality with the
            ``category`` column of the module-level ``new_data`` frame.
        N: Maximum number of rows to print. Negative values are treated as 0.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so a
            selection can be reproduced. ``None`` (default) draws a fresh
            selection on every call.

    Returns:
        None. The result, or a not-found message, is printed.
    """
    matching_items = new_data[new_data['category'] == category_name]
    if matching_items.empty:
        print(f"Tidak ditemukan makanan dalam kategori '{category_name}'")
        return

    # Draw only the rows that are shown instead of shuffling the whole subset.
    sample_size = max(0, min(N, len(matching_items)))
    shown_columns = ['id', 'name', 'ingredients', 'category', 'rating', 'cluster_label']
    print(f"Makanan acak dalam kategori '{category_name}':")
    print(matching_items.sample(n=sample_size, random_state=random_state)[shown_columns])

In [15]:
# **Search by cluster label and show a random selection**
def search_by_cluster(cluster_label, N=5, random_state=None):
    """Print up to ``N`` randomly chosen recipes of one cluster label.

    Args:
        cluster_label: Value compared for exact equality with the
            ``cluster_label`` column of the module-level ``new_data`` frame.
        N: Maximum number of rows to print. Negative values are treated as 0.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so a
            selection can be reproduced. ``None`` (default) draws a fresh
            selection on every call.

    Returns:
        None. The result, or a not-found message, is printed.
    """
    matching_items = new_data[new_data['cluster_label'] == cluster_label]
    if matching_items.empty:
        print(f"Tidak ditemukan makanan dalam cluster '{cluster_label}'")
        return

    # Draw only the rows that are shown instead of shuffling the whole subset.
    sample_size = max(0, min(N, len(matching_items)))
    shown_columns = ['id', 'name', 'ingredients', 'category', 'rating', 'cluster_label']
    print(f"Makanan acak dalam cluster '{cluster_label}':")
    print(matching_items.sample(n=sample_size, random_state=random_state)[shown_columns])

In [16]:
# **Search by name and print the top-N recommendations**
def recommend_nb(name, N=5):
    """Print the ``N`` recipes whose names best match ``name``.

    The query is vectorized with ``cv`` and scored by ``model_recommendation``;
    the highest-probability classes are looked up in ``new_data``. Each
    recommended recipe also gets a ``predicted_category`` column, obtained by
    running ``model_category`` on its ingredients.

    Args:
        name: Query text (a dish name or keyword).
        N: Number of recommendations to print (default 5). Capped at the
            number of classes the recommendation model knows.

    Returns:
        None. The recommendations, or a not-found message, are printed.

    Raises:
        ValueError: If ``N`` is smaller than 1.
    """
    if N < 1:
        raise ValueError("N must be a positive integer")

    query_vec = cv.transform([name.lower()])
    if query_vec.nnz == 0:
        # No query token is in the vectorizer's vocabulary. The model would
        # fall back to its class priors and return rows unrelated to the query.
        print(f"Tidak ditemukan makanan yang cocok dengan '{name}'")
        return

    predicted_probs = model_recommendation.predict_proba(query_vec)[0]
    top_n = min(N, len(predicted_probs))
    best_first = np.argsort(predicted_probs)[::-1][:top_n]
    # Probability columns follow classes_. The model was fit with positional
    # row numbers as labels, so classes_ maps each column back to its row.
    top_indices = model_recommendation.classes_[best_first]

    recommended_items = new_data.iloc[top_indices].copy()
    # Coerce with astype('U') exactly as at training time, so non-string cells
    # (e.g. NaN) are vectorized instead of failing on .lower(). All rows are
    # predicted in one batch.
    ingredient_vecs = tfidf.transform(recommended_items['ingredients'].values.astype('U'))
    predicted_nums = model_category.predict(ingredient_vecs)
    recommended_items['predicted_category'] = [
        category_mapping.get(num, "Unknown") for num in predicted_nums
    ]

    print("Rekomendasi untuk:", name)
    print(recommended_items[['id', 'name', 'ingredients', 'category', 'predicted_category', 'rating', 'cluster_label']])

In [17]:
# **Demo: search by category (random selection)**
search_by_category(category_name="Dessert")

Makanan acak dalam kategori 'Dessert':
          id                                               name  \
3860    3860  Chilled Watercress, Spring Nettle, And Sorrel ...   
3287    3287  Mixed Lettuces and Kohlrabi With Creamy Sumac ...   
11463  11463  Cauliflower Soup with Seared Scallops, Lemon O...   
1474    1474            Chicken Stew with Potatoes and Radishes   
9628    9628   Scalloped Potatoes With Coconut Milk and Chilies   

                                             ingredients category  rating  \
3860   ['2 tablespoons extra virgin olive oil', '2 ta...  Dessert       2   
3287   ['1/2 cup blanched hazelnuts', '1 white or pin...  Dessert       4   
11463  ['3 tablespoons vegetable oil, divided', '1 cu...  Dessert       4   
1474   ['4 chicken legs (thigh and drumstick), patted...  Dessert       4   
9628   ['1 can (13.5 ounces) unsweetened coconut milk...  Dessert       3   

      cluster_label  
3860        favorit  
3287        favorit  
11463    biasa saja  
1474   

In [20]:
# **Demo: search by cluster label (random selection)**
search_by_cluster(cluster_label="biasa saja")

Makanan acak dalam cluster 'biasa saja':
          id                                       name  \
1331    1331                      Walnut-Crusted Salmon   
1397    1397         Sticky Maple and Bourbon Pork Ribs   
6903    6903                           Avocado Smoothie   
8860    8860  Penne with Sun-Dried Tomatoes and Arugula   
10589  10589                          Sweet Potato Flan   

                                             ingredients    category  rating  \
1331   ['2 (4-inch-long) daikon radishes, thinly slic...     Dessert       1   
1397   ['2 cups (500ml) malt vinegar', '2 quarts (2 l...  Vegetarian       2   
6903   ['1 ripe avocado, halved and pitted', '1 1/4 c...  Vegetarian       4   
8860   ['1/4 pound thinly sliced pancetta, finely cho...     Italian       5   
10589  ['1 large red-skinned sweet potato (yam; about...  Vegetarian       4   

      cluster_label  
1331     biasa saja  
1397     biasa saja  
6903     biasa saja  
8860     biasa saja  
10589    bias

In [22]:
# **Demo: recommend recipes by dish name**
target_item = "sausage"
recommend_nb(name=target_item)

Rekomendasi untuk: sausage
        id                            name  \
5669  5669                 Chicken Sausage   
4690  4690               Beans and Sausage   
6869  6869                   Sausage Gravy   
7165  7165                   Sausage Rolls   
930    930  Pasta with Sausage and Arugula   

                                            ingredients    category  \
5669  ['1 1/2 pound/675 grams chicken thigh meat, di...  Vegetarian   
4690  ['1 ancho chile or dried choricero pepper', '4...     Italian   
6869  ['1/2 pound breakfast sausage', '1/2 cup (1 st...  Meat-Based   
7165  ['1 1/2 pounds good quality pork sausage', '1 ...     Dessert   
930   ['2 small red onions, sliced into 1/2"-thick w...      Bakery   

     predicted_category  rating cluster_label  
5669         Vegetarian       2       favorit  
4690            Dessert       4    biasa saja  
6869            Dessert       5       favorit  
7165            Dessert       4       favorit  
930             Italian      