In [31]:
import pandas as pd
import numpy as np
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.cluster import KMeans

In [32]:
# Read the raw dataset from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer='dataset.csv')


In [33]:

# Normalise the raw column names to the short names used by the rest of the notebook.
# rename() returns a new frame; re-binding it avoids mutating the loaded frame in place.
data = data.rename(columns={
    'Unnamed: 0': 'id',
    'Title': 'name',
    'Ingredients': 'ingredients',
    'Instructions': 'steps',
    'Image_Name': 'image',
})

In [34]:
# Keep only the relevant columns.
# .copy() makes new_data an independent frame, so columns added to it later are
# not written to a slice of `data` (which triggers pandas' SettingWithCopyWarning).
new_data = data[['id', 'name', 'ingredients', 'steps', 'image']].copy()

In [35]:
# Add a placeholder rating column with integers drawn uniformly from 1-5.
# A fixed seed keeps the ratings identical across kernel restarts, and a single
# vectorised draw replaces the per-row Python loop.
rating_rng = np.random.default_rng(42)
new_data['rating'] = rating_rng.integers(1, 6, size=len(new_data))  # upper bound is exclusive

In [36]:
# Convert the ingredients text to numeric features with TF-IDF.
# Missing values become empty documents instead of the literal token 'nan', and a
# Series of Python strings avoids the fixed-width NumPy unicode array produced by
# .values.astype('U'), which pads every row to the length of the longest string.
tfidf = TfidfVectorizer(stop_words='english')
ingredient_docs = new_data['ingredients'].fillna('').astype(str)
X_ingredients = tfidf.fit_transform(ingredient_docs)

In [47]:
# Use K-Means on the ingredient features; each row's cluster id becomes its category number.
# n_init is set explicitly because scikit-learn changed its default (10 -> 'auto'),
# which would otherwise make the clustering depend on the installed version.
# NOTE(review): only `num_clusters` distinct ids are produced, so any category name
# keyed beyond num_clusters - 1 can never be assigned -- confirm that 3 is intended.
num_clusters = 3
kmeans = KMeans(n_clusters=num_clusters, n_init=10, random_state=42)
new_data['category_num'] = kmeans.fit_predict(X_ingredients)

In [48]:
# Display names for the numeric category ids, keyed 0..17 in the order listed.
# NOTE(review): the names are assigned purely by position; nothing here derives
# them from what a cluster actually contains -- verify against the clustering output.
category_mapping = dict(enumerate((
    "Meat-Based",
    "Vegetarian",
    "Dessert",
    "Bakery",
    "Italian",
    "Seafood",
    "Vegan",
    "Asian",
    "Mexican",
    "Gluten-Free",
    "Fast Food",
    "Healthy",
    "Mediterranean",
    "Indian",
    "French",
    "Breakfast",
    "Beverages",
    "Snacks",
)))


In [49]:
# Translate each numeric category id into its display name.
category_names = new_data['category_num'].map(category_mapping)
new_data['category'] = category_names

In [50]:
# Train a Naive Bayes classifier that predicts the category number from the
# TF-IDF ingredient features.
model_category = MultinomialNB()
category_targets = new_data['category_num']  # numeric category ids serve as the class labels
model_category.fit(X_ingredients, category_targets)


In [51]:
# Use Naive Bayes for name-based recommendations.
# Missing names become empty documents instead of the literal token 'nan', and a
# Series of Python strings avoids the fixed-width NumPy unicode array produced by
# .values.astype('U').
cv = TfidfVectorizer(max_features=10000, stop_words='english')
name_docs = new_data['name'].fillna('').astype(str)
X_name = cv.fit_transform(name_docs)

# Every row is its own class (label = row position), so predict_proba yields one
# score per row of new_data.
model_recommendation = MultinomialNB()
y = np.arange(len(new_data))
model_recommendation.fit(X_name, y)

In [52]:
# Look up rows by category name.
def search_by_category(category_name):
    """Print the first rows of ``new_data`` whose category equals ``category_name``.

    The comparison is an exact match against the ``category`` column. When at
    least one row matches, a header line and ``.head()`` of the matching rows
    (id, name, ingredients, category, rating) are printed; otherwise a
    not-found message is printed. Nothing is returned.
    """
    in_category = new_data['category'] == category_name
    hits = new_data.loc[in_category]
    if not hits.empty:
        print(f"Makanan dalam kategori '{category_name}':")
        print(hits[['id', 'name', 'ingredients', 'category', 'rating']].head())
        return
    print(f"Tidak ditemukan makanan dalam kategori '{category_name}'")

In [53]:
# Search by name and print the top recommendations (5 by default).
def recommend_nb(name, top_n=5):
    """Print the ``top_n`` rows of ``new_data`` that best match ``name``.

    The query is vectorised with ``cv`` and scored by ``model_recommendation``;
    the highest-probability classes are looked up as row positions in
    ``new_data``. Each selected row also gets a ``predicted_category`` obtained
    by running ``model_category`` on its ingredients text.

    Parameters
    ----------
    name : str
        Free-text query; it is lower-cased before vectorisation.
    top_n : int, default 5
        Number of rows to show.

    Returns
    -------
    None
        Results are printed. If the query has no term in the fitted vocabulary,
        a not-found message is printed instead of arbitrary rows.
    """
    query_vec = cv.transform([str(name).lower()])

    # An all-zero query vector gives every class only its prior score, so the
    # "top" rows would be arbitrary; report that nothing matched instead.
    if query_vec.nnz == 0:
        print(f"Tidak ditemukan rekomendasi untuk '{name}'")
        return

    predicted_probs = model_recommendation.predict_proba(query_vec)[0]
    top_positions = np.argsort(predicted_probs)[::-1][:top_n]
    # Probability columns follow classes_; map them back to the row labels used in fit().
    row_positions = model_recommendation.classes_[top_positions]

    recommended_items = new_data.iloc[row_positions].copy()

    # Vectorise and classify all selected rows in one batch; missing ingredients
    # are treated as empty text rather than raising on a float NaN.
    ingredient_docs = recommended_items['ingredients'].fillna('').astype(str).str.lower()
    predicted_nums = model_category.predict(tfidf.transform(ingredient_docs))
    recommended_items['predicted_category'] = [
        category_mapping.get(num, "Unknown") for num in predicted_nums
    ]

    print("Rekomendasi untuk:", name)
    print(recommended_items[['id', 'name', 'ingredients', 'category', 'predicted_category', 'rating']])

In [62]:
# Test the search by category name.
# NOTE(review): K-Means is fitted with 3 clusters, so only the names keyed 0-2 in
# category_mapping can occur in the 'category' column; "Mexican" is keyed 8.
search_by_category("Mexican")

Tidak ditemukan makanan dalam kategori 'Mexican'


In [55]:
# Test the search by food name: print the recommendations for one query string.
target_item = "BURGER"
recommend_nb(target_item)

Rekomendasi untuk: BURGER
        id                       name  \
5509  5509  The Burger Lover's Burger   
2197  2197  Islak Burger (Wet Burger)   
2017  2017        The Barbecue Burger   
4531  4531         Spiced Lamb Burger   
8716  8716        The Ultimate Burger   

                                            ingredients    category  \
5509  ['1 1/2 pounds well-marbled beef chuck top bla...  Vegetarian   
2197  ['1 1/2 pounds ground beef chuck (20% fat)', '...     Dessert   
2017  ['2 pounds ground beef (preferably a mixture o...  Vegetarian   
4531  ['2 1/2 pounds ground lamb, preferably shoulde...     Dessert   
8716  ['2 1/2 pounds skirt steak or sirloin flap ste...  Vegetarian   

     predicted_category  rating  
5509            Dessert       4  
2197            Dessert       2  
2017         Vegetarian       1  
4531            Dessert       3  
8716            Dessert       4  
