In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import jaccard_score
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
df = pd.read_excel("Concept_Data Resep.xlsx")
df

Unnamed: 0,id,nama_resep,bahan_resep,bahan_alergen,daerah_resep,umur_resep
0,1,Bubur Hati Ayam,"nasi, hati ayam, bayam, minyak jagung, garam, ...",,Indonesia,6-8 Bulan
1,2,Bubur Hati Ayam Santan,"nasi, hati ayam, wortel, santan, minyak kelapa...",,Indonesia,6-8 Bulan
2,3,Bubur Pepes Hati Ayam,"nasi, hati ayam, wortel, minyak kelapa, tomat,...",,Indonesia,6-8 Bulan
3,4,Bubur Udang Tahu,"nasi, udang giling, santan, tahu, tomat, keman...","udang, tahu",Indonesia,6-8 Bulan
4,5,Bubur Udang Buncis,"nasi, udang giling, buncis, minyak jagung, jer...",udang,Indonesia,6-8 Bulan
...,...,...,...,...,...,...
201,202,Ikan Dori Saus Mayo,"ikan dori, brokoli, tomat merah, mayonnaise, l...","ikan, mayonnaise",Indonesia,6-8 Bulan
202,203,Beef Stroganoff,"daging wagyu, mentega truffle garlic, bawang b...","mayonnaise, krim",Indonesia,6-8 Bulan
203,204,Buffalo Chicken Toast,"ayam bagian dada, mayonnaise, lada hitam, hima...","roti, mayonnaise",Indonesia,6-8 Bulan
204,205,Pancake Satan Keju Madu,"bubuk pancake, santan kara, susu uht full crea...","keju, mayonnaise",Indonesia,6-8 Bulan


In [3]:
#hapus blankspace dan non word
df[['bahan_resep','bahan_alergen']] = df[['bahan_resep','bahan_alergen']].replace(to_replace=r'[^\w\s]', value=' ', regex=True)
#replace NaN jadi blank
df = df.replace(np.nan, "")
df

Unnamed: 0,id,nama_resep,bahan_resep,bahan_alergen,daerah_resep,umur_resep
0,1,Bubur Hati Ayam,nasi hati ayam bayam minyak jagung garam ...,,Indonesia,6-8 Bulan
1,2,Bubur Hati Ayam Santan,nasi hati ayam wortel santan minyak kelapa...,,Indonesia,6-8 Bulan
2,3,Bubur Pepes Hati Ayam,nasi hati ayam wortel minyak kelapa tomat ...,,Indonesia,6-8 Bulan
3,4,Bubur Udang Tahu,nasi udang giling santan tahu tomat keman...,udang tahu,Indonesia,6-8 Bulan
4,5,Bubur Udang Buncis,nasi udang giling buncis minyak jagung jer...,udang,Indonesia,6-8 Bulan
...,...,...,...,...,...,...
201,202,Ikan Dori Saus Mayo,ikan dori brokoli tomat merah mayonnaise l...,ikan mayonnaise,Indonesia,6-8 Bulan
202,203,Beef Stroganoff,daging wagyu mentega truffle garlic bawang b...,mayonnaise krim,Indonesia,6-8 Bulan
203,204,Buffalo Chicken Toast,ayam bagian dada mayonnaise lada hitam hima...,roti mayonnaise,Indonesia,6-8 Bulan
204,205,Pancake Satan Keju Madu,bubuk pancake santan kara susu uht full crea...,keju mayonnaise,Indonesia,6-8 Bulan


In [4]:
#cek duplikat
df.duplicated().sum()

0

In [5]:
#gabungin bahan resep dan alergen
data = pd.DataFrame(df['bahan_resep'].astype(str)+ ' '+ df['bahan_alergen'].astype(str),columns=['bahan_resep_dan_alergen'])
data

Unnamed: 0,bahan_resep_dan_alergen
0,nasi hati ayam bayam minyak jagung garam ...
1,nasi hati ayam wortel santan minyak kelapa...
2,nasi hati ayam wortel minyak kelapa tomat ...
3,nasi udang giling santan tahu tomat keman...
4,nasi udang giling buncis minyak jagung jer...
...,...
201,ikan dori brokoli tomat merah mayonnaise l...
202,daging wagyu mentega truffle garlic bawang b...
203,ayam bagian dada mayonnaise lada hitam hima...
204,bubuk pancake santan kara susu uht full crea...


In [29]:
tfidf = CountVectorizer()
data_matrix = tfidf.fit_transform(data['bahan_resep_dan_alergen'])


In [30]:
print(data_matrix.shape)

(206, 263)


In [31]:
data_words = tfidf.get_feature_names_out()
print(data_words)

['abon' 'agar' 'air' 'almond' 'alpukat' 'apel' 'aron' 'asam' 'asi' 'asin'
 'asparagus' 'atau' 'avokad' 'awabng' 'ayam' 'baby' 'bagian' 'baking'
 'bandeng' 'batang' 'batita' 'bawag' 'bawal' 'bawang' 'bawnag' 'bayam'
 'bayi' 'beku' 'bengkuang' 'beras' 'biasa' 'bihun' 'biskuit' 'bombai'
 'bombay' 'brokoli' 'buah' 'bubuk' 'bumbu' 'buncis' 'butternut' 'cair'
 'canola' 'cengkeh' 'champgnon' 'champignon' 'cheddar' 'chocolate'
 'cincang' 'cokelat' 'cooking' 'cream' 'dada' 'daging' 'dan' 'dark' 'daun'
 'dayam' 'dori' 'dried' 'edamame' 'extra' 'filet' 'fillet' 'formula'
 'full' 'garam' 'garlic' 'gelatin' 'giling' 'goreng' 'gula' 'gulai'
 'gurame' 'halus' 'hati' 'havermut' 'havernmut' 'heruk' 'hgaram' 'hijau'
 'himalayan' 'hitam' 'hujau' 'iga' 'ikan' 'inggris' 'instant' 'irisan'
 'jagung' 'jahe' 'jalar' 'jambu' 'jamur' 'jawa' 'jeli' 'jelu' 'jeruk'
 'jintan' 'kacang' 'kakap' 'kaki' 'kaldu' 'kampung' 'kancing' 'kangkung'
 'kanola' 'kapulaga' 'kara' 'katuk' 'kayu' 'kayumanis' 'kecap' 'kecil'
 'kedel

In [17]:
print(data_matrix.toarray()[3])
print(data_matrix.shape)

[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.22228661
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.13805493 0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.1410774  0.         0.         0.24612041 0.         0.19425029
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.2103253  0.         0.         0.         0.         0.
 0.         0.18967951 0.         0.    

In [10]:
#user_input = "susu"
#user_input_vector = vectorizer.transform([user_input])

In [24]:
user_input = "minyak kedelai"
user_input_vector = tfidf.transform([user_input])

def find_recipes(user_input):    
    similarities = cosine_similarity(user_input_vector, data_matrix)
    return similarities

similarities=find_recipes(user_input)

In [25]:
print(user_input_vector.toarray())
print(user_input_vector.shape)

[[0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.   

In [26]:
top_n = 10
top_indices = np.argsort(similarities[0])[:top_n]
top_recipes = [df['nama_resep'][i] for i in top_indices]


print(f"{top_n} Rekomendasi Resep MPASI:")
for i, (recipe,score) in enumerate(zip(top_recipes, top_scores), 1):
    print(f"{i}. {recipe}")

10 Rekomendasi Resep MPASI:
1. Nasi Tim Hati Sapi
2. Bubur Susu Brokoli
3. Bubur Susu Jagung Manis
4. Bubur Susu Apel Jagung
5. Pure Semangka
6. Bubur Tenggiri Brokoli
7. Bubur Saring Kentang Sayuran
8. Bubur Saring Salmon
9. Bubur Oat Rasa Cokelat
10. Bubur Hijau


In [27]:
top_n = 10
top_indices = np.argsort(similarities[0][:top_n]) #ini ascending
top_recipes = [df['nama_resep'][i] for i in top_indices]
top_scores = [similarities[0][i] for i in top_indices]

print(f"{top_n} Rekomendasi Resep MPASI:")   
result_df = pd.DataFrame(columns = ['Resep MPASI', 'Similarity Score'])
for i, (recipe,score) in enumerate(zip(top_recipes, top_scores), 1):
    result = pd.DataFrame({
        'Resep MPASI': [recipe],
        'Similarity Score' : [score]
    })
    result_df = pd.concat([result_df, result], ignore_index=True)

result_df

10 Rekomendasi Resep MPASI:


  result_df = pd.concat([result_df, result], ignore_index=True)


Unnamed: 0,Resep MPASI,Similarity Score
0,Bubur Udang Tahu,0.0
1,Bubur Ayam Mentega,0.0
2,Bubur Patin,0.053533
3,Bubur Hati dan Ayam,0.059071
4,Bubur Pepes Hati Ayam,0.06813
5,Bubur Udang Buncis,0.068184
6,Bubur Tenggiri,0.076075
7,Bubur Hati Ayam Santan,0.07781
8,Bubur Daging Sapi,0.092176
9,Bubur Hati Ayam,0.095744


In [28]:
top_n = 10
top_indices = np.argsort(similarities[0][-top_n:])[::-1] #ini descending
top_recipes = [df['nama_resep'][i] for i in top_indices]
top_scores = [similarities[0][i] for i in top_indices]

print(f"{top_n} Rekomendasi Resep MPASI:")   
result_df = pd.DataFrame(columns = ['Resep MPASI', 'Similarity Score'])
for i, (recipe,score) in enumerate(zip(top_recipes, top_scores), 1):
    result = pd.DataFrame({
        'Resep MPASI': [recipe],
        'Similarity Score' : [score]
    })
    result_df = pd.concat([result_df, result], ignore_index=True)

result_df

10 Rekomendasi Resep MPASI:


  result_df = pd.concat([result_df, result], ignore_index=True)


Unnamed: 0,Resep MPASI,Similarity Score
0,Bubur Udang Tahu,0.0
1,Bubur Hati dan Ayam,0.059071
2,Bubur Ayam Mentega,0.0
3,Bubur Tenggiri,0.076075
4,Bubur Daging Sapi,0.092176
5,Bubur Patin,0.053533
6,Bubur Udang Buncis,0.068184
7,Bubur Pepes Hati Ayam,0.06813
8,Bubur Hati Ayam Santan,0.07781
9,Bubur Hati Ayam,0.095744


In [16]:
user_input = "nasi"
def recipe_similarity(user_input_vector,data_matrix):
    A = np.array(user_input_vector)
    B = np.array(data_matrix)

    intersect = np.intersect1d(A,B)
    similarity = 2*intersect/set(A)+set(B)
    return similarity

def find_recipes(user_input):    
    user_input_vector = tfidf.transform([user_input])
    similarities = recipe_similarity(user_input_vector, data_matrix)
    return similarities

similarities=find_recipes(user_input)

ValueError: inconsistent shapes