In [5]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import jaccard_score
from sklearn.metrics.pairwise import cosine_similarity

In [6]:
df = pd.read_excel("Concept_Data Resep.xlsx")
df

Unnamed: 0,id,nama_resep,bahan_resep,bahan_alergen,daerah_resep,umur_resep
0,1,Bubur Hati Ayam,"nasi, hati ayam, bayam, minyak jagung, garam, ...",,Indonesia,6-8 Bulan
1,2,Bubur Hati Ayam Santan,"nasi, hati ayam, wortel, santan, minyak kelapa...",,Indonesia,6-8 Bulan
2,3,Bubur Pepes Hati Ayam,"nasi, hati ayam, wortel, minyak kelapa, tomat,...",,Indonesia,6-8 Bulan
3,4,Bubur Udang Tahu,"nasi, udang giling, santan, tahu, tomat, keman...","udang, tahu",Indonesia,6-8 Bulan
4,5,Bubur Udang Buncis,"nasi, udang giling, buncis, minyak jagung, jer...",udang,Indonesia,6-8 Bulan
...,...,...,...,...,...,...
201,202,Ikan Dori Saus Mayo,"ikan dori, brokoli, tomat merah, mayonnaise, l...","ikan, mayonnaise",Indonesia,6-8 Bulan
202,203,Beef Stroganoff,"daging wagyu, mentega truffle garlic, bawang b...","mayonnaise, krim",Indonesia,6-8 Bulan
203,204,Buffalo Chicken Toast,"ayam bagian dada, mayonnaise, lada hitam, hima...","roti, mayonnaise",Indonesia,6-8 Bulan
204,205,Pancake Satan Keju Madu,"bubuk pancake, santan kara, susu uht full crea...","keju, mayonnaise",Indonesia,6-8 Bulan


In [7]:
#hapus blankspace dan non word
df[['bahan_resep','bahan_alergen']] = df[['bahan_resep','bahan_alergen']].replace(to_replace=r'[^\w\s]', value=' ', regex=True)
#replace NaN jadi blank
df = df.replace(np.nan, "")
df

Unnamed: 0,id,nama_resep,bahan_resep,bahan_alergen,daerah_resep,umur_resep
0,1,Bubur Hati Ayam,nasi hati ayam bayam minyak jagung garam ...,,Indonesia,6-8 Bulan
1,2,Bubur Hati Ayam Santan,nasi hati ayam wortel santan minyak kelapa...,,Indonesia,6-8 Bulan
2,3,Bubur Pepes Hati Ayam,nasi hati ayam wortel minyak kelapa tomat ...,,Indonesia,6-8 Bulan
3,4,Bubur Udang Tahu,nasi udang giling santan tahu tomat keman...,udang tahu,Indonesia,6-8 Bulan
4,5,Bubur Udang Buncis,nasi udang giling buncis minyak jagung jer...,udang,Indonesia,6-8 Bulan
...,...,...,...,...,...,...
201,202,Ikan Dori Saus Mayo,ikan dori brokoli tomat merah mayonnaise l...,ikan mayonnaise,Indonesia,6-8 Bulan
202,203,Beef Stroganoff,daging wagyu mentega truffle garlic bawang b...,mayonnaise krim,Indonesia,6-8 Bulan
203,204,Buffalo Chicken Toast,ayam bagian dada mayonnaise lada hitam hima...,roti mayonnaise,Indonesia,6-8 Bulan
204,205,Pancake Satan Keju Madu,bubuk pancake santan kara susu uht full crea...,keju mayonnaise,Indonesia,6-8 Bulan


In [8]:
#cek duplikat
df.duplicated().sum()

0

In [9]:
#gabungin bahan resep dan alergen
data = pd.DataFrame(df['bahan_resep'].astype(str)+ ' '+ df['bahan_alergen'].astype(str),columns=['bahan_resep_dan_alergen'])
data

Unnamed: 0,bahan_resep_dan_alergen
0,nasi hati ayam bayam minyak jagung garam ...
1,nasi hati ayam wortel santan minyak kelapa...
2,nasi hati ayam wortel minyak kelapa tomat ...
3,nasi udang giling santan tahu tomat keman...
4,nasi udang giling buncis minyak jagung jer...
...,...
201,ikan dori brokoli tomat merah mayonnaise l...
202,daging wagyu mentega truffle garlic bawang b...
203,ayam bagian dada mayonnaise lada hitam hima...
204,bubuk pancake santan kara susu uht full crea...


In [10]:
tfidf = CountVectorizer()
data_matrix = tfidf.fit_transform(data['bahan_resep_dan_alergen'])


In [11]:
print(data_matrix.shape)

(206, 263)


In [12]:
data_words = tfidf.get_feature_names_out()
print(data_words)

['abon' 'agar' 'air' 'almond' 'alpukat' 'apel' 'aron' 'asam' 'asi' 'asin'
 'asparagus' 'atau' 'avokad' 'awabng' 'ayam' 'baby' 'bagian' 'baking'
 'bandeng' 'batang' 'batita' 'bawag' 'bawal' 'bawang' 'bawnag' 'bayam'
 'bayi' 'beku' 'bengkuang' 'beras' 'biasa' 'bihun' 'biskuit' 'bombai'
 'bombay' 'brokoli' 'buah' 'bubuk' 'bumbu' 'buncis' 'butternut' 'cair'
 'canola' 'cengkeh' 'champgnon' 'champignon' 'cheddar' 'chocolate'
 'cincang' 'cokelat' 'cooking' 'cream' 'dada' 'daging' 'dan' 'dark' 'daun'
 'dayam' 'dori' 'dried' 'edamame' 'extra' 'filet' 'fillet' 'formula'
 'full' 'garam' 'garlic' 'gelatin' 'giling' 'goreng' 'gula' 'gulai'
 'gurame' 'halus' 'hati' 'havermut' 'havernmut' 'heruk' 'hgaram' 'hijau'
 'himalayan' 'hitam' 'hujau' 'iga' 'ikan' 'inggris' 'instant' 'irisan'
 'jagung' 'jahe' 'jalar' 'jambu' 'jamur' 'jawa' 'jeli' 'jelu' 'jeruk'
 'jintan' 'kacang' 'kakap' 'kaki' 'kaldu' 'kampung' 'kancing' 'kangkung'
 'kanola' 'kapulaga' 'kara' 'katuk' 'kayu' 'kayumanis' 'kecap' 'kecil'
 'kedel

In [33]:
print(data_matrix.toarray()[1])
print(data_matrix.shape)

[0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 0 0]
(206, 263)


In [14]:
#user_input = "susu"
#user_input_vector = vectorizer.transform([user_input])

In [25]:
user_input = "minyak kedelai"
user_input_vector = tfidf.transform([user_input])

def find_recipes(user_input):    
    similarities = cosine_similarity(user_input_vector, data_matrix)
    return similarities

similarities=find_recipes(user_input)

In [31]:
print(user_input_vector.toarray())
print(user_input_vector.shape)

[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0]]
(1, 263)


In [26]:
top_n = 10
top_indices = np.argsort(similarities[0])[:top_n]
top_recipes = [df['nama_resep'][i] for i in top_indices]


print(f"{top_n} Rekomendasi Resep MPASI:")
for i, recipe in enumerate(top_recipes, 1):
    print(f"{i}. {recipe}")
    print(similarities)

10 Rekomendasi Resep MPASI:
1. Nasi Tim Hati Sapi
[[0.18898224 0.18898224 0.14433757 0.         0.16666667 0.11624764
  0.21320072 0.12909944 0.16666667 0.         0.26261287 0.18898224
  0.11785113 0.15430335 0.13867505 0.11952286 0.11952286 0.2773501
  0.1767767  0.17149859 0.         0.         0.         0.
  0.14744196 0.44901326 0.14142136 0.10783277 0.         0.
  0.14433757 0.64549722 0.29488391 0.5        0.38729833 0.15811388
  0.31980107 0.31277162 0.14142136 0.19611614 0.45883147 0.15811388
  0.13363062 0.         0.13363062 0.13608276 0.         0.14142136
  0.         0.13130643 0.15811388 0.         0.         0.11785113
  0.11785113 0.         0.12909944 0.16222142 0.16222142 0.13608276
  0.16222142 0.125      0.         0.         0.         0.
  0.         0.         0.         0.         0.20412415 0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0

In [18]:
top_n = 10
top_indices = np.argsort(similarities[0])[:top_n]
top_recipes = [df['nama_resep'][i] for i in top_indices]

print(f"{top_n} Rekomendasi Resep MPASI:")
result_df = pd.DataFrame(columns=['Resep MPASI'])
                      
for i, recipe in enumerate(top_recipes, 1):
    result = pd.DataFrame({
        'Resep MPASI': [recipe]
    })
    result_df = pd.concat([result_df, result], ignore_index=True)

result_df

10 Rekomendasi Resep MPASI:


Unnamed: 0,Resep MPASI
0,Nasi Tim Hati Sapi
1,Bubur Susu Brokoli
2,Bubur Susu Jagung Manis
3,Bubur Susu Apel Jagung
4,Pure Semangka
5,Bubur Tenggiri Brokoli
6,Bubur Saring Kentang Sayuran
7,Bubur Saring Salmon
8,Bubur Oat Rasa Cokelat
9,Bubur Hijau


In [28]:
user_input = "nasi"
def recipe_similarity(user_input_vector,data_matrix):
    A = np.array(user_input_vector)
    B = np.array(data_matrix)

    intersect = np.intersect1d(A,B)
    similarity = 2*intersect/set(A)+set(B)
    return similarity

def find_recipes(user_input):    
    user_input_vector = tfidf.transform([user_input])
    similarities = recipe_similarity(user_input_vector, data_matrix)
    return similarities

similarities=find_recipes(user_input)

ValueError: inconsistent shapes

In [None]:
''' def recipe_similarity():

    if A.ndim == 1:
        A = A.reshape(1, -1)

    similarity = []

    for row in B:
        intersect = np.intersect1d(A,row)
        if len(A)+len(row) >0 :
            similarity =  2 * len(intersect) / (len(A) + len(row))
        else:
            similarity = 0.0

    similarity.append(similarity)
    similarities = np.array(similarities)
    #intersect = set(A).intersection(set(B))
    #recipe_similarity = 2*intersect/(set(A)+set(B))
    
    return similarity
    


In [None]:
#print(similarities)

In [None]:
#from scipy.sparse import csr_matrix

#def cosine_similarity(user_input_vector,data_matrix):
#    cosim = cosine_similarity(user_input_vector,data_matrix)
#    return cosim

#def find_recipes(user_input):
#    similarities = cosine_similarity(user_input_vector, data_matrix)
#    return similarities

In [None]:
#similarities=find_recipes(user_input)

#top_n = 10
#top_indices = np.argsort(similarities[0].argsort())[:top_n]
#top_recipes = [data['nama_resep'][i] for i in top_indices]

#print(f"Top {top_n} Recipe Recommendations:")
#for i, recipe in enumerate(top_recipes, 1):
#    print(f"{i}. {recipe}")

In [None]:
#print(f"Top {top_n} Recipe Recommendations:")
#for i, recipe in enumerate(top_recipes, 1):
#    print(f"{i}. {recipe}")