In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
df = pd.read_excel("Concept_Data Resep.xlsx")
df

Unnamed: 0,id,nama_resep,bahan_resep,bahan_alergen,daerah_resep,umur_resep
0,1,Bubur Hati Ayam,"nasi, hati ayam, bayam, minyak jagung, garam, ...",,Indonesia,6-8 Bulan
1,2,Bubur Hati Ayam Santan,"nasi, hati ayam, wortel, santan, minyak kelapa...",,Indonesia,6-8 Bulan
2,3,Bubur Pepes Hati Ayam,"nasi, hati ayam, wortel, minyak kelapa, tomat,...",,Indonesia,6-8 Bulan
3,4,Bubur Udang Tahu,"nasi, udang giling, santan, tahu, tomat, keman...","udang, tahu",Indonesia,6-8 Bulan
4,5,Bubur Udang Buncis,"nasi, udang giling, buncis, minyak jagung, jer...",udang,Indonesia,6-8 Bulan
...,...,...,...,...,...,...
201,202,Ikan Dori Saus Mayo,"ikan dori, brokoli, tomat merah, mayonnaise, l...","ikan, mayonnaise",Indonesia,6-8 Bulan
202,203,Beef Stroganoff,"daging wagyu, mentega truffle garlic, bawang b...","mayonnaise, krim",Indonesia,6-8 Bulan
203,204,Buffalo Chicken Toast,"ayam bagian dada, mayonnaise, lada hitam, hima...","roti, mayonnaise",Indonesia,6-8 Bulan
204,205,Pancake Satan Keju Madu,"bubuk pancake, santan kara, susu uht full crea...","keju, mayonnaise",Indonesia,6-8 Bulan


In [3]:
#hapus blankspace dan non word
df[['bahan_resep','bahan_alergen']] = df[['bahan_resep','bahan_alergen']].replace(to_replace=r'[^\w\s]', value='', regex=True)
#replace NaN jadi blank
df = df.replace(np.nan, "")
df

Unnamed: 0,id,nama_resep,bahan_resep,bahan_alergen,daerah_resep,umur_resep
0,1,Bubur Hati Ayam,nasi hati ayam bayam minyak jagung garam bawan...,,Indonesia,6-8 Bulan
1,2,Bubur Hati Ayam Santan,nasi hati ayam wortel santan minyak kelapa saw...,,Indonesia,6-8 Bulan
2,3,Bubur Pepes Hati Ayam,nasi hati ayam wortel minyak kelapa tomat daun...,,Indonesia,6-8 Bulan
3,4,Bubur Udang Tahu,nasi udang giling santan tahu tomat kemangi da...,udang tahu,Indonesia,6-8 Bulan
4,5,Bubur Udang Buncis,nasi udang giling buncis minyak jagung jeruk n...,udang,Indonesia,6-8 Bulan
...,...,...,...,...,...,...
201,202,Ikan Dori Saus Mayo,ikan dori brokoli tomat merah mayonnaise lada ...,ikan mayonnaise,Indonesia,6-8 Bulan
202,203,Beef Stroganoff,daging wagyu mentega truffle garlic bawang bom...,mayonnaise krim,Indonesia,6-8 Bulan
203,204,Buffalo Chicken Toast,ayam bagian dada mayonnaise lada hitam himalay...,roti mayonnaise,Indonesia,6-8 Bulan
204,205,Pancake Satan Keju Madu,bubuk pancake santan kara susu uht full cream ...,keju mayonnaise,Indonesia,6-8 Bulan


In [4]:
#cek duplikat
df.duplicated().sum()

0

In [5]:
#gabungin bahan resep dan alergen
data = pd.DataFrame(df['bahan_resep'].astype(str)+ ' '+ df['bahan_alergen'].astype(str),columns=['bahan_resep_dan_alergen'])
data

Unnamed: 0,bahan_resep_dan_alergen
0,nasi hati ayam bayam minyak jagung garam bawan...
1,nasi hati ayam wortel santan minyak kelapa saw...
2,nasi hati ayam wortel minyak kelapa tomat daun...
3,nasi udang giling santan tahu tomat kemangi da...
4,nasi udang giling buncis minyak jagung jeruk n...
...,...
201,ikan dori brokoli tomat merah mayonnaise lada ...
202,daging wagyu mentega truffle garlic bawang bom...
203,ayam bagian dada mayonnaise lada hitam himalay...
204,bubuk pancake santan kara susu uht full cream ...


In [6]:
tfidf = TfidfVectorizer()
data_matrix = tfidf.fit_transform(data['bahan_resep_dan_alergen'])


In [7]:
print(data_matrix.shape)

(206, 351)


In [8]:
data_words = tfidf.get_feature_names_out()
print(data_words)

['abon' 'agaragar' 'air' 'airsusukaldu' 'almond' 'alpukat' 'apel' 'aron'
 'asam' 'asi' 'asin' 'asipasiair' 'asisusu' 'asparagus' 'atau' 'avokad'
 'awabng' 'ayam' 'ayamikandaging' 'ayamsapi' 'baby' 'bagian' 'baking'
 'bandeng' 'batang' 'batita' 'bawag' 'bawal' 'bawang' 'bawangudang'
 'bawnag' 'bayam' 'bayi' 'beku' 'bengkuang' 'beras' 'biasa' 'bihun'
 'biskuit' 'bombai' 'bombay' 'brokoli' 'buah' 'bubuk' 'bumbu' 'buncis'
 'butternut' 'cair' 'canola' 'cengkeh' 'champgnon' 'champignon' 'cheddar'
 'chocolate' 'cincang' 'cokelat' 'cooking' 'cream' 'dada' 'daging' 'dan'
 'dark' 'daun' 'dayam' 'dori' 'dried' 'edamame' 'extra' 'filet' 'fillet'
 'formula' 'full' 'garam' 'garlic' 'gelatin' 'giling' 'goreng' 'gula'
 'gulai' 'gurame' 'halus' 'hati' 'havermut' 'havermutkacang' 'havernmut'
 'heruk' 'hgaram' 'hijau' 'himalayan' 'hitam' 'hujau' 'iga' 'ikan'
 'ikankacang' 'ikankeju' 'ikanminyak' 'ikanpasta' 'ikanrotikejutepung'
 'ikantahu' 'ikantahumentegakecap' 'ikantempekeju' 'ikantempementega'
 'ikant

In [9]:
print(data_matrix.toarray()[1])

[0.         0.         0.17116062 0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.19122936
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.15833317 0.
 0.43039392 0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.20097685 0.         0.         0.         0.         0.
 0.         0.         0.         0.3087462  0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0. 

In [10]:
#user_input = "susu"
#user_input_vector = vectorizer.transform([user_input])

In [51]:
user_input = "ikan"
def calculate_similarity(user_input_vector, recipe_matrix):
    return cosine_similarity(user_input_vector, recipe_matrix)

def find_recipes(user_input):    
    user_input_vector = tfidf.transform([user_input])
    similarities = calculate_similarity(user_input_vector, data_matrix)
    return similarities

similarities=find_recipes(user_input)

In [42]:
top_n = 10
top_indices = np.argsort(similarities[0])[:top_n]
top_recipes = [df['nama_resep'][i] for i in top_indices]


print(f"{top_n} Rekomendasi Resep MPASI:")
for i, recipe in enumerate(top_recipes, 1):
    print(f"{i}. {recipe}")
    print(similarities)

TypeError: 'float' object is not subscriptable

In [49]:
user_input = "nasi"
def recipe_similarity(user_input_vector,data_matrix):
    A = user_input_vector()
    B = data_matrix()
    intersect = np.intersect1d(A,B)
    intersect_len = len(intersect)
    recipe_similarity = 2*intersect_len/(len(A)+len(B))
    
    return recipe_similarity

def find_recipes(user_input):    
    user_input_vector = tfidf.transform([user_input])
    similarities = recipe_similarity(user_input_vector, data_matrix)
    return similarities

similarities=find_recipes(user_input)

In [52]:
print(similarities)

[[0.         0.         0.         0.         0.         0.31991394
  0.         0.         0.45302521 0.         0.         0.
  0.30727878 0.         0.         0.17089421 0.34290562 0.54474984
  0.         0.         0.         0.         0.         0.
  0.         0.         0.42043971 0.         0.17643699 0.
  0.         0.         0.         0.         0.         0.
  0.11878117 0.         0.34771505 0.         0.         0.41312615
  0.         0.         0.         0.         0.         0.20872001
  0.         0.         0.         0.         0.31367343 0.
  0.         0.         0.         0.38849097 0.3943127  0.
  0.35490294 0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.20212717 0.         0.         0.
  0.         0.         0.         0.         0.38480819 0.
  0.         0.2455784  0.         0.         0.22296666 0.21471995


In [None]:
#from scipy.sparse import csr_matrix

#def cosine_similarity(user_input_vector,data_matrix):
#    cosim = cosine_similarity(user_input_vector,data_matrix)
#    return cosim

#def find_recipes(user_input):
#    similarities = cosine_similarity(user_input_vector, data_matrix)
#    return similarities

In [None]:
#similarities=find_recipes(user_input)

#top_n = 10
#top_indices = np.argsort(similarities[0].argsort())[:top_n]
#top_recipes = [data['nama_resep'][i] for i in top_indices]

#print(f"Top {top_n} Recipe Recommendations:")
#for i, recipe in enumerate(top_recipes, 1):
#    print(f"{i}. {recipe}")

In [None]:
#print(f"Top {top_n} Recipe Recommendations:")
#for i, recipe in enumerate(top_recipes, 1):
#    print(f"{i}. {recipe}")