In [104]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import jaccard_score
from sklearn.metrics.pairwise import cosine_similarity

In [105]:
# Load the recipe sheet; blank cells become '' so the text columns can later be
# concatenated and tokenised without any NaN handling.
df = pd.read_excel("Concept_Data Resep_Updated.xlsx")
df = df.fillna('')
# 'umur_resep' is read as comma-separated integers (e.g. "6,7,8") and parsed
# into a list of ints per recipe. Blank tokens are skipped so that a cell
# emptied by fillna('') yields [] instead of raising ValueError from int('').
df['umur_resep'] = df['umur_resep'].astype(str).apply(
    lambda cell: [int(token) for token in cell.split(',') if token.strip()]
)

In [106]:
# Work on a separate DataFrame so the text clean-up below does not touch `df`.
df_clean = df.copy(deep=True)
df_clean

Unnamed: 0,id,nama_resep,bahan_resep,bahan_alergen,nationality_resep,daerah_resep,umur_resep
0,1,Bubur Hati Ayam,"nasi, hati ayam, bayam, minyak jagung, garam, ...",,Indonesian,Jawa Timur,"[6, 7, 8]"
1,2,Bubur Hati Ayam Santan,"nasi, hati ayam, wortel, santan, minyak kelapa...",,Indonesian,Jawa Timur,"[6, 7, 8]"
2,3,Bubur Pepes Hati Ayam,"nasi, hati ayam, wortel, minyak kelapa, tomat,...",,Indonesian,,"[6, 7, 8]"
3,4,Bubur Udang Tahu,"nasi, udang giling, santan, tahu, tomat, keman...","udang, tahu",Indonesian,Sumatera Barat,"[6, 7, 8]"
4,5,Bubur Udang Buncis,"nasi, udang giling, buncis, minyak jagung, jer...",udang,Indonesian,Jawa Timur,"[6, 7, 8]"
...,...,...,...,...,...,...,...
222,223,Puree Alpukat,"alpukat, asi",,Amerika Latin,Meksiko,"[6, 7, 8, 9, 10, 11, 12]"
223,224,Granadilla,markisa,,Amerika Latin,Peru,"[6, 7, 8, 9, 10, 11, 12]"
224,225,Vietnamese Lemongrass Beef,"daging sapi giling, buncis, paprika, bawang pu...",kecap,Asia Tenggara,Vietnam,[12]
225,226,Sup Tofu,"bihun jagung, tauge, sawi hijau, daun bawang, ...","kecap, tofu, mie",Asia Tenggara,Vietnam,[12]


In [107]:
# Replace every character that is neither a word character nor whitespace
# (commas, hyphens, ...) with a single space in both ingredient text columns.
df_clean[['bahan_resep', 'bahan_alergen']] = (
    df_clean[['bahan_resep', 'bahan_alergen']]
    .replace(regex=True, to_replace=r'[^\w\s]', value=' ')
)
df_clean

Unnamed: 0,id,nama_resep,bahan_resep,bahan_alergen,nationality_resep,daerah_resep,umur_resep
0,1,Bubur Hati Ayam,nasi hati ayam bayam minyak jagung garam ...,,Indonesian,Jawa Timur,"[6, 7, 8]"
1,2,Bubur Hati Ayam Santan,nasi hati ayam wortel santan minyak kelapa...,,Indonesian,Jawa Timur,"[6, 7, 8]"
2,3,Bubur Pepes Hati Ayam,nasi hati ayam wortel minyak kelapa tomat ...,,Indonesian,,"[6, 7, 8]"
3,4,Bubur Udang Tahu,nasi udang giling santan tahu tomat keman...,udang tahu,Indonesian,Sumatera Barat,"[6, 7, 8]"
4,5,Bubur Udang Buncis,nasi udang giling buncis minyak jagung jer...,udang,Indonesian,Jawa Timur,"[6, 7, 8]"
...,...,...,...,...,...,...,...
222,223,Puree Alpukat,alpukat asi,,Amerika Latin,Meksiko,"[6, 7, 8, 9, 10, 11, 12]"
223,224,Granadilla,markisa,,Amerika Latin,Peru,"[6, 7, 8, 9, 10, 11, 12]"
224,225,Vietnamese Lemongrass Beef,daging sapi giling buncis paprika bawang pu...,kecap,Asia Tenggara,Vietnam,[12]
225,226,Sup Tofu,bihun jagung tauge sawi hijau daun bawang ...,kecap tofu mie,Asia Tenggara,Vietnam,[12]


In [108]:
# Assemble the frame used for vectorisation: the recipe name, one combined text
# field (cleaned ingredients, a space, cleaned allergens) and the age list.
data = pd.DataFrame(
    dict(
        nama_resep=df['nama_resep'],
        bahan_resep_dan_alergen=(
            df_clean['bahan_resep'].astype(str)
            + ' '
            + df_clean['bahan_alergen'].astype(str)
        ),
        umur_resep=df_clean['umur_resep'],
    )
)
data

Unnamed: 0,nama_resep,bahan_resep_dan_alergen,umur_resep
0,Bubur Hati Ayam,nasi hati ayam bayam minyak jagung garam ...,"[6, 7, 8]"
1,Bubur Hati Ayam Santan,nasi hati ayam wortel santan minyak kelapa...,"[6, 7, 8]"
2,Bubur Pepes Hati Ayam,nasi hati ayam wortel minyak kelapa tomat ...,"[6, 7, 8]"
3,Bubur Udang Tahu,nasi udang giling santan tahu tomat keman...,"[6, 7, 8]"
4,Bubur Udang Buncis,nasi udang giling buncis minyak jagung jer...,"[6, 7, 8]"
...,...,...,...
222,Puree Alpukat,alpukat asi,"[6, 7, 8, 9, 10, 11, 12]"
223,Granadilla,markisa,"[6, 7, 8, 9, 10, 11, 12]"
224,Vietnamese Lemongrass Beef,daging sapi giling buncis paprika bawang pu...,[12]
225,Sup Tofu,bihun jagung tauge sawi hijau daun bawang ...,[12]


In [109]:
user_input = "susu yogurt krim mentega whey kasein keju"


def _dice_similarity(query_terms, recipe_terms):
    """Return the Sorensen-Dice coefficient of two sets (0.0 when both are empty)."""
    total_terms = len(query_terms) + len(recipe_terms)
    if total_terms == 0:
        # Nothing to compare on either side; avoid dividing by zero.
        return 0.0
    return 2 * float(len(query_terms & recipe_terms)) / total_terms


def find_recipes_sim(data_matrix, user_input, min_similarity = 0):
    """Score recipes against a query by the Sorensen-Dice overlap of their terms.

    Parameters
    ----------
    data_matrix : pandas.DataFrame
        Recipe table (despite the name, a frame rather than a matrix). The text
        to compare is taken from its 'bahan_resep_dan_alergen' column when that
        column exists, otherwise from 'bahan_resep' and 'bahan_alergen' joined
        with a space.
    user_input : str
        Free text that is tokenised with the same CountVectorizer as the
        recipes; terms that never occur in any recipe are ignored by it.
    min_similarity : float, default 0
        Only recipes whose score is *exactly equal* to this value are returned.
        NOTE(review): the name suggests a lower bound, but the equality test is
        kept on purpose: with the default of 0 it selects the recipes that
        share no term at all with `user_input`.

    Returns
    -------
    list of (pandas.Series, float)
        One (recipe row, score) pair per selected recipe, in the order given by
        ``np.argsort`` of the scores. The row holds whichever of 'nama_resep',
        'bahan_resep', 'bahan_alergen' and 'umur_resep' exist in `data_matrix`.
    """
    # Use the frame that was passed in rather than notebook-level globals, so
    # the result depends only on the arguments.
    if 'bahan_resep_dan_alergen' in data_matrix.columns:
        corpus = data_matrix['bahan_resep_dan_alergen'].astype(str)
    else:
        # CountVectorizer's default token pattern only keeps runs of word
        # characters, so punctuation in the raw text does not reach the vocabulary.
        corpus = (
            data_matrix['bahan_resep'].astype(str)
            + ' '
            + data_matrix['bahan_alergen'].astype(str)
        )

    vectorizer = CountVectorizer()
    recipe_counts = vectorizer.fit_transform(corpus)
    query_counts = vectorizer.transform([user_input])

    # Only the presence of a term matters, so reduce each sparse row to the set
    # of vocabulary indices it contains. The query set is built once.
    query_terms = set(query_counts.indices)
    similarities = [
        _dice_similarity(query_terms, set(recipe_row.indices))
        for recipe_row in recipe_counts
    ]

    wanted_columns = ['nama_resep', 'bahan_resep', 'bahan_alergen', 'umur_resep']
    output = data_matrix[[name for name in wanted_columns if name in data_matrix.columns]]

    # np.argsort yields positions, hence .iloc (not label-based .loc).
    top_recipes = [
        (output.iloc[i], similarities[i])
        for i in np.argsort(similarities)
        if similarities[i] == min_similarity
    ]

    return top_recipes

# Score every recipe against the query, then flatten each (row, score) pair
# into one record so the selection can be displayed as a table.
recommended_recipe = find_recipes_sim(df, user_input)

recipe_result = pd.DataFrame(
    [
        dict(recipe_row.to_dict(), similarity_score=score)
        for recipe_row, score in recommended_recipe
    ]
)

recipe_result


Unnamed: 0,nama_resep,bahan_resep,bahan_alergen,umur_resep,similarity_score
0,Bubur Hati Ayam,"nasi, hati ayam, bayam, minyak jagung, garam, ...",,"[6, 7, 8]",0.0
1,Bubur Oat Rasa Cokelat,"oat, bubuk cokelat, air, ASI",,"[6, 7, 8]",0.0
2,Bubur Hijau,"kacang hijau, daun bayam, air, air rebusan kac...",kacang,"[6, 7, 8]",0.0
3,Bubur Havermut,"havermut, air, avokad, kuning telur rebus",telur,"[6, 7, 8]",0.0
4,Bubur Saring Manado,"beras, air, labu kuning, ikan kakap, daun kang...",ikan,"[6, 7, 8]",0.0
...,...,...,...,...,...
135,Bubur Susu Pertama,"tepung beras putih, air, ASI",tepung,"[6, 7, 8]",0.0
136,Tim Bubur Manado Daging dan Udang,"nasi, labu kuning, daging sapi, udang, jagung,...",udang,"[9, 10, 11]",0.0
137,Telur Kwek Kwek,"telur puyuh, tepung maizena, tepung terigu, ga...","telur, tepung",[12],0.0
138,Nasi Tim Ayam Lele Cincang,"nasi, ayam cincang, ikan lele, wortel, minyak ...",ikan,"[9, 10, 11]",0.0


In [110]:
user_age = 9
top_n = 10

# Keep only the recipes whose age list contains the user's age.
filtered_data = recipe_result[recipe_result['umur_resep'].apply(lambda ages: user_age in ages)]
# Take the first `top_n` rows as an explicit copy: `filtered_pd` is then its own
# frame, so assigning columns on it later cannot trigger pandas'
# SettingWithCopyWarning or write through to `recipe_result`.
filtered_pd = filtered_data.head(top_n).copy()
filtered_pd

Unnamed: 0,nama_resep,bahan_resep,bahan_alergen,umur_resep,similarity_score
10,Tim Beras Merah Kakap,"beras merah, ikan kakap, tahu, tomat, daun bay...",ikan,"[9, 10, 11]",0.0
11,Nasi Tim Sari Tomat,"beras merah, tomat, tahu, ikan sarden, kacang ...","tahu,ikan,kacang","[9, 10, 11]",0.0
12,Tim Havermut Daging Giling,"daging sapi giling, havermut, air, brokoli, ta...",tahu,"[9, 10, 11]",0.0
13,Mi Tim Kakap Merah,"kacang merah, mie, air, minyak zaitun extra vi...","mie,ikan","[9, 10, 11]",0.0
14,Tim Jagung Salmon,"margarin, bawang putih, bawang merah, jagung m...","ikan,kacang","[9, 10, 11]",0.0
15,Tim Merah Sapi Wortel,"beras merah, daging sapi, wortel, irisan seled...",tahu,"[9, 10, 11]",0.0
16,Agar-agar Buah,"buah pepaya, melon oranye, mangga, air, gula h...",,"[9, 10, 11]",0.0
21,Pure Bayam Jagung Tahu,"daun bayam, jagung, tahu, kaldu ayam",tahu,"[9, 10, 11]",0.0
22,Tim Makaroni Bayam,"daun bayam, makaroni, ikan kakap, air, margarin","ikan,pasta","[9, 10, 11]",0.0
23,Nasi Tim Hati Ayam,"air, beras, hati ayam, tempe, labu kuning, tom...",tempe,"[9, 10, 11]",0.0


In [111]:
def hamming_distance(df, user_input):
    """Count the positions at which two equal-length sequences differ.

    Both arguments are compared element by element (character by character for
    strings). Returns None when their lengths are not the same.
    """
    if len(df) == len(user_input):
        return sum(1 for left, right in zip(df, user_input) if left != right)
    return None


In [112]:
# Strip punctuation from the ingredient text. `assign` rebinds `filtered_pd` to a
# new frame instead of writing a column into what may be a slice of another frame.
filtered_pd = filtered_pd.assign(
    bahan_resep=filtered_pd['bahan_resep'].replace(to_replace=r'[^\w\s]', value=' ', regex=True)
)

# The allergen words do not depend on the recipe, so split them once.
# split() without an argument also drops the empty strings that repeated spaces
# would otherwise produce.
allergen_words = user_input.split()

for _, recipe in filtered_pd.iterrows():
    ingredient_words = recipe['bahan_resep'].split()

    # The loop variable is `allergen` rather than `input`, so the built-in
    # `input` is not overwritten in the notebook namespace.
    for allergen in allergen_words:
        for ingredient in ingredient_words:
            # Only equal-length words are compared; hamming_distance returns
            # None for any other pair.
            if len(ingredient) == len(allergen):
                # A distance of 0 means the ingredient word equals the allergen word.
                distance = hamming_distance(ingredient, allergen)
                print(f"Hamming distance between '{ingredient}' and '{allergen}' in '{recipe['nama_resep']}': {distance}")
                




Hamming distance between 'ikan' and 'susu' in 'Tim Beras Merah Kakap': 4
Hamming distance between 'tahu' and 'susu' in 'Tim Beras Merah Kakap': 3
Hamming distance between 'daun' and 'susu' in 'Tim Beras Merah Kakap': 4
Hamming distance between 'ikan' and 'krim' in 'Tim Beras Merah Kakap': 4
Hamming distance between 'tahu' and 'krim' in 'Tim Beras Merah Kakap': 4
Hamming distance between 'daun' and 'krim' in 'Tim Beras Merah Kakap': 4
Hamming distance between 'ikan' and 'whey' in 'Tim Beras Merah Kakap': 4
Hamming distance between 'tahu' and 'whey' in 'Tim Beras Merah Kakap': 4
Hamming distance between 'daun' and 'whey' in 'Tim Beras Merah Kakap': 4
Hamming distance between 'ikan' and 'keju' in 'Tim Beras Merah Kakap': 4
Hamming distance between 'tahu' and 'keju' in 'Tim Beras Merah Kakap': 3
Hamming distance between 'daun' and 'keju' in 'Tim Beras Merah Kakap': 4
Hamming distance between 'tahu' and 'susu' in 'Nasi Tim Sari Tomat': 3
Hamming distance between 'ikan' and 'susu' in 'Nasi T

In [113]:
# Rebuild the bag-of-words model for a single-word query and print its
# intermediate structures for the recipe at position 69.
# NOTE: this rebinds the notebook-level `user_input`.
user_input = "telur"

vectorizer = CountVectorizer()
data_matrix = vectorizer.fit_transform(data['bahan_resep_dan_alergen'])
user_matrix = vectorizer.transform([user_input])

# For each sparse row, the column indices it stores (the vocabulary entries
# that occur in that text).
data_matrix_sets = [doc.indices for doc in data_matrix]
user_matrix_sets = [doc.indices for doc in user_matrix][0]

# Vocabulary terms in column order; enumerate them to see each term's column index.
data_words = vectorizer.get_feature_names_out()

print(
    data_matrix.shape,           # (number of texts, vocabulary size)
    data_matrix.toarray()[69],   # dense term counts of recipe 69
    user_matrix.toarray(),       # dense term counts of the query
    data_matrix_sets[69],        # vocabulary indices present in recipe 69
    user_matrix_sets,            # vocabulary indices present in the query
    data_words,
    sep='\n',
)

(227, 277)
[0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 

In [114]:
def HelloWorld(a):
    """Print `a` to standard output; the return value is always None."""
    print(a)
    return None



In [115]:
# Smoke-test the helper: this cell writes the word "print" to the output.
HelloWorld(a='print')

print
