In [7]:

#library

import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


#import of data

def load_foods_data():
    data1 = pd.read_csv('RecipeMaterials.csv', index_col='recipe_id')
    return data1

def suggest_recipes_with_preferences(desired_ingredients, allergic_ingredients, num_suggestions=20, allergy_threshold=0.000001):
    foods = load_foods_data()

    vectorizer = TfidfVectorizer()

    # Fit the vectorizer on the entire ingredients dataset
    cc=vectorizer.fit(foods['ingredients'].apply(lambda x: x.replace('mt-', '')))
    print("cc is:",cc)

    # Process desired and allergic ingredients
    desired_ingredients_list = [ingredient.strip().lower().replace('mt-', '') for ingredient in desired_ingredients.split(',')]
    print("desired_ingredients :", desired_ingredients_list)
    allergic_ingredients_list = [ingredient.strip().lower().replace('mt-', '') for ingredient in allergic_ingredients.split(',')]
    print("allergic_ingredients :", allergic_ingredients_list)
    # Transform desired and allergic ingredients into vectors
    desired_ingredients_vector = vectorizer.transform([' '.join(desired_ingredients_list)])
    print("desired_ingredients_vector:", desired_ingredients_vector)
    allergic_ingredients_vector = vectorizer.transform([' '.join(allergic_ingredients_list)])
    print("allergic_ingredients_vector :", allergic_ingredients_vector)
    # Transform ingredients to vectors
    ingredient_vectors = vectorizer.transform(foods['ingredients'])
    print("ingredient_vectors :", ingredient_vectors)
    # Calculate similarity between desired ingredients and all recipes
    desired_similarity_scores = cosine_similarity(desired_ingredients_vector, ingredient_vectors).flatten()
    print("desired_similarity_scores:",desired_similarity_scores)
    # Calculate similarity between allergic ingredients and all recipes
    allergic_similarity_scores = cosine_similarity(allergic_ingredients_vector, ingredient_vectors).flatten()
    print("allergic_similarity_scores :",allergic_similarity_scores)
    # Filter out recipes with high allergic ingredient similarity
    eligible_recipes_mask = allergic_similarity_scores < allergy_threshold
    print("eligible_recipes_mask :",eligible_recipes_mask )
    eligible_foods = foods.iloc[eligible_recipes_mask]
    print("eligible_foods :", eligible_foods)
    eligible_foods['desired_similarity_score'] = desired_similarity_scores[eligible_recipes_mask]
    print(eligible_foods['desired_similarity_score'])
    # Count matching ingredients and add as a new column
    eligible_foods['matching_ingredient_count'] = eligible_foods['ingredients'].apply(
        lambda x: sum(ingredient in x for ingredient in desired_ingredients_list)
    )

    # Sort recipes first by matching ingredient count, then by desired similarity score
    sorted_suggested_recipes = eligible_foods.sort_values(
        by=['matching_ingredient_count', 'desired_similarity_score'], ascending=[False, False]
    )

    return sorted_suggested_recipes.head(num_suggestions)

# Example usage
desired_ingredients = 'mt-garlic, mt-tomatoes, mt-eggs, mt-eggplant'
allergic_ingredients = 'mt-onion, mt-eggs'
suggested_recipes = suggest_recipes_with_preferences(desired_ingredients, allergic_ingredients)
test = pd.DataFrame(suggested_recipes)
test

cc is: TfidfVectorizer()
desired_ingredients : ['garlic', 'tomatoes', 'eggs', 'eggplant']
allergic_ingredients : ['onion', 'eggs']
desired_ingredients_vector:   (0, 355)	0.517684618818992
  (0, 143)	0.40966853946358694
  (0, 123)	0.42059416399939015
  (0, 121)	0.6223141268054748
allergic_ingredients_vector :   (0, 245)	0.5155065139626636
  (0, 123)	0.8568856598532048
ingredient_vectors :   (0, 308)	0.42379072774313464
  (0, 252)	0.3156947409037814
  (0, 245)	0.11606910191570757
  (0, 227)	0.2767368209750707
  (0, 219)	0.24592544908632505
  (0, 211)	0.4482325389577485
  (0, 145)	0.3475301885866515
  (0, 108)	0.325730173622661
  (0, 96)	0.24682783821555448
  (0, 31)	0.28546427087715354
  (1, 308)	0.41272489070838886
  (1, 281)	0.40327826432002356
  (1, 223)	0.2988658321363939
  (1, 211)	0.4365284881018195
  (1, 152)	0.4365284881018195
  (1, 108)	0.3172248506350553
  (1, 64)	0.3097711619709695
  (2, 393)	0.5310293682778215
  (2, 308)	0.463686276396817
  (2, 211)	0.49042903334870463
  (2, 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eligible_foods['desired_similarity_score'] = desired_similarity_scores[eligible_recipes_mask]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eligible_foods['matching_ingredient_count'] = eligible_foods['ingredients'].apply(


Unnamed: 0_level_0,title,ingredients,desired_similarity_score,matching_ingredient_count
recipe_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
355,کاپوناتا,mt-blackpepper|mt-garlic|mt-bellpepper|mt-eggp...,0.443296,3
638,بورانی بادمجان و سیر,mt-blackpepper|mt-salt|mt-yogurt|mt-eggplant|m...,0.558921,2
288,گوجه بادمجان,mt-water|mt-tomatopaste|mt-eggplant|mt-salt|mt...,0.505632,2
365,متبل,mt-eggplant|mt-yogurt|mt-oliveoil|mt-lemonjuic...,0.371859,2
453,سس محمره,mt-tomatoes|mt-garlic|mt-paprika|mt-bellpepper...,0.369605,2
704,پاستا بادمجان,mt-mozzarellacheese|mt-liquidoil|mt-blackpeppe...,0.301909,2
262,تست پنیر و گوجه,mt-blackpepper|mt-garlic|mt-korea|mt-toast|mt-...,0.275818,2
703,پاستا زیتون و گوجه,mt-salt|mt-olive|mt-water|mt-liquidoil|mt-toma...,0.273798,2
623,لازانیا بادمجان,mt-lasagna|mt-tomatopaste|mt-liquidoil|mt-cowm...,0.266156,2
348,حُمص محمره,mt-ardeh|mt-oliveoil|mt-salt|mt-garlic|mt-blac...,0.263059,2


In [6]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def load_foods_data():
    data1 = pd.read_csv('RecipeMaterials.csv', index_col='recipe_id')
    return data1

def suggest_recipes_with_preferences(desired_ingredients, allergic_ingredients, num_suggestions=15, allergy_threshold=0.000001):
    foods = load_foods_data()
    vectorizer = TfidfVectorizer()
    vectorizer.fit(foods['ingredients'].apply(lambda x: x.replace('mt-', '')))
    desired_ingredients_list = [ingredient.strip().lower().replace('mt-', '') for ingredient in desired_ingredients.split(',')]
    allergic_ingredients_list = [ingredient.strip().lower().replace('mt-', '') for ingredient in allergic_ingredients.split(',')]
    desired_ingredients_vector = vectorizer.transform([' '.join(desired_ingredients_list)])
    allergic_ingredients_vector = vectorizer.transform([' '.join(allergic_ingredients_list)])
    ingredient_vectors = vectorizer.transform(foods['ingredients'])
    desired_similarity_scores = cosine_similarity(desired_ingredients_vector, ingredient_vectors).flatten()
    allergic_similarity_scores = cosine_similarity(allergic_ingredients_vector, ingredient_vectors).flatten()
    eligible_recipes_mask = allergic_similarity_scores < allergy_threshold
    eligible_foods = foods.iloc[eligible_recipes_mask]
    eligible_foods['desired_similarity_score'] = desired_similarity_scores[eligible_recipes_mask]
    eligible_foods['matching_ingredient_count'] = eligible_foods['ingredients'].apply(
        lambda x: sum(ingredient in x for ingredient in desired_ingredients_list))
    sorted_suggested_recipes = eligible_foods.sort_values(
        by=['matching_ingredient_count', 'desired_similarity_score'], ascending=[False, False])

    return sorted_suggested_recipes.head(num_suggestions)
# Example usage
desired_ingredients = 'mt-onion,mt-eggs,mt-eggplant'
allergic_ingredients = 'mt-tomatoes'
suggested_recipes = suggest_recipes_with_preferences(desired_ingredients, allergic_ingredients)
test = pd.DataFrame(suggested_recipes)
test

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eligible_foods['desired_similarity_score'] = desired_similarity_scores[eligible_recipes_mask]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eligible_foods['matching_ingredient_count'] = eligible_foods['ingredients'].apply(


Unnamed: 0_level_0,title,ingredients,desired_similarity_score,matching_ingredient_count
recipe_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
776,خورش کرک بپرس,mt-salt|mt-nardon|mt-blackpepper|mt-tomatopast...,0.577309,3
601,تاس‌کباب گوشت و صیفی جات,mt-garlicpowder|mt-mincedmeat|mt-eggs|mt-onion...,0.349757,3
649,کوکو بادمجان حلقه ای,mt-liquidoil|mt-salt|mt-eggs|mt-turmeric|mt-bl...,0.717651,2
648,کوکو بادمجان کبابی,mt-turmeric|mt-liquidoil|mt-eggplant|mt-blackp...,0.6633,2
625,ماست بادمجان,mt-yogurt|mt-eggplant|mt-garlic|mt-salt|mt-oni...,0.534049,2
933,آبگوشت عدس و بادمجان,mt-salt|mt-water|mt-sourgrapejuice|mt-onion|mt...,0.411754,2
364,کشک و بادمجان,mt-driedmint|mt-whey|mt-turmeric|mt-blackpeppe...,0.405954,2
714,املت پیاز,mt-eggs|mt-liquidoil|mt-onion|mt-blackpepper|m...,0.401149,2
772,فسنجان باقلا,mt-water|mt-onion|mt-liquidoil|mt-eggplant|mt-...,0.399666,2
409,پلو بادمجان با مرغ,mt-rice|mt-turmeric|mt-blackpepper|mt-eggplant...,0.399326,2


In [2]:
ingredients = test.loc[776]['ingredients'].split('|')
print(ingredients)

['mt-salt', 'mt-nardon', 'mt-blackpepper', 'mt-tomatopaste', 'mt-liquidoil', 'mt-onion', 'mt-water', 'mt-eggplant', 'mt-eggs']


In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def load_foods_data():
    data1 = pd.read_csv('RecipeMaterials.csv', index_col='recipe_id')
    return data1

def suggest_recipes_with_preferences(desired_ingredients, allergic_ingredients, num_suggestions=10, allergy_threshold=0.000001):
    foods = load_foods_data()
    vectorizer = TfidfVectorizer()
    vectorizer.fit(foods['ingredients'].apply(lambda x: x.replace('mt-', '')))
    desired_ingredients_list = [ingredient.strip().lower().replace('mt-', '') for ingredient in desired_ingredients.split(',')]
    allergic_ingredients_list = [ingredient.strip().lower().replace('mt-', '') for ingredient in allergic_ingredients.split(',')]
    desired_ingredients_vector = vectorizer.transform([' '.join(desired_ingredients_list)])
    allergic_ingredients_vector = vectorizer.transform([' '.join(allergic_ingredients_list)])
    ingredient_vectors = vectorizer.transform(foods['ingredients'])
    desired_similarity_scores = cosine_similarity(desired_ingredients_vector, ingredient_vectors).flatten()
    allergic_similarity_scores = cosine_similarity(allergic_ingredients_vector, ingredient_vectors).flatten()
    eligible_recipes_mask = allergic_similarity_scores < allergy_threshold
    eligible_foods = foods.iloc[eligible_recipes_mask]
    eligible_foods['desired_similarity_score'] = desired_similarity_scores[eligible_recipes_mask]
    eligible_foods['matching_ingredient_count'] = eligible_foods['ingredients'].apply(
        lambda x: sum(ingredient in x for ingredient in desired_ingredients_list))
    sorted_suggested_recipes = eligible_foods.sort_values(
        by=['matching_ingredient_count', 'desired_similarity_score'], ascending=[False, False])
    recipe_ids = sorted_suggested_recipes.index.tolist()
    print('dsfj:',recipe_ids)
    return recipe_ids[:num_suggestions]

# Example usage
desired_ingredients = 'mt-eggplant, mt-garlic, mt-barberry, mt-apple, mt-raisins'
allergic_ingredients = 'mt-celery, mt-banana'
suggested_recipes = suggest_recipes_with_preferences(desired_ingredients, allergic_ingredients)
suggested_recipes


dsfj: [638, 120, 625, 648, 761, 650, 364, 778, 365, 869, 25, 514, 725, 704, 355, 857, 142, 623, 367, 506, 145, 591, 899, 519, 369, 34, 601, 921, 223, 39, 368, 30, 796, 844, 259, 190, 649, 119, 643, 337, 423, 345, 641, 806, 267, 532, 340, 451, 419, 599, 288, 830, 124, 803, 776, 933, 422, 799, 114, 772, 409, 740, 140, 420, 756, 734, 459, 328, 755, 512, 541, 346, 44, 754, 406, 760, 510, 226, 590, 470, 770, 713, 814, 530, 241, 471, 692, 561, 464, 280, 418, 711, 421, 672, 222, 482, 741, 221, 417, 472, 792, 455, 607, 49, 227, 473, 347, 469, 626, 348, 680, 453, 130, 817, 795, 881, 851, 357, 122, 582, 736, 317, 864, 635, 642, 366, 653, 587, 344, 449, 400, 916, 496, 737, 627, 707, 349, 262, 722, 703, 23, 613, 710, 863, 314, 709, 343, 456, 898, 824, 571, 624, 536, 651, 753, 602, 842, 866, 609, 318, 565, 699, 441, 726, 783, 131, 158, 700, 751, 402, 487, 705, 501, 805, 27, 568, 505, 865, 791, 325, 622, 603, 581, 716, 886, 793, 354, 888, 474, 697, 663, 457, 35, 313, 332, 701, 319, 906, 660, 825, 70

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eligible_foods['desired_similarity_score'] = desired_similarity_scores[eligible_recipes_mask]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eligible_foods['matching_ingredient_count'] = eligible_foods['ingredients'].apply(


[638, 120, 625, 648, 761, 650, 364, 778, 365, 869]