In [90]:
import json
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [91]:
DATA_DIR = Path("data")
RECIPES_DATA = DATA_DIR / "recipe-ingredients-dataset.csv"

In [92]:
df = pd.read_csv(RECIPES_DATA)[:25000]

In [93]:
df["tags"] = df["tags"].apply(lambda x: " ".join(json.loads(x.replace('\'', '"').replace('{', '[').replace('}', ']'))))
df["search_terms"] = df["search_terms"].apply(lambda x: " ".join(json.loads(x.replace('\'', '"').replace('{', '[').replace('}', ']'))))
df["ingredients"] = df["ingredients"].apply(lambda x: " ".join(json.loads(x.replace('\'', '"'))))

In [94]:
df = df.set_index("id")[["name", "description", "ingredients", "tags", "search_terms", "cuisine"]]

In [95]:
df

Unnamed: 0_level_0,name,description,ingredients,tags,search_terms,cuisine
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
96313,Grilled Garlic Cheese Grits,"We love grits, this is another good way to ser...",water grits salt cheddar cheese garlic olive oil,time-to-make course main-ingredient preparatio...,diabetic low-calorie vegetarian low-carb side,southern_us
232037,Simple Shrimp and Andouille Jambalaya,"Simple, easy and very tasty for when you are i...",onion red bell pepper garlic cloves large shri...,60-minutes-or-less time-to-make course main-in...,dinner shrimp,cajun_creole
41090,black-and-white bean salad,,white beans canned black beans tomatoes onion ...,15-minutes-or-less time-to-make course main-in...,vegetarian salad side dinner vegan,italian
60656,Crock Pot Italian Zucchini,This is a good recipe for weight watchers. It ...,zucchini yellow squash diced tomatoes onion ga...,weeknight time-to-make course main-ingredient ...,side vegetarian italian,italian
232047,Beef Stew With Dried Cherries,This is a fabulous stew that came from one of ...,beef stew meat flour salt allspice cinnamon bl...,time-to-make course main-ingredient preparatio...,dinner,russian
...,...,...,...,...,...,...
200338,Zsidotojas (Hungarian Egg Dip),"This dip can be used as a sandwich filling, or...",hard-boiled eggs onion butter dijon mustard sa...,30-minutes-or-less time-to-make course main-in...,appetizer low-carb,southern_us
303622,Non-Dairy 'cream' of Mushroom Soup,"Low fat, low cholesterol 'cream' soup made wit...",chicken broth mushrooms non-dairy powdered cof...,30-minutes-or-less time-to-make course main-in...,low-carb soup low-calorie dairy-free,italian
318621,Oatmeal Chocolate Chip Cookies,These are on the lighter side... all my stuff ...,butter applesauce honey brown sugar egg vanill...,30-minutes-or-less time-to-make course prepara...,cookie dessert,southern_us
322475,Apple Crisp,I have not tried this recipe. I got this recip...,apples sugar substitute pancake mix cinnamon b...,60-minutes-or-less time-to-make course main-in...,dessert,southern_us


In [96]:
df["all_tags"] = df["description"] + " " + df["ingredients"] + " " + df["tags"] + " " + df["search_terms"] + " " + df["cuisine"]

In [97]:
df = df[["name", "all_tags"]]

In [98]:
df

Unnamed: 0_level_0,name,all_tags
id,Unnamed: 1_level_1,Unnamed: 2_level_1
96313,Grilled Garlic Cheese Grits,"We love grits, this is another good way to ser..."
232037,Simple Shrimp and Andouille Jambalaya,"Simple, easy and very tasty for when you are i..."
41090,black-and-white bean salad,
60656,Crock Pot Italian Zucchini,This is a good recipe for weight watchers. It ...
232047,Beef Stew With Dried Cherries,This is a fabulous stew that came from one of ...
...,...,...
200338,Zsidotojas (Hungarian Egg Dip),"This dip can be used as a sandwich filling, or..."
303622,Non-Dairy 'cream' of Mushroom Soup,"Low fat, low cholesterol 'cream' soup made wit..."
318621,Oatmeal Chocolate Chip Cookies,These are on the lighter side... all my stuff ...
322475,Apple Crisp,I have not tried this recipe. I got this recip...


In [99]:
df.drop(df[df['all_tags'].isnull()].index, inplace=True)
df = df.reset_index()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(df[df['all_tags'].isnull()].index, inplace=True)


In [114]:
tfidf = TfidfVectorizer(max_features=5000, stop_words="english")

# Transform the data
vectorized_data = tfidf.fit_transform(df['all_tags'].values)
vectorized_dataframe = pd.DataFrame(vectorized_data.toarray(), index=df['all_tags'].index.tolist())
vectorized_dataframe.shape

(24411, 5000)

In [115]:
svd = TruncatedSVD(n_components=3000)

# Fit transform the data
reduced_data = svd.fit_transform(vectorized_dataframe)
reduced_data

array([[ 3.94963222e-01,  1.52434742e-02,  4.63987606e-02, ...,
         5.79943903e-04, -3.43337477e-03, -1.12008220e-03],
       [ 2.05406695e-01, -1.99302069e-01,  1.58825887e-01, ...,
        -2.81809006e-03, -6.68464432e-04, -6.66665699e-03],
       [ 2.14695594e-01, -1.75196155e-01,  9.96434094e-02, ...,
         1.19923854e-03,  7.22635769e-04,  1.41351667e-03],
       ...,
       [ 1.62288549e-01, -2.36992788e-01, -4.07947637e-01, ...,
        -3.29353680e-03,  4.06637127e-04,  1.13741724e-03],
       [ 1.41106973e-01, -1.45896579e-01, -1.88637481e-01, ...,
         7.02527675e-05,  1.07131743e-03, -3.22710573e-03],
       [ 2.48092465e-01, -8.51712526e-02,  1.87752480e-01, ...,
        -2.76643842e-03, -2.77311958e-03, -8.35991091e-04]])

In [127]:
def recommendation(prompt, k=5):
    target_vector = tfidf.transform([prompt])
    reduced_target_vector = svd.transform(target_vector)

    similarities = cosine_similarity(reduced_target_vector, reduced_data).flatten()
    topk = np.argpartition(similarities, -k)[-k:]
    
    return [(df.iloc[i]["name"]) for i in topk]

In [137]:

recommendation("a type of sandwich that uses one piece continuous piece of bread that cradles a protein a fiber and a sort of dressing. something I don't have to cook")

['Secret Ingredient Chocolate Sheet Cake',
 'Turkish Towel Sandwich',
 'Mustard and Rosemary Marinated Flank Steak',
 'Baked Cheese on Toast With Wine',
 'Almond Butter Toffee']

In [129]:
dish_prompts = [
    "I'm looking for a dairy-free breakfast dish with a hint of spice; something like a Mexican-style omelet but without cheese.",
    "Can you suggest a gluten-free Italian lunch option, preferably with a lot of fresh vegetables and no meat?",
    "I need a quick Japanese-inspired snack, but I'm allergic to shellfish—something light and vegetarian would be ideal.",
    "I'm craving a hearty vegan dinner that's Mediterranean-inspired, featuring chickpeas or lentils and lots of fresh herbs.",
    "Could you recommend a kid-friendly dessert that's nut-free but still has a touch of chocolate? Maybe something from French cuisine?",
    "I want a light Thai lunch dish, but it has to be free of soy and seafood, with a focus on fresh and crisp ingredients.",
    "I'm in the mood for a comforting American breakfast but need it to be egg-free—something like pancakes or waffles without eggs would be great.",
    "Give me an Indian-inspired dinner option that's mild in spice and free of dairy. I'd love something with a good mix of veggies.",
    "I need a quick snack that's Mediterranean-style and free of gluten and dairy, ideally something I can eat on the go.",
    "I'm looking for a festive dessert for dinner, inspired by Mexican cuisine but without any cinnamon or nuts."
]

In [133]:
for prompt in dish_prompts:
    print(prompt)
    results = recommendation(prompt)
    for i, dish in enumerate(results, start=1):
        print(f"\t{i}:", dish)
    print()

I'm looking for a dairy-free breakfast dish with a hint of spice; something like a Mexican-style omelet but without cheese.
	1: Garden Vegetable Omelet
	2: Brandi's Yummy Spinach Omelet
	3: Souffle Omelet (Puffy Omelet)
	4: Canadian Bacon and Potato Omelet
	5: Avocado Omelet

Can you suggest a gluten-free Italian lunch option, preferably with a lot of fresh vegetables and no meat?
	1: Management of Childhood Obesity Clean Eating Almond Butter Fudge
	2: Vegan This is Not Your Mama's Pumpkin Soup
	3: Gluten Free Spritz (Press) Cookies
	4: Italian Herb Salad Dressing
	5: Broccoli Casserole Gluten Free

I need a quick Japanese-inspired snack, but I'm allergic to shellfish—something light and vegetarian would be ideal.
	1: Avocado and Prawns in Wasabi
	2: Yakisoba
	3: Ponzu Sauce
	4: Chawan Mushi
	5: California Roll Burgers with Wasabi Mayonnaise

I'm craving a hearty vegan dinner that's Mediterranean-inspired, featuring chickpeas or lentils and lots of fresh herbs.
	1: Lentils With Apricot