In [62]:
import json
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [63]:
DATA_DIR = Path("data")
RECIPES_DATA = DATA_DIR / "recipe-ingredients-dataset.csv"

In [64]:
df = pd.read_csv(RECIPES_DATA)[:5000]

In [65]:
df["tags"] = df["tags"].apply(lambda x: " ".join(json.loads(x.replace('\'', '"').replace('{', '[').replace('}', ']'))))
df["search_terms"] = df["search_terms"].apply(lambda x: " ".join(json.loads(x.replace('\'', '"').replace('{', '[').replace('}', ']'))))
df["ingredients"] = df["ingredients"].apply(lambda x: " ".join(json.loads(x.replace('\'', '"'))))

In [66]:
df = df.set_index("id")[["name", "description", "ingredients", "tags", "search_terms", "cuisine"]]

In [67]:
df

Unnamed: 0_level_0,name,description,ingredients,tags,search_terms,cuisine
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
96313,Grilled Garlic Cheese Grits,"We love grits, this is another good way to ser...",water grits salt cheddar cheese garlic olive oil,time-to-make course main-ingredient preparatio...,diabetic low-calorie vegetarian low-carb side,southern_us
232037,Simple Shrimp and Andouille Jambalaya,"Simple, easy and very tasty for when you are i...",onion red bell pepper garlic cloves large shri...,60-minutes-or-less time-to-make course main-in...,dinner shrimp,cajun_creole
41090,black-and-white bean salad,,white beans canned black beans tomatoes onion ...,15-minutes-or-less time-to-make course main-in...,vegetarian salad side dinner vegan,italian
60656,Crock Pot Italian Zucchini,This is a good recipe for weight watchers. It ...,zucchini yellow squash diced tomatoes onion ga...,weeknight time-to-make course main-ingredient ...,side vegetarian italian,italian
232047,Beef Stew With Dried Cherries,This is a fabulous stew that came from one of ...,beef stew meat flour salt allspice cinnamon bl...,time-to-make course main-ingredient preparatio...,dinner,russian
...,...,...,...,...,...,...
232946,Apricot Chicken,A favourite in our home....tastes even better ...,oil skinless chicken breasts plain flour dry o...,60-minutes-or-less time-to-make course main-in...,low-calorie healthy low-carb low-fat dinner ch...,moroccan
268660,Rigatoni in Bianco,A pasta sauce without tomatoes.,onion garlic cloves fennel bulb flat leaf pars...,time-to-make course preparation main-dish 4-ho...,dinner pasta,italian
345227,Molten Chocolate Hot Milk Drink,Only for true chocolate lovers!! Great way to ...,2%25 low-fat milk chocolate syrup chocolate wh...,15-minutes-or-less time-to-make course main-in...,healthy low-calorie low-fat low-sodium,italian
408628,Sarasota's Twisted Double Layer BLT Sandwich,I can not take too much credit for this. This ...,plum tomatoes shallot bibb lettuce bacon avoc...,30-minutes-or-less time-to-make course prepara...,sandwich lunch,mexican


In [68]:
df["all_tags"] = df["description"] + " " + df["ingredients"] + " " + df["tags"] + " " + df["search_terms"] + " " + df["cuisine"]

In [69]:
df = df[["name", "all_tags"]]

In [70]:
df

Unnamed: 0_level_0,name,all_tags
id,Unnamed: 1_level_1,Unnamed: 2_level_1
96313,Grilled Garlic Cheese Grits,"We love grits, this is another good way to ser..."
232037,Simple Shrimp and Andouille Jambalaya,"Simple, easy and very tasty for when you are i..."
41090,black-and-white bean salad,
60656,Crock Pot Italian Zucchini,This is a good recipe for weight watchers. It ...
232047,Beef Stew With Dried Cherries,This is a fabulous stew that came from one of ...
...,...,...
232946,Apricot Chicken,A favourite in our home....tastes even better ...
268660,Rigatoni in Bianco,A pasta sauce without tomatoes. onion garlic c...
345227,Molten Chocolate Hot Milk Drink,Only for true chocolate lovers!! Great way to ...
408628,Sarasota's Twisted Double Layer BLT Sandwich,I can not take too much credit for this. This ...


In [71]:
df.drop(df[df['all_tags'].isnull()].index, inplace=True)
df = df.reset_index()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(df[df['all_tags'].isnull()].index, inplace=True)


In [72]:
tfidf = TfidfVectorizer(max_features=5000)

# Transform the data
vectorized_data = tfidf.fit_transform(df['all_tags'].values)
vectorized_dataframe = pd.DataFrame(vectorized_data.toarray(), index=df['all_tags'].index.tolist())
vectorized_dataframe.shape

(4901, 5000)

In [73]:
svd = TruncatedSVD(n_components=3000)

# Fit transform the data
reduced_data = svd.fit_transform(vectorized_dataframe)
reduced_data

array([[ 4.34841600e-01,  3.45537833e-02,  4.81954269e-02, ...,
        -1.69277910e-03, -3.25176951e-04, -1.88758384e-04],
       [ 2.61653181e-01, -1.84123040e-01,  1.59265410e-01, ...,
         4.88059148e-03,  1.21831872e-03,  1.63821936e-03],
       [ 2.63378472e-01, -1.44961752e-01,  1.15940982e-01, ...,
        -2.15138605e-03, -4.31016167e-03,  2.12503026e-03],
       ...,
       [ 4.77389208e-01,  3.15531461e-01, -1.94010733e-01, ...,
         1.85548194e-03,  6.95013235e-05,  2.09340729e-03],
       [ 2.28313529e-01, -1.84737678e-01, -7.57428761e-03, ...,
         1.64696435e-03, -1.95648332e-03,  3.13342854e-03],
       [ 3.48727121e-01, -3.83926157e-02,  8.29602322e-02, ...,
        -2.02410317e-03,  1.57072042e-03, -6.49210829e-03]])

In [109]:
similarity = cosine_similarity(reduced_data)
def recommendation(prompt):
    target_vector = tfidf.transform(prompt.split())

    id_of_movie = df[df['all_tags'].str.contains(prompt, case=False)].index[0]
    distances = similarity[id_of_movie]
    movie_list = sorted(list(enumerate(distances)), reverse=True, key=lambda x:x[1])[1:10]
    
    for i in movie_list:
        print(df.iloc[i[0]]["name"])

In [110]:

recommendation("pizza")

Beef &amp; Cheddar Sausage Pizza Singles
Easy Spinach and Sausage Pie
Sausage Pie
Breakfast Pizza
Deep Dish Pizza Pot Pie
Pizza Biscuit Bake
Pasquale's Favorite Pizza Topping
Bolognese Pizza Pie
Sloppy Giuseppe's
