In [1]:
import pandas as pd
import numpy as np
import json
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the dataset
dataset = pd.read_csv('Gusto Dataset - Cleaned.csv')

# Define the user input in JSON format
user_input = {"ingredients": ["Fish", "eggs", "sauce", "noodles"]}

# Preprocess the dataset
dataset["text"] = dataset["Title"] + " " + dataset["Instructions"] + " " + dataset["Cleaned_Ingredients"]

In [2]:
# Vectorize the text column using TF-IDF vectorizer
vectorizer = TfidfVectorizer(stop_words="english")
x = vectorizer.fit_transform(dataset['text'].apply(lambda x: np.str_(x)))

# Vectorize the user input
user_input_text = " ".join(user_input["ingredients"]).lower()
user_input_matrix = vectorizer.transform([user_input_text])

# Calculate cosine similarity between user input and all recipes in the dataset
similarity_scores = cosine_similarity(user_input_matrix, x).flatten()

# Get the indices of top 10 most similar recipes
top_indices = np.argsort(similarity_scores)[-10:][::-1]

In [3]:
# Generate recipe output in JSON format
recipe_output = []
for index in top_indices:
    recipe = {}
    recipe["title"] = dataset.loc[index, "Title"]
    recipe['ingredients'] = dataset.loc[index, 'Cleaned_Ingredients'].split(',')
    recipe['instructions'] = dataset.loc[index, 'Instructions'].split('.')
    recipe['image_name'] = dataset.loc[index, 'Image_Name']
    recipe_output.append(recipe)
    
# # Generate recipe output in JSON format
# recipe_output = []
# for index, score in enumerate(similarity_scores):
#     if score > 0.5: # Set a threshold for similarity score
#         recipe = {}
#         recipe['title'] = dataset.loc[index, 'Title']
#         recipe['ingredients'] = dataset.loc[index, 'Cleaned_Ingredients'].split(',')
#         recipe['instructions'] = dataset.loc[index, 'Instructions'].split('.')
#         recipe['image_name'] = dataset.loc[index, 'Image_Name']
#         recipe_output.append(recipe)


# Print recipe output in JSON format
print(json.dumps(recipe_output, indent=4))

[
    {
        "title": "Easy Pad Thai",
        "ingredients": [
            "['8 ounces pad thai or lo mein noodles'",
            " '2 tablespoons vegetable oil'",
            " '1 clove garlic",
            " minced'",
            " '2 large eggs'",
            " '1 1/2 tablespoons soy sauce'",
            " '2 tablespoons fresh lime juice (from about 1 medium lime)'",
            " '2 tablespoons brown sugar'",
            " '1 teaspoon fish sauce'",
            " '1/8 teaspoon red pepper flakes'",
            " '3 green onions",
            " sliced'",
            " '1/4 bunch fresh cilantro",
            " leaves only",
            " roughly chopped'",
            " '1/4 cup chopped'",
            " 'unsalted peanuts']"
        ],
        "instructions": [
            "Bring a large pot of water to a rolling boil",
            " Add the noodles and cook for 7 to 10 minutes or until tender",
            " Drain the noodles and set aside",
            "\nIn a large skillet, heat 