In [2]:
from dotenv import load_dotenv
import os
from data_formats import *
import json
from opensearchpy import OpenSearch
from opensearchpy import helpers
from tqdm import tqdm
assert load_dotenv()
# os.environ["NOVA_SEARCH_PW"]


index_name = os.environ["NOVA_SEARCH_US"]

# Create the client with SSL/TLS enabled, but hostname verification disabled.
client = OpenSearch(
    hosts = [{'host': os.environ["NOVA_SEARCH_HOST"], 'port': os.environ["NOVA_SEARCH_PORT"]}],
    http_compress = True, # enables gzip compression for request bodies
    http_auth = (os.environ["NOVA_SEARCH_US"], os.environ["NOVA_SEARCH_PW"]),
    url_prefix = 'opensearch',
    use_ssl = True,
    verify_certs = False,
    ssl_assert_hostname = False,
    ssl_show_warn = False
)

client.indices.exists(index_name)

True

In [3]:
def search_by_recipy_name(name: str, size=5):


    query = {
        "size": size,
        "query": {
            "multi_match": {"query": name, "fields": ["displayName", "description"]}
        },
    }

    return client.search(body=query, index=index_name)


search_by_recipy_name("chicken parmesan")

{'took': 13,
 'timed_out': False,
 '_shards': {'total': 4, 'successful': 4, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 87, 'relation': 'eq'},
  'max_score': 9.324757,
  'hits': [{'_index': 'user205',
    '_type': '_doc',
    '_id': '557',
    '_score': 9.324757,
    '_source': {'displayName': 'Chicken Parmesan',
     'description': None,
     'tools': [],
     'ingredients': [{'displayText': '4  skinless, boneless chicken breast halves',
       'ingredient': None,
       'ingredientId': '32900cb9e09a58a7cc2c3ec42af0fcda56cbb456934b28168d1d12e8968dd3a4',
       'quantity': 4.0,
       'unit': 'COUNT',
       'images': [],
       'embedding': [0.03762403130531311,
        0.014560741372406483,
        -0.021826310083270073,
        0.012402557767927647,
        -0.02465919964015484,
        -0.02514045499265194,
        0.03478061780333519,
        -0.00496554234996438,
        -0.007680173963308334,
        0.001759665901772678,
        -0.03354646638035774,
        -0.127

In [4]:
def get_recipy_by_ingredients(ingredients: List[str], min_should=-1, size=5):

    query = {
        "size": size,
        "query": {
            "bool": {
                "should": [
                    {
                        "nested": {
                            "path": "ingredients",
                            "query": {"multi_match": {'query' : ingredient, 'fields' : ['ingredients.displayText', 'ingredients.ingredient']}},
                        }
                    }
                    for ingredient in ingredients
                ],
                "minimum_should_match": (
                    min_should if min_should > 0 else len(ingredients)
                ),
            }
        }
    }

    return client.search(body=query, index=index_name)

get_recipy_by_ingredients(["oregano", "chicken", "butter"])

{'took': 15,
 'timed_out': False,
 '_shards': {'total': 4, 'successful': 4, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 3, 'relation': 'eq'},
  'max_score': 14.996145,
  'hits': [{'_index': 'user205',
    '_type': '_doc',
    '_id': '415',
    '_score': 14.996145,
    '_source': {'displayName': "Mike's Polish Smothered Chicken",
     'description': None,
     'tools': [],
     'ingredients': [{'displayText': 'Italian dressing or marinade, to taste',
       'ingredient': None,
       'ingredientId': 'aba9a20a31840377347e22108ebc1ea54cebb8edf8e566e22ac3febd59fa21da',
       'quantity': 1.0,
       'unit': 'TO_TASTE',
       'images': [],
       'embedding': [-0.10482634603977203,
        -0.033175043761730194,
        0.006261628121137619,
        0.04040108248591423,
        -0.08248759806156158,
        0.008138930425047874,
        0.08492666482925415,
        -0.01186260487884283,
        -0.011555724777281284,
        -0.1001816987991333,
        0.06172015890479088,
  

In [5]:
def recipy_with_images(size=5):

    query = {
        "size": size,
        "query": {"exists": {"field": "images"}}
    }

    return client.search(body=query, index=index_name)

recipy_with_images()

{'took': 2,
 'timed_out': False,
 '_shards': {'total': 4, 'successful': 4, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 0, 'relation': 'eq'},
  'max_score': None,
  'hits': []}}

In [6]:
def get_recipes_with_less_time(max_total_time: int, size=5):
    query = {
        "size": size,
        "query": {
            "range": {
                "totalTimeMinutes": {"lte": max_total_time}  # Filter by maximum total time
            }
        }
    }

    return client.search(body=query, index=index_name)


recipes = get_recipes_with_less_time(max_total_time=30)
print(recipes)


{'took': 9, 'timed_out': False, '_shards': {'total': 4, 'successful': 4, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 300, 'relation': 'eq'}, 'max_score': 1.0, 'hits': [{'_index': 'user205', '_type': '_doc', '_id': '23', '_score': 1.0, '_source': {'displayName': 'Holiday Salad', 'description': None, 'tools': [], 'ingredients': [{'displayText': '2 lemons, juiced, plus ½ lemon', 'ingredient': 'lemon', 'ingredientId': 'type_09150_00', 'quantity': 2.0, 'unit': 'COUNT', 'images': [], 'embedding': [-0.06840547174215317, 0.03421396389603615, 0.017012322321534157, 0.06956873089075089, -0.02842882089316845, 0.03507789969444275, 0.09402766078710556, 0.0033733344171196222, 0.022020217031240463, -0.08503040671348572, 0.006658882834017277, -0.09691157937049866, -0.0029465456027537584, -0.052372921258211136, -0.031961530447006226, -0.0028116784524172544, 0.03979817032814026, 0.05841398611664772, -0.12319903075695038, -0.11903423070907593, -0.12079279869794846, -0.06400788575410843, 0.0116

In [19]:
def ingredient_similarity_search(ingredient: str, size=5):
    query = {
        "size": size,
        "query": {
            "nested": {
                "path": "ingredients",
                "query": {
                    "knn": {
                        "ingredients.embedding": {
                            "vector": get_embedding(ingredient),
                            "k": 2,
                        }
                    }
                },
            }
        },
    }
    return client.search(body=query, index=index_name)

ingredient = "fenkuł"
recipes = ingredient_similarity_search(ingredient)
print(recipes)

{'took': 47, 'timed_out': False, '_shards': {'total': 4, 'successful': 4, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 42, 'relation': 'eq'}, 'max_score': 1.488523, 'hits': [{'_index': 'user205', '_type': '_doc', '_id': '35', '_score': 1.488523, '_source': {'displayName': 'Beer Can Chicken', 'description': None, 'tools': [], 'ingredients': [{'displayText': '6 tablespoons dark brown sugar, packed', 'ingredient': 'dark brown sugar', 'ingredientId': 'subs_dark_brown_sugar', 'quantity': 6.0, 'unit': 'TABLESPOON', 'images': [], 'embedding': [-0.062458861619234085, -0.023272352293133736, -0.07433807104825974, 0.015962421894073486, -0.030494054779410362, -0.05740836262702942, 0.1483631581068039, -0.010826843790709972, 0.00619155028834939, 0.0465727224946022, -0.034288179129362106, -0.04842528700828552, -0.07113226503133774, -0.07445184141397476, -0.05378314480185509, 0.05380856990814209, 0.07815229892730713, -0.0783834233880043, -0.023078860715031624, -0.04202776402235031, 0.029372

In [15]:
res = Recipe(**recipes["hits"]["hits"][0]["_source"])
[i.displayText for i in res.ingredients]

['6 tablespoons dark brown sugar, packed',
 '1 tablespoon smoked paprika',
 '1 tablespoon chili powder',
 '1 tablespoon red pepper flakes',
 '2  teaspoons garlic powder',
 '1 teaspoon fennel, ground and toasted',
 '2 tablespoons kosher salt',
 '1 tablespoon coarse black pepper',
 '½ tablespoon mustard powder',
 '2 tablespoons softened, unsalted butter or olive oil',
 '1  4-pound chicken, giblets removed',
 '4   cloves garlic, peeled and smashed',
 '1  12-ounce can of beer -- drink or pour out half (lager or ales work nicely, but feel free to experiment with other beers you like)']

In [20]:
def search_by_recipe_name_with_time_limit(name: str, max_total_time: int, size=5):
    query = {
        "size": size,
        "query": {
            "bool": {
                "must": [
                    {"multi_match": {"query": name, "fields": ["displayName", "description"]}},
                    {"range": {"totalTimeMinutes": {"lte": max_total_time}}}
                ]
            }
        }
    }
    return client.search(body=query, index=index_name)

# Example usage:
recipes = search_by_recipe_name_with_time_limit("chicken parmesan", max_total_time=30)
print(recipes)


{'took': 10, 'timed_out': False, '_shards': {'total': 4, 'successful': 4, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 22, 'relation': 'eq'}, 'max_score': 5.630069, 'hits': [{'_index': 'user205', '_type': '_doc', '_id': '554', '_score': 5.630069, '_source': {'displayName': 'Edamame Parmesan', 'description': None, 'tools': [], 'ingredients': [{'displayText': 'One 12-ounce package frozen shelled edamame, thawed', 'ingredient': None, 'ingredientId': 'ec10bf0fcea29c0529d8e750d8811f0fbac23b75dc6da6e90f6e64da1870c8d7', 'quantity': 12.0, 'unit': 'OUNCE', 'images': [], 'embedding': [-0.06050143390893936, 0.06852489709854126, 0.03844023868441582, 0.06375130265951157, -0.04606351628899574, -0.0668082907795906, 0.006024907808750868, 0.06435560435056686, -0.04179973527789116, 0.07792413234710693, -0.02823146991431713, -0.06939008831977844, 0.033168260008096695, -0.03281639143824577, 0.013860078528523445, -0.0523693822324276, 0.004210485145449638, -0.04529528319835663, -0.117936246097087