In [1]:
#Import the libraries
#*NEW*

#1. Download the following into your environment

# !pip install spacy --> Requires numpy ==1.21
# pip install --upgrade click
# python -m spacy download en_core_web_sm

#2. 
import spacy

# Load the English language model
nlp = spacy.load("en_core_web_sm")

#3. Add in code to lemmatize user input


#2. Add the 'boost' list
#3. Add in the updated query -- NER vs ingredients

In [2]:
import pandas as pd

In [3]:
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk


es = Elasticsearch("http://localhost:9200")
es.info().body

{'name': '6575549c42c0',
 'cluster_name': 'docker-cluster',
 'cluster_uuid': 'I8vgNl59S2GL9tOzyMBa7Q',
 'version': {'number': '8.7.0',
  'build_flavor': 'default',
  'build_type': 'docker',
  'build_hash': '09520b59b6bc1057340b55750186466ea715e30e',
  'build_date': '2023-03-27T16:31:09.816451435Z',
  'build_snapshot': False,
  'lucene_version': '9.5.0',
  'minimum_wire_compatibility_version': '7.17.0',
  'minimum_index_compatibility_version': '7.0.0'},
 'tagline': 'You Know, for Search'}

In [4]:
#Load the data
df = (pd.read_csv("recipe_dataset_cleaned_v4.csv")
     .dropna()
     .sample(5000, random_state=42)
     .reset_index()
     )

In [5]:
df.head()

Unnamed: 0,index,title,ingredients,directions,link,source,NER,cleaned_NER
0,1116810,Tex-Mex Corn On The Cob,"[""12 small ears fresh corn on the cob (about 6...","[""Place corn in a Dutch oven or kettle; cover ...",www.tasteofhome.com/recipes/tex-mex-corn-on-th...,Gathered,"[""corn"", ""fresh cilantro"", ""chili powder"", ""li...","['corn', 'lime']"
1,1137989,Cheddar Herb Snacks,"[""1 cup shredded cheddar cheese"", ""1/2 cup but...","[""In a large bowl, combine the first six ingre...",www.tasteofhome.com/recipes/cheddar-herb-snacks/,Gathered,"[""cheddar cheese"", ""butter"", ""parsley"", ""chive...","['cheese', 'chives', 'pimientos', 'green onion..."
2,933189,Spice-Marinated And Grilled Lamb Chops,"[""1/2 teaspoon fennel seeds"", ""1 serrano chile...","[""Toast fennel seeds in a dry small skillet ov...",www.epicurious.com/recipes/food/views/indian-s...,Gathered,"[""fennel seeds"", ""serrano chile"", ""ginger"", ""g...","['fennel seeds', 'serrano chile', 'cr\\u00e8me..."
3,1488091,Dow Sah Bow Cha Siu Bow Recipe,"[""1 package instant dry yeast"", ""1 3/4 c. hot ...","[""First, make the dough."", ""This dough is good...",cookeatshare.com/recipes/dow-sah-bow-cha-siu-b...,Recipes1M,"[""yeast"", ""water"", ""sugar"", ""baking pwdr"", ""fl...",['baking pwdr']
4,64045,Baked Salmon Croquettes,"[""1 (15 1/2 oz.) can pink salmon"", ""milk"", ""1/...","[""Drain salmon, reserving liquid; add enough m...",www.cookbooks.com/Recipe-Details.aspx?id=341036,Gathered,"[""pink salmon"", ""milk"", ""butter"", ""onion"", ""fl...","['pink salmon', 'milk', 'onion', 'cornflakes']"


In [9]:
es.indices.delete(index='recipes')

ObjectApiResponse({'acknowledged': True})

In [10]:
#Create an index
mappings = {
        "properties": {
            "index": {"type": "integer"},
            "title": {"type": "text", "analyzer": "english"},
            "ingredients": {"type": "text", "analyzer": "standard"},
            "directions": {"type": "text", "analyzer": "standard"},
            "link": {"type": "keyword"},
            "source": {"type": "text", "analyzer": "standard"},
            "NER": {"type": "text", "analyzer": "english"},
            "cleaned_NER": {"type": "text", "analyzer": "english"},
    }
}

es.indices.create(index="recipes", mappings=mappings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'recipes'})

In [11]:
from elasticsearch.helpers import bulk

bulk_data = []
for i,row in df.iterrows():
    bulk_data.append(
        {
            "_index": "recipes",
            "_id": i,
            "_source": {        
                "index": row['index'],
                "title": row['title'],
                "ingredients": row["ingredients"],
                "directions": row["directions"],
                "link": row["link"],
                "source": row["source"],
                "NER": row["NER"],
                "cleaned_NER": row["cleaned_NER"]
            }
        }
    )
bulk(es, bulk_data)


(5000, [])

In [12]:
es.indices.refresh(index="recipes")
es.cat.count(index="recipes", format = "json")

ListApiResponse([{'epoch': '1712975817', 'timestamp': '02:36:57', 'count': '5000'}])

In [18]:
#Pull in ingredients from EfficientNet
# ingredient1 = "beef"
# ingredient2 = "potatoes"
# ingredient3 = "asparagus"
# ingredient4 = "mushroom"
# ingredient5 = None
# must_not = "tomato"


#ingredient1
doc = nlp(ingredient1)
lemmatized_ingredient_1 = " ".join([token.lemma_ for token in doc])

#ingredient2
doc = nlp(ingredient2)
lemmatized_ingredient_2 = " ".join([token.lemma_ for token in doc])


# Lemmatize each ingredient
lemmatized_ingredients = []
for ingredient in [ingredient3, ingredient4, ingredient5]:
    if ingredient:
        doc = nlp(ingredient)
        lemmatized_ingredient = " ".join([token.lemma_ for token in doc])
        lemmatized_ingredients.append(lemmatized_ingredient)
        
        
#Lemmatize must_not
doc = nlp(must_not)
lemmatized_must_not = " ".join([token.lemma_ for token in doc])


#*NEW*
boost_factors = {
    ingredient1: 5,
    ingredient2: 4,
    ingredient3: 3,
    ingredient4: 2
}


In [19]:
def search_recipes(es, query):
    response = es.search(index="recipes", body=query, size=5)
    if not response['hits']['hits']:  # Check if the query returned no results
        # Modify the query for a broader search
        # For example, remove the `minimum_should_match` or adjust `must_not` clause
        # This is a placeholder for how you might adjust the query
        query['query']['bool'].pop('minimum_should_match', None)  # Example adjustment
        response = es.search(index="recipes", body=query, size=10)  # Try again with the adjusted query
    return response

# Initial query
query = {
    "query": {
        "bool": {
            "must": [
                {"match": {"cleaned_NER": {"query": lemmatized_ingredient_1, "boost": boost_factors.get(lemmatized_ingredient_1, 1)}}},
                {"match": {"cleaned_NER": {"query": lemmatized_ingredient_2, "boost": boost_factors.get(lemmatized_ingredient_2, 1)}}},  # New must clause for ingredient2
            ],
            "should": [
                {"match": {"cleaned_NER": {"query": ingredient, "boost": boost_factors.get(ingredient, 1)}}}
                for ingredient in lemmatized_ingredients if ingredient is not None
            ],
            "must_not": [
                {"match": {"cleaned_NER": lemmatized_must_not}},
            ],
            "minimum_should_match": 1
        }
    }
}


# Execute search with fallback
result = search_recipes(es, query)

  response = es.search(index="recipes", body=query, size=5)


In [20]:
result

ObjectApiResponse({'took': 72, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 6, 'relation': 'eq'}, 'max_score': 21.24495, 'hits': [{'_index': 'recipes', '_id': '4119', '_score': 21.24495, '_source': {'index': 1472672, 'title': 'Hamburger, Potato, And Onion Casserole Recipe', 'ingredients': '["Hamburger", "Grated potatoes", "Sliced onions", "Cream of mushroom soup", "Salt and pepper"]', 'directions': '["Make layers of meat, grated potatoes, sliced onions, soup, salt, and pepper.", "Bake at 350 degrees for 1 to 1 1/2 hrs, depending on how many layers and how big a pan."]', 'link': 'cookeatshare.com/recipes/hamburger-potato-and-onion-casserole-38024', 'source': 'Recipes1M', 'NER': '["Hamburger", "potatoes", "onions", "Cream of mushroom soup", "Salt"]', 'cleaned_NER': "['ground beef', 'potato', 'onion', 'mushroom soup']"}}, {'_index': 'recipes', '_id': '1140', '_score': 19.002792, '_source': {'index': 682948, 'title': "