In [2]:
import pandas as pd
import json

# Data Ingestion and Indexing

In [4]:
df=pd.read_json('../data/recipes.json')

In [5]:
receipes=df.to_dict(orient='records')

In [8]:
print(len(receipes))

360


In [78]:
clean_recipes = []
for recipe in receipes:
    clean_recipe = {key: value for key, value in recipe.items() if key not in ['output', 'date']}
    clean_recipes.append(clean_recipe)

In [79]:
clean_recipes[0]

{'title': 'Creamy Mashed Potatoes',
 'tags': ['potato', 'side', 'cheesefare'],
 'introduction': "![Creamy Mashed Potatoes](/pix/creamy-mashed-potatoes.webp) Mashed potatoes is a really great recipe that is often relegated to the position of side dish. This recipe is a spin of the classical mashed potatoes recipe that's got itself more going on. You can serve this dish for a relatively light meal, or you can also serve it as a side dish if you want to have a really hearty meal.",
 'ingredients': 'The quantities here are for about four adult portions. If you are planning on eating this as a side dish, it might be more like 6-8 portions. * 1kg potatoes * 200ml milk* * 200ml mayonnaise* * ~100g cheese * Garlic powder * 12-16 strips of bacon * Butter * 3-4 green onions * Black pepper * Salt  *You can play with the proportions depending on how creamy or dry you want the mashed potatoes to be.',
 'direction': '1. Peel and cut the potatoes into medium sized pieces. 2. Put the potatoes in a pot

In [22]:
from sentence_transformers import SentenceTransformer
model_name='all-MiniLM-L12-v2'
model=SentenceTransformer(model_name)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/352 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [81]:
from elasticsearch import Elasticsearch

es_client=Elasticsearch('http://localhost:9200')

index_settings={
    "settings":{
            "number_of_shards":1,
            "number_of_replicas":0
    },
    "mappings":{
            "properties":{
                    "title":{"type":"text"},
                    "tags":{"type":"text"},
                    "introduction":{"type":"text"},
                    "ingredients":{"type":"text"},
                    "direction":{"type":"text"},
                    "combined_vector":{
                            "type":"dense_vector",
                            "dims":384,
                            "index":True,
                            "similarity":"cosine"
                     },
            },
    }
}
index_name="food_recipes"
es_client.indices.delete(index=index_name,ignore_unavailable=True)
es_client.indices.create(index=index_name,body=index_settings)                
                    
                        
    
    

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'food_recipes'})

In [83]:
from tqdm.auto import tqdm

for recipe in tqdm(clean_recipes):
            title = recipe['title']
            ingredients = recipe['ingredients']
            direction = recipe['direction']
            recipe['combined_vector']=model.encode(title + ' '+ ingredients + ' '+ direction )
            try:
                es_client.index(index=index_name,document=recipe)  
            except Exception as e:
                print(e)
            
 

  0%|          | 0/360 [00:00<?, ?it/s]

In [87]:
def elastic_search_knn_query(field,vector):
            knn={
                "field":field,
                "query_vector":vector,
                "k":5,
                "num_candidates": 360,
            }
            search_query={
                "knn":knn,
                "_source":["title","tags","introduction","ingredients","directions","output"]
            }
            es_results=es_client.search(index=index_name,body=search_query)
            results=[]
            for hit in es_results['hits']['hits']:
                    results.append(hit['_source'])
            return results
            

In [88]:
def vector_query(q):
        question=q['question']
        vect_query=model.encode(question)
        #print(vect_query)
        return elastic_search_knn_query('combined_vector',vect_query)
            

In [89]:
vector_query(dict(
    question='how to make mashed potatos?'
    ))

[{'ingredients': "- Potatoes (yellow or brown), I don't count but 10+ - Butter, a stick or two - Whole Milk - Garlic - Cheese (optional) - Onion Greens (optional) - Sour Cream (optional)",
  'title': 'Potato Soup',
  'introduction': 'Serves many people or one person for many days.',
  'tags': ['potato', 'milk', 'soup']},
 {'ingredients': '- potatoes - flour - tomato - parmesan - butter - sage or oregano or thyme',
  'title': 'Gnocchi',
  'introduction': 'Gnocchi is a pasta based on potato, making it a less bloating alternative to other kinds of pasta. They are substantial and can be roasted in butter and other ingredients.',
  'tags': ['italian', 'potato', 'side', 'fasting']},
 {'ingredients': '- 3 Potatoes, peeled and quartered - 7 Tbsp Butter, cut into small chunks - 1/2 cup Sour Cream - 1 Egg - 1 Tbsp Milk - 3 cup Cabbage, shredded - 2 Leeks, chopped - 1 small Onion, chopped - 2 cubes Chicken Bouillon - 1/2 cup Cheddar Cheese, shredded',
  'title': 'Colcannon Bake',
  'introduction'