#### Rename recipe files to Rx_recipe_name

In [2]:
import sys, os

parent = './recipes'
# R_ID = 1
# for child in os.listdir('./recipes') :
#     new_name = f'R{R_ID}-' + child
#     os.rename(os.path.join(parent, child), os.path.join(parent, new_name))
#     R_ID += 1


#### Chunking every recipe and upserting into Pinecone 


In [3]:
def createChunks(file, sep = '. ') :
    ingredientsFound, methodFound = False, False
    with open(os.path.join(parent, file), 'r') as f :
        chunks = []
        curChunk = ''
        for lineNum, line in enumerate(f.readlines(), start=1) :
            line = line.strip()
            if line :                          # Line not empty
                lowerCaseLine = line.lower()

                # 'Ingredients' section - every 5 lines (delimiter = \n) is one chunk
                if lowerCaseLine.startswith('ingredients') and not ingredientsFound:
                    ingredientsFound = True
                    ingLine = 0
                    curChunk = lowerCaseLine + sep
                    
                # 'Method' section - every 5 sentences(delimiter = .)
                elif lowerCaseLine.startswith('method') and not methodFound:
                    # Leftover chunk from ingredients?
                    if curChunk :
                        chunks.append({'_id' : file, 'chunk_text' : curChunk})

                    methodFound = True
                    wordCnt = 0
                    curChunk = lowerCaseLine + sep
                
                # Lines under 'Ingredients' section
                elif ingredientsFound and not methodFound :
                    ingLine += 1
                    if not (ingLine % 5) :
                        chunks.append({'_id' : file, 'chunk_text' : curChunk})
                        curChunk = line + sep
                    else :
                        curChunk += line + sep
                
                # Lines under method
                elif methodFound :
                    # Find all '.'-separated sentences
                    sentences = line.split('.')
                    for sent in sentences :
                        sentLen = len(sent.split())
                        if wordCnt + sentLen < 50 :
                            wordCnt += sentLen
                            curChunk += sent + sep
                                                    
                        else :
                            chunks.append({'_id' : file, 'chunk_text' : curChunk})
                            wordCnt = sentLen
                            curChunk = sent + sep
                            
        if curChunk :
            chunks.append({'_id' : file, 'chunk_text' : curChunk})
    return chunks
    

#### Upload chunks to Pinecone

In [14]:
from pinecone import Pinecone
from dotenv import load_dotenv

load_dotenv()
pc = Pinecone(api_key=os.getenv('PINECONE_KEY'))

index_name = "dense-recipes"

if not pc.has_index(index_name):
    pc.create_index_for_model(
        name=index_name,
        cloud="aws",
        region="us-east-1",
        embed={
            "model":"llama-text-embed-v2",
            "field_map":{"text": "chunk_text"}
        }
    )

In [15]:
pc.describe_index('dense-recipes')

{
    "name": "dense-recipes",
    "metric": "cosine",
    "host": "dense-recipes-j2tpktj.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 1024,
    "deletion_protection": "disabled",
    "tags": null,
    "embed": {
        "model": "llama-text-embed-v2",
        "field_map": {
            "text": "chunk_text"
        },
        "dimension": 1024,
        "metric": "cosine",
        "write_parameters": {
            "dimension": 1024.0,
            "input_type": "passage",
            "truncate": "END"
        },
        "read_parameters": {
            "dimension": 1024.0,
            "input_type": "query",
            "truncate": "END"
        },
        "vector_type": "dense"
    }
}

In [22]:
chunks

[{'_id': 'R6-Kadai-Paneer.txt',
  'chunk_text': 'ingredients. 5 ripe tomatoes diced. 2 medium onions diced. Ginger garlic paste. 2 green chillies chopped. '},
 {'_id': 'R6-Kadai-Paneer.txt',
  'chunk_text': 'Oil. Butter. Paneer cubed. Capsicum cubed. 1 tomato cubed. '},
 {'_id': 'R6-Kadai-Paneer.txt',
  'chunk_text': '1 large carrot cut lengthwise. 1 small onion cubed. Cream 2 tbsp. Masalas. Crushed coriander seeds 1tbsp. '},
 {'_id': 'R6-Kadai-Paneer.txt',
  'chunk_text': 'method. And oil and some butter into a pan, heat it. . Add diced onion, ginger garlic paste, green chilli and diced tomatoes along with some salt and small piece of jaggery.  Add coriander powder, jeera powder, kashmiri chilli powder, turmeric.  Cook till oil separates,  onions to be browned. '},
 {'_id': 'R6-Kadai-Paneer.txt',
  'chunk_text': ' Set it aside. In another pan add 1 to 2 teaspoons of oil and onions.   Fry for a while.   Follow it up with carrots, capsicums, tomato and finally paneer. . Add crushed cori

In [27]:
denseIndex = pc.Index(host='dense-recipes-j2tpktj.svc.aped-4627-b74a.pinecone.io')

for file in os.listdir('./recipes') :
    if file != 'R6-Kadai-Paneer.txt' :
        chunks = createChunks(file)
        denseIndex.upsert_records(namespace='default', records=chunks)

#### Search index

In [None]:
results = denseIndex.search(
    namespace="default", 
    query={
        "inputs": {"text": "quick and easy dessert"}, 
        "top_k": 5
    },
    fields=["chunk_text"]
)

print(results)

{'result': {'hits': [{'_id': 'R19-Milk-Barfi.txt',
                      '_score': 0.3538697063922882,
                      'fields': {'chunk_text': 'Spread crushed nuts and '
                                               'slightly press. . Cool a bit '
                                               'and put in fridge for an hour. '
                                               'Remove and cut into pieces. '
                                               '. '}},
                     {'_id': 'R15-Wheat-Apple-Cake.txt',
                      '_score': 0.29383018612861633,
                      'fields': {'chunk_text': 'Cut the other Apple into half '
                                               'and then make thin slices, dip '
                                               'these slices into the cake '
                                               'mixture in  circles. . Preheat '
                                               'oven to 180 deg C and bake for '
                    