In [1]:
from tqdm import tqdm
import polars as pl
import sys
import os

In [2]:
sys.path.append(os.path.abspath('../core'))

In [3]:
from retrival import VectorSearcher, HybridSearcher
from generation import LLM

## Get questions

In [4]:
question_list = pl.read_csv("./dataset/synthetic-questions.csv").to_dicts()

In [5]:
question_list[:3]

[{'question_id': 1, 'question': 'How do I make a lemon herb baked salmon?'},
 {'question_id': 2, 'question': 'How do I make homemade hummus?'},
 {'question_id': 3, 'question': 'What can I make with polenta and mushrooms?'}]

## Retrival eval

### VectorSearcher

In [16]:
vector_client = VectorSearcher()

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

In [9]:
vector_answers = []

for question in tqdm(question_list):
    qid, q = question.values()
    results = vector_client.search(q)
    results = list(enumerate(results, start=1))
    for r, a in results:
        vector_answers.append({'question_id': qid, 'question': q, "rank": r,"answer": a})

100%|██████████| 450/450 [00:02<00:00, 154.86it/s]


In [10]:
pl.from_dicts(vector_answers).write_csv("./dataset/vector-answers.csv")

### HybridSearcher

In [52]:
#question_list = question_list[:5]

In [6]:
hybrid_client = HybridSearcher()
hybrid_answers = []

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

In [21]:
i = 0

In [22]:
vector_client.search(question_list[i]["question"])

['Title: Lemon Grilled Salmon\nIngredients:\n2 tsp. snipped fresh dill or 1/2 tsp. dill weed\n1/2 tsp. lemon-pepper seasoning\n1/2 tsp. salt (optional)\n1/4 tsp. garlic powder\n1 salmon fillet (1 1/2 lb.)\n1/4 c. packed brown sugar\n3 Tbsp. chicken broth\n3 Tbsp. vegetable oil\n1/2 Tbsp. soy sauce\n3 Tbsp. chopped green onions (can substitute regular)\n1 small lemon, thinly sliced\n2 onion slices, separated into rings\nDirections:\nIn a small bowl, mix together dill, lemon-pepper, salt and garlic powder.\nSprinkle on salmon.\nPlace in a large resealable plastic bag or shallow glass container.\nCombine brown sugar, broth, oil, soy sauce and green onions.\nPour over the salmon. Cover and refrigerate for 1 hour.\nTurn once.',
 'Title: Pacific Salmon With Roasted Garlic\nIngredients:\n4 (6 oz.) salmon fillets\n1 medium head garlic\n1/4 c. olive oil\n1 1/2 Tbsp. butter\n1 fresh lemon\nfresh rosemary, chopped\nDirections:\nSeparate garlic into cloves and peel. Place in small ramekin and cove

In [23]:
hybrid_client.search(question_list[i]["question"])

[]

In [53]:
qid_set = {d['question_id'] for d in hybrid_answers}

for question in tqdm(question_list):
    qid, q = question.values()
    print(qid, q)
    if not qid in qid_set:
        results = hybrid_client.search(q)
        results = list(enumerate(results, start=1))
        print(results)
        for r, a in results:
            print({'question_id': qid, 'question': q, "rank": r,"answer": a})
            hybrid_answers.append({'question_id': qid, 'question': q, "rank": r,"answer": a})

  0%|          | 0/5 [00:00<?, ?it/s]

1 How do I make a lemon herb baked salmon?


 20%|██        | 1/5 [00:01<00:07,  1.79s/it]

[]
2 How do I make homemade hummus?


 40%|████      | 2/5 [00:02<00:04,  1.41s/it]

[]
3 What can I make with polenta and mushrooms?


 60%|██████    | 3/5 [00:04<00:02,  1.41s/it]

[]
4 I want a recipe for chicken made on the stove.


 80%|████████  | 4/5 [00:05<00:01,  1.41s/it]

[]
5 What’s a good recipe for a no-knead bread?


100%|██████████| 5/5 [00:06<00:00,  1.40s/it]

[]





In [51]:
len(hybrid_answers)

0

In [None]:
pl.from_dicts(hybrid_answers).write_csv("./dataset/hybrid-answers.csv")

### Query rewrite + HybridSearcher