In [1]:
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer
import pandas as pd
import os
from smolagents import OpenAIServerModel
from smolagents import CodeAgent, WebSearchTool, LiteLLMRouterModel
import requests

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Test df
df = pd.read_csv("100recipes.csv")

## Load Some Embedding model:

In [3]:
model_emb = SentenceTransformer("avsolatorio/GIST-large-Embedding-v0")

In [4]:
# Combine relevant text fields into one string per recipe
def make_full_text(row):
    ingredients = " ".join(eval(row["ingredients"])) if isinstance(row["ingredients"], str) else ""
    directions = " ".join(eval(row["directions"])) if isinstance(row["directions"], str) else ""
    return f"{row['title']} {ingredients} {directions}"

df["full_text"] = df.apply(make_full_text, axis=1)

## Embed the first 100 test recipies

In [5]:
texts = model_emb.encode(df.full_text, show_progress_bar= True)

Batches: 100%|██████████| 4/4 [00:13<00:00,  3.38s/it]


## Query Extpansion

In [6]:
question = input()

In [None]:
url = "http://localhost:1234/v1/chat/completions"
headers = {"Content-Type": "application/json"}

data = {
    "model": "qwen3-0.6b",
    "messages": [
        {"role": "system", "content": """"You are an intelligent query expansion assistant. Your task is not to answer the user's question, but to extract and generate a list of relevant keywords that expand or enrich the user's query. These keywords should be closely related in meaning, context, or domain, and useful for improving information retrieval or search accuracy. 

Only return a comma-separated list of keywords or key phrases. Do not provide explanations, full sentences, or answers.

For example:
User: 'I want to eat something Italian.'
You: Italian, food, pasta, pizza, risotto, espresso, trattoria

User: 'How to train a neural network?'
You: neural network, deep learning, training, machine learning, model optimization, backpropagation, AI, algorithm"
  """},
        {"role": "user", "content": f"{question}"}
    ],
    "temperature": 0.1,
    "max_tokens": 256,
    "stream": False
}

response = requests.post(url, headers=headers, json=data)
print(response.json()["choices"][0]["message"]["content"])


<think>
Okay, let's see. The user wants American food and has beef at home. So the key here is to expand the question with relevant keywords. The original input was "I want some american food. I have beef at home." 

First, "American food" would be important. Then, since they mentioned beef, that's a good keyword. Maybe "beef" or "meat". Also, "some" could fit here as quantity. So combining those, the keywords should be American food, beef, and some. That makes sense.
</think>

Keywords: American food, beef, some


In [8]:
raw_query = response.json()["choices"][0]["message"]["content"]
_, q_ext = raw_query.split('</think>\n\n')

In [9]:
q_ext + question

'Keywords: American food, beef, someI want some american food. I have beef at home'

In [10]:
question_vec = model_emb.encode(question + q_ext)


In [11]:
import torch

In [12]:
similarities = model_emb.similarity(texts, question_vec)
similarities[0:10]

tensor([[0.3518],
        [0.4487],
        [0.3892],
        [0.3906],
        [0.3283],
        [0.5703],
        [0.3042],
        [0.3539],
        [0.4897],
        [0.3201]])

## Get the 3 best Recepies

In [13]:
top_k = torch.topk(similarities.squeeze(), k=3)
top_indices = top_k.indices
print(df.iloc[top_indices])

    Unnamed: 0.1  Unnamed: 0                               title  \
5              5           5            Cheeseburger Potato Soup   
26            26          26  Corral Barbecued Beef Steak Strips   
56            56          56   Corned Beef And Cabbage Casserole   

                                          ingredients  \
5   ["6 baking potatoes", "1 lb. of extra lean gro...   
26  ["2 lb. round steak 1/2 to 3/4-inch thick, sli...   
56  ["1 small cabbage, shredded", "1 tsp. dry must...   

                                           directions  \
5   ["Wash potatoes; prick several times with a fo...   
26  ["Brown strips in cooking oil.", "Pour off dri...   
56  ["Combine all ingredients and cook uncovered o...   

                                               link    source  \
5    www.cookbooks.com/Recipe-Details.aspx?id=20115  Gathered   
26  www.cookbooks.com/Recipe-Details.aspx?id=420402  Gathered   
56  www.cookbooks.com/Recipe-Details.aspx?id=263782  Gathered   

        

for later agentic usage:

In [14]:
model = OpenAIServerModel(
    model_id="qwen3-0.6b",
    api_base="http://localhost:1234/v1",
    api_key= "not-needed",
)

In [15]:
# agent = CodeAgent(tools=[WebSearchTool()], model=model)
# agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?")

In [None]:
from smolagents import (
    CodeAgent,
    ToolCallingAgent,
    InferenceClientModel,
    WebSearchTool,
    LiteLLMModel,
)

model = OpenAIServerModel(
    model_id="qwen3-0.6b",
    api_base="http://localhost:1234/v1",
    api_key= "not-needed",
)

web_agent = ToolCallingAgent(
    tools=[WebSearchTool(), visit_webpage],
    model=model,
    max_steps=10,
    name="web_search_agent",
    description="Runs web searches for you.",
)

In [18]:
manager_agent = CodeAgent(
    tools=[],
    model=model,
    managed_agents=[web_agent],
    additional_authorized_imports=["time", "numpy", "pandas"],
)

In [19]:
answer = manager_agent.run("If LLM training continues to scale up at the current rhythm until 2030, what would be the electric power in GW required to power the biggest training runs by 2030? What would that correspond to, compared to some countries? Please provide a source for any numbers used.")