In [None]:
import chromadb
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction

In [None]:
embedding_function = SentenceTransformerEmbeddingFunction()
client = chromadb.PersistentClient(path='../chroma')
collection = client.get_collection("prompt_engineering_knowledge", embedding_function=embedding_function)

In [None]:
query = 'How to make a few-show prompting?'

In [5]:
from langchain_ollama.chat_models import ChatOllama

llm = ChatOllama(model='deepseek-r1', temperature=0.7)

In [12]:
from langchain_core.messages import SystemMessage, HumanMessage

def expand(query) -> list[str]:
    system = SystemMessage(content='''
                           You are a vectorDB specialist, and your task is to create 5 queries from the original user query.
                           Provide queries that are related to the user query topic.
                           Answer with only the new queries separated by Two breaklines (\\n\\n) without additional text.
                           '''.strip())
    user = HumanMessage(content=f"Query: {query}")
    result = llm.invoke([
        system,
        user
    ])

    final_result = result.content.split('</think>')[-1]

    if not final_result or final_result == '':
        return []

    return [q.strip() for q in final_result.split('\n')]


In [13]:
expanded_queries = expand(query)

for q in expanded_queries:
    print(q)


How to implement few-shot learning with examples?
What are some effective techniques for few-show prompting (assuming typo of few-shot)?
Can you provide an example prompt structure for few-shot prompting in language models like GPT-3?
How does few-shot prompting differ from zero-shot prompting in AI applications?
Best practices for using few-show prompting to improve model performance on specific tasks.


In [17]:
results = collection.query(query_texts=[query] + expanded_queries, n_results=10)

retrieved_documents = set()
for documents in results['documents']:
    for document in documents:
        retrieved_documents.add(document)
retrieved_documents = list(retrieved_documents)

print(retrieved_documents[0])

) ( feb 2023 ) - [ dr chatgpt, tell me what i want to hear : how prompt knowledge impacts health answer correctness ] ( https : / / arxiv. org / abs / 2302. 13793 ) ( feb 2023 ) - [ an independent evaluation of chatgpt on mathematical word problems ( mwp ) ] ( https : / / arxiv. org / abs / 2302. 13814 ) ( feb 2023 )


## CrossEncoder & Re-ranking

In [None]:
from sentence_transformers import CrossEncoder
import numpy as np
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

In [22]:
pairs = [[query, doc] for doc in retrieved_documents]
scores = cross_encoder.predict(pairs)

top_five = [retrieved_documents[i] for i in np.argsort(scores)[::-1]]

for d in top_five:
    print(d, end='\n\n')

* * * few - shot prompting * * : add few - shot demonstrations / exemplars if you need to meet a desired output that the model is struggling with. make sure to align these with your high - level instructions to avoid confusion. few - shot prompting is particularly useful when it ’ s hard to explain the desired output and to provide examples of the behavior you want the model to avoid. * * * use descriptive and clear modifiers when instructing the models : * * you can steer models like o3 and claude 4 to produce more complex and higher - quality outputs ( e. g., for code and search results ) by using clear modifiers and more details in the instructions. [ obtained from the claude 4 documentation ] ( https : / / docs. anthropic. com / en / docs / build - with - claude / prompt - engineering / claude - 4 - best - practices # enhance - visual - and - frontend - code - generation ), an example for generating front - end code would be “ add thoughtful details like hover states, transitions, 